Introduction of a pod condition type indicating disruption. Its reason
field indicates the reason:
- PreemptionByKubeScheduler (Pod preempted by kube-scheduler)
- DeletionByTaintManager (Pod deleted by taint manager due to NoExecute taint)
- EvictionByEvictionAPI (Pod evicted by Eviction API)
- DeletionByPodGC (an orphaned Pod deleted by PodGC)
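For illustration only (not part of this change), a consumer could inspect the new condition roughly as sketched below. The helper name reportDisruption and the package name are hypothetical; the condition type constant and the GetPodCondition helper are the ones used by the test in this commit, and the reason strings are those listed above.

package example

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)

// reportDisruption prints why a pod was disrupted, based on the disruption
// condition added to its status (if any).
func reportDisruption(pod *v1.Pod) {
	_, cond := podutil.GetPodCondition(&pod.Status, v1.AlphaNoCompatGuaranteeDisruptionTarget)
	if cond == nil || cond.Status != v1.ConditionTrue {
		fmt.Printf("pod %s/%s has no disruption condition\n", pod.Namespace, pod.Name)
		return
	}
	switch cond.Reason {
	case "PreemptionByKubeScheduler":
		fmt.Println("pod was preempted by kube-scheduler")
	case "DeletionByTaintManager":
		fmt.Println("pod was deleted by the taint manager due to a NoExecute taint")
	case "EvictionByEvictionAPI":
		fmt.Println("pod was evicted through the Eviction API")
	case "DeletionByPodGC":
		fmt.Println("orphaned pod was deleted by PodGC")
	default:
		fmt.Printf("pod was disrupted for reason %q\n", cond.Reason)
	}
}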
@@ -26,12 +26,17 @@ import (
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apiserver/pkg/admission"
	"k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/informers"
	clientset "k8s.io/client-go/kubernetes"
	restclient "k8s.io/client-go/rest"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	"k8s.io/klog/v2"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/controller/nodelifecycle"
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/plugin/pkg/admission/defaulttolerationseconds"
	"k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction"
	pluginapi "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction"
@@ -44,6 +49,140 @@ const poll = 2 * time.Second

type podCondition func(pod *v1.Pod) (bool, error)

// TestEvictionForNoExecuteTaintAddedByUser tests taint-based eviction for a node tainted NoExecute
func TestEvictionForNoExecuteTaintAddedByUser(t *testing.T) {
	tests := map[string]struct {
		enablePodDisruptionConditions bool
	}{
		"Test eviction for NoExecute taint added by user; pod condition added when PodDisruptionConditions enabled": {
			enablePodDisruptionConditions: true,
		},
		"Test eviction for NoExecute taint added by user; no pod condition added when PodDisruptionConditions disabled": {
			enablePodDisruptionConditions: false,
		},
	}

	for name, test := range tests {
		t.Run(name, func(t *testing.T) {
			nodeIndex := 1
			nodeCount := 3
			var nodes []*v1.Node
			for i := 0; i < nodeCount; i++ {
				node := &v1.Node{
					ObjectMeta: metav1.ObjectMeta{
						Name:   fmt.Sprintf("testnode-%d", i),
						Labels: map[string]string{"node.kubernetes.io/exclude-disruption": "true"},
					},
					Spec: v1.NodeSpec{},
					Status: v1.NodeStatus{
						Conditions: []v1.NodeCondition{
							{
								Type:   v1.NodeReady,
								Status: v1.ConditionTrue,
							},
						},
					},
				}
				nodes = append(nodes, node)
			}
			testPod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: "testpod",
				},
				Spec: v1.PodSpec{
					NodeName: nodes[nodeIndex].Name,
					Containers: []v1.Container{
						{Name: "container", Image: imageutils.GetPauseImageName()},
					},
				},
				Status: v1.PodStatus{
					Phase: v1.PodRunning,
					Conditions: []v1.PodCondition{
						{
							Type:   v1.PodReady,
							Status: v1.ConditionTrue,
						},
					},
				},
			}

			// Apply the PodDisruptionConditions feature gate setting for this test case.
			defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
			testCtx := testutils.InitTestAPIServer(t, "taint-no-execute", nil)

			defer testutils.CleanupTest(t, testCtx)
			cs := testCtx.ClientSet

			// Build clientset and informers for controllers.
			externalClientConfig := restclient.CopyConfig(testCtx.KubeConfig)
			externalClientConfig.QPS = -1
			externalClientset := clientset.NewForConfigOrDie(externalClientConfig)
			externalInformers := informers.NewSharedInformerFactory(externalClientset, time.Second)

			// Start NodeLifecycleController for taint.
			nc, err := nodelifecycle.NewNodeLifecycleController(
				testCtx.Ctx,
				externalInformers.Coordination().V1().Leases(),
				externalInformers.Core().V1().Pods(),
				externalInformers.Core().V1().Nodes(),
				externalInformers.Apps().V1().DaemonSets(),
				cs,
				1*time.Second,    // Node monitor grace period
				time.Minute,      // Node startup grace period
				time.Millisecond, // Node monitor period
				1,                // Pod eviction timeout
				100,              // Eviction limiter QPS
				100,              // Secondary eviction limiter QPS
				50,               // Large cluster threshold
				0.55,             // Unhealthy zone threshold
				true,             // Run taint manager
			)
			if err != nil {
				t.Errorf("Failed to create node controller: %v", err)
				return
			}

			// Wait for all controllers to sync
			externalInformers.Start(testCtx.Ctx.Done())
			externalInformers.WaitForCacheSync(testCtx.Ctx.Done())

			// Run all controllers
			go nc.Run(testCtx.Ctx)

			for index := range nodes {
				nodes[index], err = cs.CoreV1().Nodes().Create(testCtx.Ctx, nodes[index], metav1.CreateOptions{})
				if err != nil {
					t.Errorf("Failed to create node, err: %v", err)
				}
			}

			testPod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, testPod, metav1.CreateOptions{})
			if err != nil {
				t.Fatalf("Test Failed: error: %v, while creating pod", err)
			}

			// Taint the node running the test pod with NoExecute to trigger eviction by the taint manager.
			if err := testutils.AddTaintToNode(cs, nodes[nodeIndex].Name, v1.Taint{Key: "CustomTaintByUser", Effect: v1.TaintEffectNoExecute}); err != nil {
				t.Errorf("Failed to taint node in test %s <%s>, err: %v", name, nodes[nodeIndex].Name, err)
			}

			err = wait.PollImmediate(time.Second, time.Second*20, testutils.PodIsGettingEvicted(cs, testPod.Namespace, testPod.Name))
			if err != nil {
				t.Fatalf("Error %q in test %q when waiting for terminating pod: %q", err, name, klog.KObj(testPod))
			}
			testPod, err = cs.CoreV1().Pods(testCtx.NS.Name).Get(testCtx.Ctx, testPod.Name, metav1.GetOptions{})
			if err != nil {
				t.Fatalf("Test Failed: error: %q, while getting updated pod", err)
			}
			_, cond := podutil.GetPodCondition(&testPod.Status, v1.AlphaNoCompatGuaranteeDisruptionTarget)
			if test.enablePodDisruptionConditions == true && cond == nil {
				t.Errorf("Pod %q does not have the expected condition: %q", klog.KObj(testPod), v1.AlphaNoCompatGuaranteeDisruptionTarget)
			} else if test.enablePodDisruptionConditions == false && cond != nil {
				t.Errorf("Pod %q has an unexpected condition: %q", klog.KObj(testPod), v1.AlphaNoCompatGuaranteeDisruptionTarget)
			}
		})
	}
}

// TestTaintBasedEvictions tests related cases for the TaintBasedEvictions feature
func TestTaintBasedEvictions(t *testing.T) {
	// we need at least 2 nodes to prevent lifecycle manager from entering "fully-disrupted" mode