Merge pull request #113360 from mimowo/handling-pod-failures-beta-enable

Promote the "Retriable and non-retriable pod failures for jobs" feature to beta
This commit is contained in:
Kubernetes Prow Robot
2022-11-09 08:30:24 -08:00
committed by GitHub
45 changed files with 399 additions and 139 deletions

View File

@@ -440,8 +440,7 @@ const (
// JobFailed means the job has failed its execution.
JobFailed JobConditionType = "Failed"
// FailureTarget means the job is about to fail its execution.
// The constant is to be renamed once the name is accepted within the KEP-3329.
AlphaNoCompatGuaranteeJobFailureTarget JobConditionType = "FailureTarget"
JobFailureTarget JobConditionType = "FailureTarget"
)
// JobCondition describes current state of a job.

View File

@@ -52,7 +52,7 @@ func TestSetDefaultJob(t *testing.T) {
Action: batchv1.PodFailurePolicyActionFailJob,
OnPodConditions: []batchv1.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
{
@@ -75,7 +75,7 @@ func TestSetDefaultJob(t *testing.T) {
Action: batchv1.PodFailurePolicyActionFailJob,
OnPodConditions: []batchv1.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
},
},
},
@@ -96,7 +96,7 @@ func TestSetDefaultJob(t *testing.T) {
Action: batchv1.PodFailurePolicyActionFailJob,
OnPodConditions: []batchv1.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
{
@@ -120,7 +120,7 @@ func TestSetDefaultJob(t *testing.T) {
Action: batchv1.PodFailurePolicyActionFailJob,
OnPodConditions: []batchv1.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},

View File

@@ -118,7 +118,7 @@ func TestValidateJob(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Type: api.DisruptionTarget,
Status: api.ConditionTrue,
},
},
@@ -456,7 +456,7 @@ func TestValidateJob(t *testing.T) {
},
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Type: api.DisruptionTarget,
Status: api.ConditionTrue,
},
},
@@ -558,7 +558,7 @@ func TestValidateJob(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Type: api.DisruptionTarget,
},
},
},
@@ -577,7 +577,7 @@ func TestValidateJob(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Type: api.DisruptionTarget,
Status: "UnknownStatus",
},
},
@@ -968,7 +968,7 @@ func TestValidateJobUpdate(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Type: api.DisruptionTarget,
Status: api.ConditionTrue,
},
},
@@ -993,7 +993,7 @@ func TestValidateJobUpdate(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Type: api.DisruptionTarget,
Status: api.ConditionTrue,
},
},
@@ -1007,7 +1007,7 @@ func TestValidateJobUpdate(t *testing.T) {
Action: batch.PodFailurePolicyActionCount,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Type: api.DisruptionTarget,
Status: api.ConditionTrue,
},
},
@@ -1030,7 +1030,7 @@ func TestValidateJobUpdate(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Type: api.DisruptionTarget,
Status: api.ConditionTrue,
},
},

View File

@@ -2433,10 +2433,9 @@ const (
PodReasonSchedulingGated = "SchedulingGated"
// ContainersReady indicates whether all containers in the pod are ready.
ContainersReady PodConditionType = "ContainersReady"
// AlphaNoCompatGuaranteeDisruptionTarget indicates the pod is about to be terminated due to a
// DisruptionTarget indicates the pod is about to be terminated due to a
// disruption (such as preemption, eviction API or garbage-collection).
// The constant is to be renamed once the name is accepted within the KEP-3329.
AlphaNoCompatGuaranteeDisruptionTarget PodConditionType = "DisruptionTarget"
DisruptionTarget PodConditionType = "DisruptionTarget"
)
// PodCondition represents pod's condition

View File

@@ -755,7 +755,7 @@ func (dc *DisruptionController) syncStalePodDisruption(ctx context.Context, key
WithStatus(corev1apply.PodStatus()).
WithResourceVersion(pod.ResourceVersion)
podApply.Status.WithConditions(corev1apply.PodCondition().
WithType(v1.AlphaNoCompatGuaranteeDisruptionTarget).
WithType(v1.DisruptionTarget).
WithStatus(v1.ConditionFalse).
WithLastTransitionTime(metav1.Now()),
)
@@ -998,11 +998,11 @@ func (dc *DisruptionController) nonTerminatingPodHasStaleDisruptionCondition(pod
if pod.DeletionTimestamp != nil {
return false, 0
}
_, cond := apipod.GetPodCondition(&pod.Status, v1.AlphaNoCompatGuaranteeDisruptionTarget)
_, cond := apipod.GetPodCondition(&pod.Status, v1.DisruptionTarget)
// Pod disruption conditions added by kubelet are never considered stale because the condition might take
// arbitrarily long before the pod is terminating (has deletion timestamp). Also, pod conditions present
// on pods in terminal phase are not stale to avoid unnecessary status updates.
if cond == nil || cond.Status != v1.ConditionTrue || cond.Reason == v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet || apipod.IsPodPhaseTerminal(pod.Status.Phase) {
if cond == nil || cond.Status != v1.ConditionTrue || cond.Reason == v1.PodReasonTerminationByKubelet || apipod.IsPodPhaseTerminal(pod.Status.Phase) {
return false, 0
}
waitFor := dc.stalePodDisruptionTimeout - dc.clock.Since(cond.LastTransitionTime.Time)

View File

@@ -1403,7 +1403,7 @@ func TestStalePodDisruption(t *testing.T) {
Status: v1.PodStatus{
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
LastTransitionTime: metav1.Time{Time: now},
},
@@ -1413,7 +1413,7 @@ func TestStalePodDisruption(t *testing.T) {
timePassed: 2*time.Minute + time.Second,
wantConditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionFalse,
},
},
@@ -1427,7 +1427,7 @@ func TestStalePodDisruption(t *testing.T) {
Status: v1.PodStatus{
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
LastTransitionTime: metav1.Time{Time: now},
},
@@ -1437,7 +1437,7 @@ func TestStalePodDisruption(t *testing.T) {
timePassed: 2*time.Minute - time.Second,
wantConditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -1452,7 +1452,7 @@ func TestStalePodDisruption(t *testing.T) {
Status: v1.PodStatus{
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
LastTransitionTime: metav1.Time{Time: now},
},
@@ -1462,7 +1462,7 @@ func TestStalePodDisruption(t *testing.T) {
timePassed: 2*time.Minute + time.Second,
wantConditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -1487,7 +1487,7 @@ func TestStalePodDisruption(t *testing.T) {
Status: v1.PodStatus{
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionFalse,
},
},
@@ -1496,7 +1496,7 @@ func TestStalePodDisruption(t *testing.T) {
timePassed: 2*time.Minute + time.Second,
wantConditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionFalse,
},
},

View File

@@ -758,12 +758,12 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (forget bool, rEr
(failed > *job.Spec.BackoffLimit)
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
if failureTargetCondition := findConditionByType(job.Status.Conditions, batch.AlphaNoCompatGuaranteeJobFailureTarget); failureTargetCondition != nil {
if failureTargetCondition := findConditionByType(job.Status.Conditions, batch.JobFailureTarget); failureTargetCondition != nil {
finishedCondition = newFailedConditionForFailureTarget(failureTargetCondition)
} else if failJobMessage := getFailJobMessage(&job, pods, uncounted.Failed()); failJobMessage != nil {
if uncounted != nil {
// Prepare the interim FailureTarget condition to record the failure message before the finalizers (allowing removal of the pods) are removed.
finishedCondition = newCondition(batch.AlphaNoCompatGuaranteeJobFailureTarget, v1.ConditionTrue, jobConditionReasonPodFailurePolicy, *failJobMessage)
finishedCondition = newCondition(batch.JobFailureTarget, v1.ConditionTrue, jobConditionReasonPodFailurePolicy, *failJobMessage)
} else {
// Prepare the Failed job condition for the legacy path without finalizers (don't use the interim FailureTarget condition).
finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, jobConditionReasonPodFailurePolicy, *failJobMessage)
@@ -1090,7 +1090,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
job.Status.CompletedIndexes = succeededIndexes.String()
}
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
if finishedCond != nil && finishedCond.Type == batch.AlphaNoCompatGuaranteeJobFailureTarget {
if finishedCond != nil && finishedCond.Type == batch.JobFailureTarget {
// Append the interim FailureTarget condition so that the job status is updated with it before the finalizers are removed.
job.Status.Conditions = append(job.Status.Conditions, *finishedCond)

View File

@@ -2192,7 +2192,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
Status: batch.JobStatus{
Conditions: []batch.JobCondition{
{
Type: batch.AlphaNoCompatGuaranteeJobFailureTarget,
Type: batch.JobFailureTarget,
Status: v1.ConditionTrue,
Reason: "PodFailurePolicy",
Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
@@ -2245,7 +2245,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
Status: batch.JobStatus{
Conditions: []batch.JobCondition{
{
Type: batch.AlphaNoCompatGuaranteeJobFailureTarget,
Type: batch.JobFailureTarget,
Status: v1.ConditionTrue,
Reason: "PodFailurePolicy",
Message: "Container main-container for pod default/already-deleted-pod failed with exit code 5 matching FailJob rule at index 1",
@@ -2751,7 +2751,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -2769,7 +2769,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
Status: v1.ConditionTrue,
},
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -2797,7 +2797,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -2811,7 +2811,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -2839,7 +2839,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
Action: batch.PodFailurePolicyActionFailJob,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -2853,7 +2853,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},

View File

@@ -89,7 +89,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Action: "UnkonwnAction",
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -98,7 +98,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -111,7 +111,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -410,7 +410,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -423,7 +423,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -440,7 +440,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionFalse,
},
},
@@ -453,7 +453,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionFalse,
},
},
@@ -470,7 +470,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionUnknown,
},
},
@@ -483,7 +483,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionUnknown,
},
},
@@ -500,7 +500,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionFalse,
},
},
@@ -513,7 +513,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -529,7 +529,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -542,7 +542,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionFalse,
},
},
@@ -558,7 +558,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -571,7 +571,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionFalse,
},
},
@@ -587,7 +587,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Action: batch.PodFailurePolicyActionFailJob,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -600,7 +600,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -617,7 +617,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Action: batch.PodFailurePolicyActionCount,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -630,7 +630,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},
@@ -677,7 +677,7 @@ func TestMatchPodFailurePolicy(t *testing.T) {
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
},
},

View File

@@ -129,7 +129,7 @@ func addConditionAndDeletePod(ctx context.Context, c clientset.Interface, name,
}
podApply := corev1apply.Pod(pod.Name, pod.Namespace).WithStatus(corev1apply.PodStatus())
podApply.Status.WithConditions(corev1apply.PodCondition().
WithType(v1.AlphaNoCompatGuaranteeDisruptionTarget).
WithType(v1.DisruptionTarget).
WithStatus(v1.ConditionTrue).
WithReason("DeletionByTaintManager").
WithMessage("Taint manager: deleting due to NoExecute taint").

View File

@@ -327,6 +327,7 @@ func TestCreateNode(t *testing.T) {
description string
pods []v1.Pod
node *v1.Node
expectPatch bool
expectDelete bool
}{
{
@@ -335,6 +336,7 @@ func TestCreateNode(t *testing.T) {
*testutil.NewPod("pod1", "node1"),
},
node: testutil.NewNode("node1"),
expectPatch: false,
expectDelete: false,
},
{
@@ -343,6 +345,7 @@ func TestCreateNode(t *testing.T) {
*testutil.NewPod("pod1", "node1"),
},
node: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
expectPatch: true,
expectDelete: true,
},
{
@@ -351,6 +354,7 @@ func TestCreateNode(t *testing.T) {
*addToleration(testutil.NewPod("pod1", "node1"), 1, -1),
},
node: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
expectPatch: false,
expectDelete: false,
},
}
@@ -366,7 +370,7 @@ func TestCreateNode(t *testing.T) {
// wait a bit
time.Sleep(timeForControllerToProgress)
verifyPodActions(t, item.description, fakeClientset, false, item.expectDelete)
verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete)
cancel()
}
@@ -766,6 +770,7 @@ func TestEventualConsistency(t *testing.T) {
newPod *v1.Pod
oldNode *v1.Node
newNode *v1.Node
expectPatch bool
expectDelete bool
}{
{
@@ -777,6 +782,7 @@ func TestEventualConsistency(t *testing.T) {
newPod: testutil.NewPod("pod2", "node1"),
oldNode: testutil.NewNode("node1"),
newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
expectPatch: true,
expectDelete: true,
},
{
@@ -788,6 +794,7 @@ func TestEventualConsistency(t *testing.T) {
newPod: addToleration(testutil.NewPod("pod2", "node1"), 1, 100),
oldNode: testutil.NewNode("node1"),
newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
expectPatch: true,
expectDelete: true,
},
{
@@ -799,6 +806,7 @@ func TestEventualConsistency(t *testing.T) {
newPod: testutil.NewPod("pod2", "node1"),
oldNode: testutil.NewNode("node1"),
newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
expectPatch: true,
expectDelete: true,
},
{
@@ -810,6 +818,7 @@ func TestEventualConsistency(t *testing.T) {
newPod: addToleration(testutil.NewPod("pod2", "node1"), 1, 100),
oldNode: testutil.NewNode("node1"),
newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
expectPatch: true,
expectDelete: true,
},
}
@@ -835,7 +844,7 @@ func TestEventualConsistency(t *testing.T) {
// TODO(mborsz): Remove this sleep and other sleeps in this file.
time.Sleep(timeForControllerToProgress)
verifyPodActions(t, item.description, fakeClientset, false, item.expectDelete)
verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete)
fakeClientset.ClearActions()
// And now the delayed update of 'pod2' comes to the TaintManager. We should delete it as well.

View File

@@ -246,12 +246,11 @@ func (gcc *PodGCController) gcOrphaned(ctx context.Context, pods []*v1.Pod, node
}
klog.V(2).InfoS("Found orphaned Pod assigned to the Node, deleting.", "pod", klog.KObj(pod), "node", pod.Spec.NodeName)
condition := corev1apply.PodCondition().
WithType(v1.AlphaNoCompatGuaranteeDisruptionTarget).
WithType(v1.DisruptionTarget).
WithStatus(v1.ConditionTrue).
WithReason("DeletionByPodGC").
WithMessage("PodGC: node no longer exists").
WithLastTransitionTime(metav1.Now())
if err := gcc.markFailedAndDeletePodWithCondition(ctx, pod, condition); err != nil {
utilruntime.HandleError(err)
} else {

View File

@@ -436,6 +436,7 @@ const (
// owner: @mimowo
// kep: https://kep.k8s.io/3329
// alpha: v1.25
// beta: v1.26
//
// Allow users to specify handling of pod failures based on container exit codes
// and pod conditions.
@@ -643,6 +644,7 @@ const (
// owner: @mimowo
// kep: https://kep.k8s.io/3329
// alpha: v1.25
// beta: v1.26
//
// Enables support for appending a dedicated pod condition indicating that
// the pod is being deleted due to a disruption.
@@ -993,7 +995,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
IPTablesOwnershipCleanup: {Default: false, PreRelease: featuregate.Alpha},
JobPodFailurePolicy: {Default: false, PreRelease: featuregate.Alpha},
JobPodFailurePolicy: {Default: true, PreRelease: featuregate.Beta},
JobMutableNodeSchedulingDirectives: {Default: true, PreRelease: featuregate.Beta},
@@ -1049,7 +1051,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
PodDeletionCost: {Default: true, PreRelease: featuregate.Beta},
PodDisruptionConditions: {Default: false, PreRelease: featuregate.Alpha},
PodDisruptionConditions: {Default: true, PreRelease: featuregate.Beta},
PodHasNetworkCondition: {Default: false, PreRelease: featuregate.Alpha},

View File

@@ -392,9 +392,9 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
var condition *v1.PodCondition
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
condition = &v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet,
Reason: v1.PodReasonTerminationByKubelet,
Message: message,
}
}

View File

@@ -1517,7 +1517,7 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po
// updated during the eviction (due to either node resource pressure or
// node graceful shutdown). We do not re-generate the conditions based
// on the container statuses as they are added based on one-time events.
cType := v1.AlphaNoCompatGuaranteeDisruptionTarget
cType := v1.DisruptionTarget
if _, condition := podutil.GetPodConditionFromList(oldPodStatus.Conditions, cType); condition != nil {
s.Conditions = utilpod.ReplaceOrAppendPodCondition(s.Conditions, condition)
}

View File

@@ -2522,7 +2522,7 @@ func Test_generateAPIPodStatus(t *testing.T) {
runningState("containerB"),
},
Conditions: []v1.PodCondition{{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
LastTransitionTime: normalized_now,
}},
@@ -2536,7 +2536,7 @@ func Test_generateAPIPodStatus(t *testing.T) {
runningState("containerB"),
},
Conditions: []v1.PodCondition{{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
LastTransitionTime: normalized_now,
}},
@@ -2558,7 +2558,7 @@ func Test_generateAPIPodStatus(t *testing.T) {
},
},
expectedPodDisruptionCondition: v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
LastTransitionTime: normalized_now,
},

View File

@@ -383,9 +383,9 @@ func (m *managerImpl) processShutdownEvent() error {
status.Reason = nodeShutdownReason
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
podutil.UpdatePodCondition(status, &v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet,
Reason: v1.PodReasonTerminationByKubelet,
Message: nodeShutdownMessage,
})
}

View File

@@ -170,7 +170,7 @@ func TestManager(t *testing.T) {
Reason: "Terminated",
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
Message: "Pod was terminated in response to imminent node shutdown.",
@@ -183,7 +183,7 @@ func TestManager(t *testing.T) {
Reason: "Terminated",
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
Message: "Pod was terminated in response to imminent node shutdown.",
@@ -196,7 +196,7 @@ func TestManager(t *testing.T) {
Reason: "Terminated",
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
Message: "Pod was terminated in response to imminent node shutdown.",

View File

@@ -506,7 +506,7 @@ func (m *manager) updateStatusInternal(pod *v1.Pod, status v1.PodStatus, forceUp
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
// Set DisruptionTarget.LastTransitionTime.
updateLastTransitionTime(&status, &oldStatus, v1.AlphaNoCompatGuaranteeDisruptionTarget)
updateLastTransitionTime(&status, &oldStatus, v1.DisruptionTarget)
}
// ensure that the start time does not change across updates.
@@ -895,7 +895,7 @@ func mergePodStatus(oldPodStatus, newPodStatus v1.PodStatus, couldHaveRunningCon
podConditions = append(podConditions, c)
} else if kubetypes.PodConditionSharedByKubelet(c.Type) {
// we replace or append all the "shared by kubelet" conditions
if c.Type == v1.AlphaNoCompatGuaranteeDisruptionTarget {
if c.Type == v1.DisruptionTarget {
// guard the update of the DisruptionTarget condition with a check to ensure
// it will only be sent once all containers have terminated and the phase
// is terminal. This avoids sending an unnecessary patch request to add

View File

@@ -1426,7 +1426,7 @@ func TestMergePodStatus(t *testing.T) {
func(input v1.PodStatus) v1.PodStatus {
input.Phase = v1.PodFailed
input.Conditions = append(input.Conditions, v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
})
@@ -1436,7 +1436,7 @@ func TestMergePodStatus(t *testing.T) {
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
},
@@ -1466,7 +1466,7 @@ func TestMergePodStatus(t *testing.T) {
func(input v1.PodStatus) v1.PodStatus {
input.Phase = v1.PodFailed
input.Conditions = append(input.Conditions, v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
})
@@ -1493,7 +1493,7 @@ func TestMergePodStatus(t *testing.T) {
false,
func(input v1.PodStatus) v1.PodStatus {
input.Conditions = append(input.Conditions, v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
})
@@ -1514,7 +1514,7 @@ func TestMergePodStatus(t *testing.T) {
Status: v1.ConditionTrue,
},
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
},
@@ -1528,7 +1528,7 @@ func TestMergePodStatus(t *testing.T) {
false,
func(input v1.PodStatus) v1.PodStatus {
input.Conditions = append(input.Conditions, v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
})
@@ -1549,7 +1549,7 @@ func TestMergePodStatus(t *testing.T) {
Status: v1.ConditionTrue,
},
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
},
@@ -1563,7 +1563,7 @@ func TestMergePodStatus(t *testing.T) {
false,
func(input v1.PodStatus) v1.PodStatus {
input.Conditions = append(input.Conditions, v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "EvictedByEvictionAPI",
})
@@ -1572,7 +1572,7 @@ func TestMergePodStatus(t *testing.T) {
func(input v1.PodStatus) v1.PodStatus {
input.Phase = v1.PodFailed
input.Conditions = append(input.Conditions, v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
})
@@ -1596,7 +1596,7 @@ func TestMergePodStatus(t *testing.T) {
Status: v1.ConditionTrue,
},
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
},
@@ -1610,7 +1610,7 @@ func TestMergePodStatus(t *testing.T) {
false,
func(input v1.PodStatus) v1.PodStatus {
input.Conditions = append(input.Conditions, v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "EvictedByEvictionAPI",
})
@@ -1618,7 +1618,7 @@ func TestMergePodStatus(t *testing.T) {
},
func(input v1.PodStatus) v1.PodStatus {
input.Conditions = append(input.Conditions, v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
})
@@ -1628,7 +1628,7 @@ func TestMergePodStatus(t *testing.T) {
Phase: v1.PodRunning,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "EvictedByEvictionAPI",
},
@@ -1650,7 +1650,7 @@ func TestMergePodStatus(t *testing.T) {
true,
func(input v1.PodStatus) v1.PodStatus {
input.Conditions = append(input.Conditions, v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "EvictedByEvictionAPI",
})
@@ -1659,7 +1659,7 @@ func TestMergePodStatus(t *testing.T) {
func(input v1.PodStatus) v1.PodStatus {
input.Phase = v1.PodFailed
input.Conditions = append(input.Conditions, v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "TerminationByKubelet",
})
@@ -1669,7 +1669,7 @@ func TestMergePodStatus(t *testing.T) {
Phase: v1.PodRunning,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "EvictedByEvictionAPI",
},

View File

@@ -48,7 +48,7 @@ func PodConditionByKubelet(conditionType v1.PodConditionType) bool {
// PodConditionSharedByKubelet returns if the pod condition type is shared by kubelet
func PodConditionSharedByKubelet(conditionType v1.PodConditionType) bool {
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
if conditionType == v1.AlphaNoCompatGuaranteeDisruptionTarget {
if conditionType == v1.DisruptionTarget {
return true
}
}

View File

@@ -59,7 +59,7 @@ func TestPodConditionSharedByKubelet(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, true)()
trueCases := []v1.PodConditionType{
v1.AlphaNoCompatGuaranteeDisruptionTarget,
v1.DisruptionTarget,
}
for _, tc := range trueCases {

View File

@@ -302,7 +302,7 @@ func addConditionAndDeletePod(r *EvictionREST, ctx context.Context, name string,
conditionAppender := func(_ context.Context, newObj, _ runtime.Object) (runtime.Object, error) {
podObj := newObj.(*api.Pod)
podutil.UpdatePodCondition(&podObj.Status, &api.PodCondition{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Type: api.DisruptionTarget,
Status: api.ConditionTrue,
Reason: "EvictionByEvictionAPI",
Message: "Eviction API: evicting",

View File

@@ -328,18 +328,18 @@ func TestPostFilter(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cs := clientsetfake.NewSimpleClientset()
// index the potential victim pods in the fake client so that the victims deletion logic does not fail
podItems := []v1.Pod{}
for _, pod := range tt.pods {
podItems = append(podItems, *pod)
}
cs := clientsetfake.NewSimpleClientset(&v1.PodList{Items: podItems})
informerFactory := informers.NewSharedInformerFactory(cs, 0)
podInformer := informerFactory.Core().V1().Pods().Informer()
podInformer.GetStore().Add(tt.pod)
for i := range tt.pods {
podInformer.GetStore().Add(tt.pods[i])
}
// As we use a bare clientset above, it's needed to add a reactor here
// to not fail Victims deletion logic.
cs.PrependReactor("delete", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
return true, nil, nil
})
// Register NodeResourceFit as the Filter & PreFilter plugin.
registeredPlugins := []st.RegisterPluginFunc{
st.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
@@ -1642,6 +1642,11 @@ func TestPreempt(t *testing.T) {
}
deletedPodNames := make(sets.String)
patchedPodNames := make(sets.String)
client.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
patchedPodNames.Insert(action.(clienttesting.PatchAction).GetName())
return true, nil, nil
})
client.PrependReactor("delete", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
deletedPodNames.Insert(action.(clienttesting.DeleteAction).GetName())
return true, nil, nil
@@ -1729,6 +1734,9 @@ func TestPreempt(t *testing.T) {
if len(deletedPodNames) != len(test.expectedPods) {
t.Errorf("expected %v pods, got %v.", len(test.expectedPods), len(deletedPodNames))
}
if diff := cmp.Diff(patchedPodNames.List(), deletedPodNames.List()); diff != "" {
t.Errorf("unexpected difference in the set of patched and deleted pods: %s", diff)
}
for victimName := range deletedPodNames {
found := false
for _, expPod := range test.expectedPods {

View File

@@ -359,7 +359,7 @@ func (ev *Evaluator) prepareCandidate(ctx context.Context, c Candidate, pod *v1.
if feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
victimPodApply := corev1apply.Pod(victim.Name, victim.Namespace).WithStatus(corev1apply.PodStatus())
victimPodApply.Status.WithConditions(corev1apply.PodCondition().
WithType(v1.AlphaNoCompatGuaranteeDisruptionTarget).
WithType(v1.DisruptionTarget).
WithStatus(v1.ConditionTrue).
WithReason("PreemptionByKubeScheduler").
WithMessage(fmt.Sprintf("Kube-scheduler: preempting to accommodate a higher priority pod: %s", klog.KObj(pod))).