Merge pull request #118772 from kannon92/terminating-pod-gc
KEP-3939: pod gc changes for pod replacement policy kep
This commit is contained in:
		@@ -344,7 +344,11 @@ func (gcc *PodGCController) markFailedAndDeletePod(ctx context.Context, pod *v1.
 | 
				
			|||||||
func (gcc *PodGCController) markFailedAndDeletePodWithCondition(ctx context.Context, pod *v1.Pod, condition *corev1apply.PodConditionApplyConfiguration) error {
 | 
					func (gcc *PodGCController) markFailedAndDeletePodWithCondition(ctx context.Context, pod *v1.Pod, condition *corev1apply.PodConditionApplyConfiguration) error {
 | 
				
			||||||
	logger := klog.FromContext(ctx)
 | 
						logger := klog.FromContext(ctx)
 | 
				
			||||||
	logger.Info("PodGC is force deleting Pod", "pod", klog.KObj(pod))
 | 
						logger.Info("PodGC is force deleting Pod", "pod", klog.KObj(pod))
 | 
				
			||||||
	if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
 | 
						// Patch the pod to make sure it is transitioned to the Failed phase before deletion.
 | 
				
			||||||
 | 
						// This is needed for the JobPodReplacementPolicy feature to make sure Job replacement pods are created.
 | 
				
			||||||
 | 
						// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-apps/3939-allow-replacement-when-fully-terminated#risks-and-mitigations
 | 
				
			||||||
 | 
						// for more details.
 | 
				
			||||||
 | 
						if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) || utilfeature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		// Mark the pod as failed - this is especially important in case the pod
 | 
							// Mark the pod as failed - this is especially important in case the pod
 | 
				
			||||||
		// is orphaned, in which case the pod would remain in the Running phase
 | 
							// is orphaned, in which case the pod would remain in the Running phase
 | 
				
			||||||
@@ -357,7 +361,7 @@ func (gcc *PodGCController) markFailedAndDeletePodWithCondition(ctx context.Cont
 | 
				
			|||||||
			// PodGC it means that it is in the Failed phase, so sending the
 | 
								// PodGC it means that it is in the Failed phase, so sending the
 | 
				
			||||||
			// condition will not be re-attempted.
 | 
								// condition will not be re-attempted.
 | 
				
			||||||
			podApply.Status.WithPhase(v1.PodFailed)
 | 
								podApply.Status.WithPhase(v1.PodFailed)
 | 
				
			||||||
			if condition != nil {
 | 
								if condition != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
 | 
				
			||||||
				podApply.Status.WithConditions(condition)
 | 
									podApply.Status.WithConditions(condition)
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			if _, err := gcc.kubeClient.CoreV1().Pods(pod.Namespace).ApplyStatus(ctx, podApply, metav1.ApplyOptions{FieldManager: fieldManager, Force: true}); err != nil {
 | 
								if _, err := gcc.kubeClient.CoreV1().Pods(pod.Namespace).ApplyStatus(ctx, podApply, metav1.ApplyOptions{FieldManager: fieldManager, Force: true}); err != nil {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -398,6 +398,14 @@ const (
 | 
				
			|||||||
	// that have never been unsuspended before.
 | 
						// that have never been unsuspended before.
 | 
				
			||||||
	JobMutableNodeSchedulingDirectives featuregate.Feature = "JobMutableNodeSchedulingDirectives"
 | 
						JobMutableNodeSchedulingDirectives featuregate.Feature = "JobMutableNodeSchedulingDirectives"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// owner: @kannon92
 | 
				
			||||||
 | 
						// kep : https://kep.k8s.io/3939
 | 
				
			||||||
 | 
						// alpha: v1.28
 | 
				
			||||||
 | 
						//
 | 
				
			||||||
 | 
						// Allow users to specify recreating pods of a job only when
 | 
				
			||||||
 | 
						// pods have fully terminated.
 | 
				
			||||||
 | 
						JobPodReplacementPolicy featuregate.Feature = "JobPodReplacementPolicy"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// owner: @mimowo
 | 
						// owner: @mimowo
 | 
				
			||||||
	// kep: https://kep.k8s.io/3329
 | 
						// kep: https://kep.k8s.io/3329
 | 
				
			||||||
	// alpha: v1.25
 | 
						// alpha: v1.25
 | 
				
			||||||
@@ -1033,6 +1041,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	JobPodFailurePolicy: {Default: true, PreRelease: featuregate.Beta},
 | 
						JobPodFailurePolicy: {Default: true, PreRelease: featuregate.Beta},
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						JobPodReplacementPolicy: {Default: false, PreRelease: featuregate.Alpha},
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	JobReadyPods: {Default: true, PreRelease: featuregate.Beta},
 | 
						JobReadyPods: {Default: true, PreRelease: featuregate.Beta},
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	JobTrackingWithFinalizers: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.28
 | 
						JobTrackingWithFinalizers: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.28
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -41,6 +41,7 @@ import (
 | 
				
			|||||||
func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
 | 
					func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
 | 
				
			||||||
	tests := map[string]struct {
 | 
						tests := map[string]struct {
 | 
				
			||||||
		enablePodDisruptionConditions bool
 | 
							enablePodDisruptionConditions bool
 | 
				
			||||||
 | 
							enableJobPodReplacementPolicy bool
 | 
				
			||||||
		phase                         v1.PodPhase
 | 
							phase                         v1.PodPhase
 | 
				
			||||||
		wantPhase                     v1.PodPhase
 | 
							wantPhase                     v1.PodPhase
 | 
				
			||||||
		wantDisruptionTarget          *v1.PodCondition
 | 
							wantDisruptionTarget          *v1.PodCondition
 | 
				
			||||||
@@ -56,6 +57,24 @@ func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
 | 
				
			|||||||
				Message: "PodGC: node no longer exists",
 | 
									Message: "PodGC: node no longer exists",
 | 
				
			||||||
			},
 | 
								},
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
 | 
							"PodDisruptionConditions and PodReplacementPolicy enabled": {
 | 
				
			||||||
 | 
								enablePodDisruptionConditions: true,
 | 
				
			||||||
 | 
								enableJobPodReplacementPolicy: true,
 | 
				
			||||||
 | 
								phase:                         v1.PodPending,
 | 
				
			||||||
 | 
								wantPhase:                     v1.PodFailed,
 | 
				
			||||||
 | 
								wantDisruptionTarget: &v1.PodCondition{
 | 
				
			||||||
 | 
									Type:    v1.DisruptionTarget,
 | 
				
			||||||
 | 
									Status:  v1.ConditionTrue,
 | 
				
			||||||
 | 
									Reason:  "DeletionByPodGC",
 | 
				
			||||||
 | 
									Message: "PodGC: node no longer exists",
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							"Only PodReplacementPolicy enabled; no PodDisruptionCondition": {
 | 
				
			||||||
 | 
								enablePodDisruptionConditions: false,
 | 
				
			||||||
 | 
								enableJobPodReplacementPolicy: true,
 | 
				
			||||||
 | 
								phase:                         v1.PodPending,
 | 
				
			||||||
 | 
								wantPhase:                     v1.PodFailed,
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
		"PodDisruptionConditions disabled": {
 | 
							"PodDisruptionConditions disabled": {
 | 
				
			||||||
			enablePodDisruptionConditions: false,
 | 
								enablePodDisruptionConditions: false,
 | 
				
			||||||
			phase:                         v1.PodPending,
 | 
								phase:                         v1.PodPending,
 | 
				
			||||||
@@ -76,6 +95,7 @@ func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
 | 
				
			|||||||
	for name, test := range tests {
 | 
						for name, test := range tests {
 | 
				
			||||||
		t.Run(name, func(t *testing.T) {
 | 
							t.Run(name, func(t *testing.T) {
 | 
				
			||||||
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
 | 
								defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
 | 
				
			||||||
 | 
								defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)()
 | 
				
			||||||
			testCtx := setup(t, "podgc-orphaned")
 | 
								testCtx := setup(t, "podgc-orphaned")
 | 
				
			||||||
			cs := testCtx.ClientSet
 | 
								cs := testCtx.ClientSet
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -151,6 +171,7 @@ func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
 | 
				
			|||||||
func TestTerminatingOnOutOfServiceNode(t *testing.T) {
 | 
					func TestTerminatingOnOutOfServiceNode(t *testing.T) {
 | 
				
			||||||
	tests := map[string]struct {
 | 
						tests := map[string]struct {
 | 
				
			||||||
		enablePodDisruptionConditions bool
 | 
							enablePodDisruptionConditions bool
 | 
				
			||||||
 | 
							enableJobPodReplacementPolicy bool
 | 
				
			||||||
		withFinalizer                 bool
 | 
							withFinalizer                 bool
 | 
				
			||||||
		wantPhase                     v1.PodPhase
 | 
							wantPhase                     v1.PodPhase
 | 
				
			||||||
	}{
 | 
						}{
 | 
				
			||||||
@@ -172,11 +193,19 @@ func TestTerminatingOnOutOfServiceNode(t *testing.T) {
 | 
				
			|||||||
			enablePodDisruptionConditions: false,
 | 
								enablePodDisruptionConditions: false,
 | 
				
			||||||
			withFinalizer:                 false,
 | 
								withFinalizer:                 false,
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
 | 
							"pod has phase changed when PodDisruptionConditions disabled, but JobPodReplacementPolicy enabled": {
 | 
				
			||||||
 | 
								enablePodDisruptionConditions: false,
 | 
				
			||||||
 | 
								enableJobPodReplacementPolicy: true,
 | 
				
			||||||
 | 
								withFinalizer:                 true,
 | 
				
			||||||
 | 
								wantPhase:                     v1.PodFailed,
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for name, test := range tests {
 | 
						for name, test := range tests {
 | 
				
			||||||
		t.Run(name, func(t *testing.T) {
 | 
							t.Run(name, func(t *testing.T) {
 | 
				
			||||||
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
 | 
								defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
 | 
				
			||||||
 | 
								defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeOutOfServiceVolumeDetach, true)()
 | 
				
			||||||
 | 
								defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)()
 | 
				
			||||||
			testCtx := setup(t, "podgc-out-of-service")
 | 
								testCtx := setup(t, "podgc-out-of-service")
 | 
				
			||||||
			cs := testCtx.ClientSet
 | 
								cs := testCtx.ClientSet
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user