Merge pull request #118772 from kannon92/terminating-pod-gc
KEP-3939: pod gc changes for pod replacement policy kep
This commit is contained in:
		@@ -344,7 +344,11 @@ func (gcc *PodGCController) markFailedAndDeletePod(ctx context.Context, pod *v1.
 | 
			
		||||
func (gcc *PodGCController) markFailedAndDeletePodWithCondition(ctx context.Context, pod *v1.Pod, condition *corev1apply.PodConditionApplyConfiguration) error {
 | 
			
		||||
	logger := klog.FromContext(ctx)
 | 
			
		||||
	logger.Info("PodGC is force deleting Pod", "pod", klog.KObj(pod))
 | 
			
		||||
	if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
 | 
			
		||||
	// Patch the pod to make sure it is transitioned to the Failed phase before deletion.
 | 
			
		||||
	// This is needed for the JobPodReplacementPolicy feature to make sure Job replacement pods are created.
 | 
			
		||||
	// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-apps/3939-allow-replacement-when-fully-terminated#risks-and-mitigations
 | 
			
		||||
	// for more details.
 | 
			
		||||
	if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) || utilfeature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) {
 | 
			
		||||
 | 
			
		||||
		// Mark the pod as failed - this is especially important in case the pod
 | 
			
		||||
		// is orphaned, in which case the pod would remain in the Running phase
 | 
			
		||||
@@ -357,7 +361,7 @@ func (gcc *PodGCController) markFailedAndDeletePodWithCondition(ctx context.Cont
 | 
			
		||||
			// PodGC it means that it is in the Failed phase, so sending the
 | 
			
		||||
			// condition will not be re-attempted.
 | 
			
		||||
			podApply.Status.WithPhase(v1.PodFailed)
 | 
			
		||||
			if condition != nil {
 | 
			
		||||
			if condition != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
 | 
			
		||||
				podApply.Status.WithConditions(condition)
 | 
			
		||||
			}
 | 
			
		||||
			if _, err := gcc.kubeClient.CoreV1().Pods(pod.Namespace).ApplyStatus(ctx, podApply, metav1.ApplyOptions{FieldManager: fieldManager, Force: true}); err != nil {
 | 
			
		||||
 
 | 
			
		||||
@@ -398,6 +398,14 @@ const (
 | 
			
		||||
	// that have never been unsuspended before.
 | 
			
		||||
	JobMutableNodeSchedulingDirectives featuregate.Feature = "JobMutableNodeSchedulingDirectives"
 | 
			
		||||
 | 
			
		||||
	// owner: @kannon92
 | 
			
		||||
	// kep : https://kep.k8s.io/3939
 | 
			
		||||
	// alpha: v1.28
 | 
			
		||||
	//
 | 
			
		||||
	// Allow users to specify recreating pods of a job only when
 | 
			
		||||
	// pods have fully terminated.
 | 
			
		||||
	JobPodReplacementPolicy featuregate.Feature = "JobPodReplacementPolicy"
 | 
			
		||||
 | 
			
		||||
	// owner: @mimowo
 | 
			
		||||
	// kep: https://kep.k8s.io/3329
 | 
			
		||||
	// alpha: v1.25
 | 
			
		||||
@@ -1033,6 +1041,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
 | 
			
		||||
 | 
			
		||||
	JobPodFailurePolicy: {Default: true, PreRelease: featuregate.Beta},
 | 
			
		||||
 | 
			
		||||
	JobPodReplacementPolicy: {Default: false, PreRelease: featuregate.Alpha},
 | 
			
		||||
 | 
			
		||||
	JobReadyPods: {Default: true, PreRelease: featuregate.Beta},
 | 
			
		||||
 | 
			
		||||
	JobTrackingWithFinalizers: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.28
 | 
			
		||||
 
 | 
			
		||||
@@ -41,6 +41,7 @@ import (
 | 
			
		||||
func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
 | 
			
		||||
	tests := map[string]struct {
 | 
			
		||||
		enablePodDisruptionConditions bool
 | 
			
		||||
		enableJobPodReplacementPolicy bool
 | 
			
		||||
		phase                         v1.PodPhase
 | 
			
		||||
		wantPhase                     v1.PodPhase
 | 
			
		||||
		wantDisruptionTarget          *v1.PodCondition
 | 
			
		||||
@@ -56,6 +57,24 @@ func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
 | 
			
		||||
				Message: "PodGC: node no longer exists",
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
		"PodDisruptionConditions and PodReplacementPolicy enabled": {
 | 
			
		||||
			enablePodDisruptionConditions: true,
 | 
			
		||||
			enableJobPodReplacementPolicy: true,
 | 
			
		||||
			phase:                         v1.PodPending,
 | 
			
		||||
			wantPhase:                     v1.PodFailed,
 | 
			
		||||
			wantDisruptionTarget: &v1.PodCondition{
 | 
			
		||||
				Type:    v1.DisruptionTarget,
 | 
			
		||||
				Status:  v1.ConditionTrue,
 | 
			
		||||
				Reason:  "DeletionByPodGC",
 | 
			
		||||
				Message: "PodGC: node no longer exists",
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
		"Only PodReplacementPolicy enabled; no PodDisruptionCondition": {
 | 
			
		||||
			enablePodDisruptionConditions: false,
 | 
			
		||||
			enableJobPodReplacementPolicy: true,
 | 
			
		||||
			phase:                         v1.PodPending,
 | 
			
		||||
			wantPhase:                     v1.PodFailed,
 | 
			
		||||
		},
 | 
			
		||||
		"PodDisruptionConditions disabled": {
 | 
			
		||||
			enablePodDisruptionConditions: false,
 | 
			
		||||
			phase:                         v1.PodPending,
 | 
			
		||||
@@ -76,6 +95,7 @@ func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
 | 
			
		||||
	for name, test := range tests {
 | 
			
		||||
		t.Run(name, func(t *testing.T) {
 | 
			
		||||
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
 | 
			
		||||
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)()
 | 
			
		||||
			testCtx := setup(t, "podgc-orphaned")
 | 
			
		||||
			cs := testCtx.ClientSet
 | 
			
		||||
 | 
			
		||||
@@ -151,6 +171,7 @@ func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
 | 
			
		||||
func TestTerminatingOnOutOfServiceNode(t *testing.T) {
 | 
			
		||||
	tests := map[string]struct {
 | 
			
		||||
		enablePodDisruptionConditions bool
 | 
			
		||||
		enableJobPodReplacementPolicy bool
 | 
			
		||||
		withFinalizer                 bool
 | 
			
		||||
		wantPhase                     v1.PodPhase
 | 
			
		||||
	}{
 | 
			
		||||
@@ -172,11 +193,19 @@ func TestTerminatingOnOutOfServiceNode(t *testing.T) {
 | 
			
		||||
			enablePodDisruptionConditions: false,
 | 
			
		||||
			withFinalizer:                 false,
 | 
			
		||||
		},
 | 
			
		||||
		"pod has phase changed when PodDisruptionConditions disabled, but JobPodReplacementPolicy enabled": {
 | 
			
		||||
			enablePodDisruptionConditions: false,
 | 
			
		||||
			enableJobPodReplacementPolicy: true,
 | 
			
		||||
			withFinalizer:                 true,
 | 
			
		||||
			wantPhase:                     v1.PodFailed,
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for name, test := range tests {
 | 
			
		||||
		t.Run(name, func(t *testing.T) {
 | 
			
		||||
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
 | 
			
		||||
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeOutOfServiceVolumeDetach, true)()
 | 
			
		||||
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)()
 | 
			
		||||
			testCtx := setup(t, "podgc-out-of-service")
 | 
			
		||||
			cs := testCtx.ClientSet
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user