Merge pull request #119159 from alculquicondor/fix-job-uncounted
Only declare job as finished after removing all finalizers
This commit is contained in:
		| @@ -792,12 +792,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) { | ||||
| 	var manageJobErr error | ||||
| 	var finishedCondition *batch.JobCondition | ||||
|  | ||||
| 	jobHasNewFailure := failed > job.Status.Failed | ||||
| 	// new failures happen when status does not reflect the failures and active | ||||
| 	// is different than parallelism, otherwise the previous controller loop | ||||
| 	// failed updating status so even if we pick up failure it is not a new one | ||||
| 	exceedsBackoffLimit := jobHasNewFailure && (active != *job.Spec.Parallelism) && | ||||
| 		(failed > *job.Spec.BackoffLimit) | ||||
| 	exceedsBackoffLimit := failed > *job.Spec.BackoffLimit | ||||
|  | ||||
| 	if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) { | ||||
| 		if failureTargetCondition := findConditionByType(job.Status.Conditions, batch.JobFailureTarget); failureTargetCondition != nil { | ||||
| @@ -999,6 +994,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job | ||||
| 		needsFlush = true | ||||
| 	} | ||||
| 	podFailureCountByPolicyAction := map[string]int{} | ||||
| 	reachedMaxUncountedPods := false | ||||
| 	for _, pod := range pods { | ||||
| 		if !hasJobTrackingFinalizer(pod) || expectedRmFinalizers.Has(string(pod.UID)) { | ||||
| 			// This pod was processed in a previous sync. | ||||
| @@ -1049,6 +1045,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job | ||||
| 			// | ||||
| 			// The job will be synced again because the Job status and Pod updates | ||||
| 			// will put the Job back to the work queue. | ||||
| 			reachedMaxUncountedPods = true | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| @@ -1077,7 +1074,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job | ||||
| 	if job, needsFlush, err = jm.flushUncountedAndRemoveFinalizers(ctx, job, podsToRemoveFinalizer, uidsWithFinalizer, &oldCounters, podFailureCountByPolicyAction, needsFlush, newBackoffRecord); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	jobFinished := jm.enactJobFinished(job, finishedCond) | ||||
| 	jobFinished := !reachedMaxUncountedPods && jm.enactJobFinished(job, finishedCond) | ||||
| 	if jobFinished { | ||||
| 		needsFlush = true | ||||
| 	} | ||||
|   | ||||
| @@ -1341,6 +1341,9 @@ func TestOrphanPodsFinalizersClearedWithGC(t *testing.T) { | ||||
| } | ||||
|  | ||||
| func TestFinalizersClearedWhenBackoffLimitExceeded(t *testing.T) { | ||||
| 	// Set a maximum number of uncounted pods below parallelism, to ensure it | ||||
| 	// doesn't affect the termination of pods. | ||||
| 	t.Cleanup(setDuringTest(&jobcontroller.MaxUncountedPods, 50)) | ||||
| 	closeFn, restConfig, clientSet, ns := setup(t, "simple") | ||||
| 	defer closeFn() | ||||
| 	ctx, cancel := startJobControllerAndWaitForCaches(restConfig) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Prow Robot
					Kubernetes Prow Robot