Delay setting terminal Job conditions until all pods are terminal

Fix the integration test typecheck

Fix after rebase

# Conflicts:
#	pkg/controller/job/job_controller_test.go
Michal Wozniak committed 2024-07-11 20:54:09 +02:00
parent e5ff4b8fcd
commit fb7704ba03
8 changed files with 1177 additions and 59 deletions
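With this change, the Job controller adds the interim conditions (SuccessCriteriaMet or FailureTarget) as soon as the Job's fate is decided, but only adds the terminal conditions (Complete or Failed) once every Pod has reached a terminal phase. A minimal sketch of how a consumer can tell the two stages apart, assuming only the batch/v1 condition constants already used in the tests below (this helper is illustrative and not part of this commit):

package jobstatus

import (
	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
)

// jobOutcomeDecided reports whether the Job's fate is already known via an
// interim condition (SuccessCriteriaMet or FailureTarget), even while some
// Pods may still be running or terminating.
func jobOutcomeDecided(job *batchv1.Job) bool {
	return hasTrueCondition(job, batchv1.JobSuccessCriteriaMet) ||
		hasTrueCondition(job, batchv1.JobFailureTarget)
}

// jobTerminal reports whether the Job carries a terminal condition (Complete
// or Failed), which with this change is only set once all Pods are terminal.
func jobTerminal(job *batchv1.Job) bool {
	return hasTrueCondition(job, batchv1.JobComplete) ||
		hasTrueCondition(job, batchv1.JobFailed)
}

func hasTrueCondition(job *batchv1.Job, cType batchv1.JobConditionType) bool {
	for _, c := range job.Status.Conditions {
		if c.Type == cType && c.Status == v1.ConditionTrue {
			return true
		}
	}
	return false
}

The e2e and integration tests below encode the same two-step expectation: first wait for the interim condition, then for the terminal one.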


@@ -21,7 +21,6 @@ import (
"encoding/json"
"fmt"
"strconv"
"time"
batchv1 "k8s.io/api/batch/v1"
v1 "k8s.io/api/core/v1"
@@ -83,6 +82,10 @@ var _ = SIGDescribe("Job", func() {
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)
ginkgo.By("Ensuring job reaches completions")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -130,6 +133,10 @@ var _ = SIGDescribe("Job", func() {
job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
ginkgo.By("Awaiting for the job to have the interim failure condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonPodFailurePolicy)
framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)
ginkgo.By("Ensuring job fails")
err = e2ejob.WaitForJobFailed(f.ClientSet, f.Namespace.Name, job.Name)
framework.ExpectNoError(err, "failed to ensure job failure in namespace: %s", f.Namespace.Name)
@@ -168,6 +175,10 @@ var _ = SIGDescribe("Job", func() {
job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)
ginkgo.By("Ensuring job reaches completions")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -248,6 +259,10 @@ var _ = SIGDescribe("Job", func() {
err = e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodDelete)
framework.ExpectNoError(err, "failed to await for the pod to be deleted: %s/%s", pod.Name, pod.Namespace)
ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)
ginkgo.By("Ensuring job reaches completions")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -337,6 +352,10 @@ var _ = SIGDescribe("Job", func() {
err = e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodDelete)
framework.ExpectNoError(err, "failed to await for the pod to be deleted: %s/%s", pod.Name, pod.Namespace)
ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)
ginkgo.By("Ensuring job reaches completions")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -369,6 +388,10 @@ var _ = SIGDescribe("Job", func() {
job, err = e2ejob.UpdateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to update job in namespace: %s", f.Namespace.Name)
ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)
ginkgo.By("Waiting for job to complete")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -537,6 +560,10 @@ done`}
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
ginkgo.By("Awaiting for the job to have the interim failure condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonFailedIndexes)
framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)
ginkgo.By("Awaiting for the job to fail as there are failed indexes")
err = e2ejob.WaitForJobFailed(f.ClientSet, f.Namespace.Name, job.Name)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -573,6 +600,10 @@ done`}
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
ginkgo.By("Awaiting for the job to have the interim failure condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonMaxFailedIndexesExceeded)
framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)
ginkgo.By("Awaiting for the job to fail as the number of max failed indexes is exceeded")
err = e2ejob.WaitForJobFailed(f.ClientSet, f.Namespace.Name, job.Name)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -616,6 +647,10 @@ done`}
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
ginkgo.By("Awaiting for the job to have the interim failure condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonFailedIndexes)
framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)
ginkgo.By("Awaiting for the job to fail as all indexes are failed")
err = e2ejob.WaitForJobFailed(f.ClientSet, f.Namespace.Name, job.Name)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -705,6 +740,10 @@ done`}
job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)
ginkgo.By("Ensuring job reaches completions")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, *job.Spec.Completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -720,8 +759,13 @@ done`}
job := e2ejob.NewTestJob("notTerminate", "exceed-active-deadline", v1.RestartPolicyNever, parallelism, completions, &activeDeadlineSeconds, backoffLimit)
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
ginkgo.By("Awaiting for the job to have the interim failure condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonDeadlineExceeded)
framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)
ginkgo.By("Ensuring job past active deadline")
err = waitForJobFailure(ctx, f.ClientSet, f.Namespace.Name, job.Name, time.Duration(activeDeadlineSeconds+15)*time.Second, "DeadlineExceeded")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailed, batchv1.JobReasonDeadlineExceeded)
framework.ExpectNoError(err, "failed to ensure job past active deadline in namespace: %s", f.Namespace.Name)
})
@@ -823,9 +867,13 @@ done`}
job := e2ejob.NewTestJob("fail", "backofflimit", v1.RestartPolicyNever, 1, 1, nil, int32(backoff))
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
ginkgo.By("Ensuring job exceed backofflimit")
err = waitForJobFailure(ctx, f.ClientSet, f.Namespace.Name, job.Name, e2ejob.JobTimeout, "BackoffLimitExceeded")
ginkgo.By("Awaiting for the job to have the interim failure condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonBackoffLimitExceeded)
framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)
ginkgo.By("Ensuring job exceed backofflimit")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailed, batchv1.JobReasonBackoffLimitExceeded)
framework.ExpectNoError(err, "failed to ensure job exceed backofflimit in namespace: %s", f.Namespace.Name)
ginkgo.By(fmt.Sprintf("Checking that %d pod created and status is failed", backoff+1))
@@ -870,6 +918,10 @@ done`}
job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)
ginkgo.By("Ensuring job reaches completions")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -1186,22 +1238,26 @@ func waitForJobEvent(ctx context.Context, config watchEventConfig) {
}
}
// waitForJobFailure uses c to wait for up to timeout for the Job named jobName in namespace ns to fail.
func waitForJobFailure(ctx context.Context, c clientset.Interface, ns, jobName string, timeout time.Duration, reason string) error {
return wait.Poll(framework.Poll, timeout, func() (bool, error) {
// waitForJobCondition waits for the specified Job to have the expected condition with the specific reason.
func waitForJobCondition(ctx context.Context, c clientset.Interface, ns, jobName string, cType batchv1.JobConditionType, reason string) error {
err := wait.PollUntilContextTimeout(ctx, framework.Poll, JobTimeout, false, func(ctx context.Context) (bool, error) {
curr, err := c.BatchV1().Jobs(ns).Get(ctx, jobName, metav1.GetOptions{})
if err != nil {
return false, err
}
for _, c := range curr.Status.Conditions {
if c.Type == batchv1.JobFailed && c.Status == v1.ConditionTrue {
if reason == "" || reason == c.Reason {
if c.Type == cType && c.Status == v1.ConditionTrue {
if reason == c.Reason {
return true, nil
}
}
}
return false, nil
})
if err != nil {
return fmt.Errorf("waiting for Job %q to have the condition %q with reason: %q: %w", jobName, cType, reason, err)
}
return nil
}
func findConditionByType(list []batchv1.JobCondition, cType batchv1.JobConditionType) *batchv1.JobCondition {


@@ -29,6 +29,7 @@ import (
"time"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
batchv1 "k8s.io/api/batch/v1"
v1 "k8s.io/api/core/v1"
eventsv1 "k8s.io/api/events/v1"
@@ -1160,6 +1161,301 @@ func TestBackoffLimitPerIndex_JobPodsCreatedWithExponentialBackoff(t *testing.T)
}
}
// TestDelayTerminalPhaseCondition tests the fix for Job controller to delay
// setting the terminal phase conditions (Failed and Complete) until all Pods
// are terminal. The fate of the Job is indicated by the interim Job conditions:
// FailureTarget, or SuccessCriteriaMet.
func TestDelayTerminalPhaseCondition(t *testing.T) {
t.Cleanup(setDurationDuringTest(&jobcontroller.DefaultJobPodFailureBackOff, fastPodFailureBackoff))
podTemplateSpec := v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Finalizers: []string{"fake.example.com/blockDeletion"},
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "main-container",
Image: "foo",
ImagePullPolicy: v1.PullIfNotPresent,
TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError,
},
},
},
}
failOnePod := func(ctx context.Context, clientSet clientset.Interface, jobObj *batchv1.Job) {
if _, err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodFailed, 1); err != nil {
t.Fatalf("Failed setting phase %q on Job Pod: %v", v1.PodFailed, err)
}
}
succeedOnePodAndScaleDown := func(ctx context.Context, clientSet clientset.Interface, jobObj *batchv1.Job) {
// mark one pod as succeeded
if err := setJobPhaseForIndex(ctx, clientSet, jobObj, v1.PodSucceeded, 0); err != nil {
t.Fatalf("Failed setting phase %q on Job Pod: %v", v1.PodSucceeded, err)
}
jobClient := clientSet.BatchV1().Jobs(jobObj.Namespace)
if _, err := updateJob(ctx, jobClient, jobObj.Name, func(j *batchv1.Job) {
j.Spec.Parallelism = ptr.To[int32](1)
j.Spec.Completions = ptr.To[int32](1)
}); err != nil {
t.Fatalf("Unexpected error when scaling down the job: %v", err)
}
}
testCases := map[string]struct {
enableJobManagedBy bool
enableJobPodReplacementPolicy bool
job batchv1.Job
action func(context.Context, clientset.Interface, *batchv1.Job)
wantInterimStatus *batchv1.JobStatus
wantTerminalStatus batchv1.JobStatus
}{
"job backoff limit exceeded; JobPodReplacementPolicy and JobManagedBy disabled": {
job: batchv1.Job{
Spec: batchv1.JobSpec{
Parallelism: ptr.To[int32](2),
Completions: ptr.To[int32](2),
Template: podTemplateSpec,
BackoffLimit: ptr.To[int32](0),
},
},
action: failOnePod,
wantTerminalStatus: batchv1.JobStatus{
Failed: 2,
Ready: ptr.To[int32](0),
Conditions: []batchv1.JobCondition{
{
Type: batchv1.JobFailed,
Status: v1.ConditionTrue,
Reason: batchv1.JobReasonBackoffLimitExceeded,
},
},
},
},
"job backoff limit exceeded; JobPodReplacementPolicy enabled": {
enableJobPodReplacementPolicy: true,
job: batchv1.Job{
Spec: batchv1.JobSpec{
Parallelism: ptr.To[int32](2),
Completions: ptr.To[int32](2),
Template: podTemplateSpec,
BackoffLimit: ptr.To[int32](0),
},
},
action: failOnePod,
wantInterimStatus: &batchv1.JobStatus{
Failed: 2,
Ready: ptr.To[int32](0),
Terminating: ptr.To[int32](1),
Conditions: []batchv1.JobCondition{
{
Type: batchv1.JobFailureTarget,
Status: v1.ConditionTrue,
Reason: batchv1.JobReasonBackoffLimitExceeded,
},
},
},
wantTerminalStatus: batchv1.JobStatus{
Failed: 2,
Ready: ptr.To[int32](0),
Terminating: ptr.To[int32](0),
Conditions: []batchv1.JobCondition{
{
Type: batchv1.JobFailureTarget,
Status: v1.ConditionTrue,
Reason: batchv1.JobReasonBackoffLimitExceeded,
},
{
Type: batchv1.JobFailed,
Status: v1.ConditionTrue,
Reason: batchv1.JobReasonBackoffLimitExceeded,
},
},
},
},
"job backoff limit exceeded; JobManagedBy enabled": {
enableJobManagedBy: true,
job: batchv1.Job{
Spec: batchv1.JobSpec{
Parallelism: ptr.To[int32](2),
Completions: ptr.To[int32](2),
Template: podTemplateSpec,
BackoffLimit: ptr.To[int32](0),
},
},
action: failOnePod,
wantInterimStatus: &batchv1.JobStatus{
Failed: 2,
Ready: ptr.To[int32](0),
Conditions: []batchv1.JobCondition{
{
Type: batchv1.JobFailureTarget,
Status: v1.ConditionTrue,
Reason: batchv1.JobReasonBackoffLimitExceeded,
},
},
},
wantTerminalStatus: batchv1.JobStatus{
Failed: 2,
Ready: ptr.To[int32](0),
Conditions: []batchv1.JobCondition{
{
Type: batchv1.JobFailureTarget,
Status: v1.ConditionTrue,
Reason: batchv1.JobReasonBackoffLimitExceeded,
},
{
Type: batchv1.JobFailed,
Status: v1.ConditionTrue,
Reason: batchv1.JobReasonBackoffLimitExceeded,
},
},
},
},
"job scale down to meet completions; JobPodReplacementPolicy and JobManagedBy disabled": {
job: batchv1.Job{
Spec: batchv1.JobSpec{
Parallelism: ptr.To[int32](2),
Completions: ptr.To[int32](2),
CompletionMode: ptr.To(batchv1.IndexedCompletion),
Template: podTemplateSpec,
},
},
action: succeedOnePodAndScaleDown,
wantTerminalStatus: batchv1.JobStatus{
Succeeded: 1,
Ready: ptr.To[int32](0),
CompletedIndexes: "0",
Conditions: []batchv1.JobCondition{
{
Type: batchv1.JobComplete,
Status: v1.ConditionTrue,
},
},
},
},
"job scale down to meet completions; JobPodReplacementPolicy enabled": {
enableJobPodReplacementPolicy: true,
job: batchv1.Job{
Spec: batchv1.JobSpec{
Parallelism: ptr.To[int32](2),
Completions: ptr.To[int32](2),
CompletionMode: ptr.To(batchv1.IndexedCompletion),
Template: podTemplateSpec,
},
},
action: succeedOnePodAndScaleDown,
wantInterimStatus: &batchv1.JobStatus{
Succeeded: 1,
Ready: ptr.To[int32](0),
Terminating: ptr.To[int32](1),
CompletedIndexes: "0",
Conditions: []batchv1.JobCondition{
{
Type: batchv1.JobSuccessCriteriaMet,
Status: v1.ConditionTrue,
},
},
},
wantTerminalStatus: batchv1.JobStatus{
Succeeded: 1,
Ready: ptr.To[int32](0),
Terminating: ptr.To[int32](0),
CompletedIndexes: "0",
Conditions: []batchv1.JobCondition{
{
Type: batchv1.JobSuccessCriteriaMet,
Status: v1.ConditionTrue,
},
{
Type: batchv1.JobComplete,
Status: v1.ConditionTrue,
},
},
},
},
"job scale down to meet completions; JobManagedBy enabled": {
enableJobManagedBy: true,
job: batchv1.Job{
Spec: batchv1.JobSpec{
Parallelism: ptr.To[int32](2),
Completions: ptr.To[int32](2),
CompletionMode: ptr.To(batchv1.IndexedCompletion),
Template: podTemplateSpec,
},
},
action: succeedOnePodAndScaleDown,
wantInterimStatus: &batchv1.JobStatus{
Succeeded: 1,
Ready: ptr.To[int32](0),
CompletedIndexes: "0",
Conditions: []batchv1.JobCondition{
{
Type: batchv1.JobSuccessCriteriaMet,
Status: v1.ConditionTrue,
},
},
},
wantTerminalStatus: batchv1.JobStatus{
Succeeded: 1,
Ready: ptr.To[int32](0),
CompletedIndexes: "0",
Conditions: []batchv1.JobCondition{
{
Type: batchv1.JobSuccessCriteriaMet,
Status: v1.ConditionTrue,
},
{
Type: batchv1.JobComplete,
Status: v1.ConditionTrue,
},
},
},
},
}
for name, test := range testCases {
t.Run(name, func(t *testing.T) {
resetMetrics()
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobManagedBy, test.enableJobManagedBy)
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.ElasticIndexedJob, true)
closeFn, restConfig, clientSet, ns := setup(t, "delay-terminal-condition")
t.Cleanup(closeFn)
ctx, cancel := startJobControllerAndWaitForCaches(t, restConfig)
t.Cleanup(cancel)
jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &test.job)
if err != nil {
t.Fatalf("Error %q while creating the job %q", err, jobObj.Name)
}
t.Cleanup(func() { removePodsFinalizer(ctx, t, clientSet, ns.Name) })
jobClient := clientSet.BatchV1().Jobs(jobObj.Namespace)
waitForPodsToBeActive(ctx, t, jobClient, *jobObj.Spec.Parallelism, jobObj)
test.action(ctx, clientSet, jobObj)
if test.wantInterimStatus != nil {
validateJobStatus(ctx, t, clientSet, jobObj, *test.wantInterimStatus)
// Set terminal phase to all the remaining pods to simulate
// Kubelet (or other components like PodGC).
jobPods, err := getJobPods(ctx, t, clientSet, jobObj, func(s v1.PodStatus) bool {
return (s.Phase == v1.PodPending || s.Phase == v1.PodRunning)
})
if err != nil {
t.Fatalf("Failed to list Job Pods: %v", err)
}
if _, err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, len(jobPods)); err != nil {
t.Fatalf("Failed setting phase %q on Job Pod: %v", v1.PodSucceeded, err)
}
}
validateJobStatus(ctx, t, clientSet, jobObj, test.wantTerminalStatus)
})
}
}
// TestBackoffLimitPerIndex tests handling of job and its pods when
// backoff limit per index is used.
func TestBackoffLimitPerIndex(t *testing.T) {
@@ -2821,7 +3117,7 @@ func TestElasticIndexedJob(t *testing.T) {
jobUpdates: []jobUpdate{
{
completions: ptr.To[int32](0),
wantTerminating: ptr.To[int32](3),
wantTerminating: ptr.To[int32](0),
},
},
},
@@ -3595,6 +3891,25 @@ func validateJobsPodsStatusOnlyWithTimeout(ctx context.Context, t testing.TB, cl
}
}
func validateJobStatus(ctx context.Context, t testing.TB, clientSet clientset.Interface, jobObj *batchv1.Job, wantStatus batchv1.JobStatus) {
t.Helper()
diff := ""
if err := wait.PollUntilContextTimeout(ctx, waitInterval, wait.ForeverTestTimeout, true, func(ctx context.Context) (bool, error) {
gotJob, err := clientSet.BatchV1().Jobs(jobObj.Namespace).Get(ctx, jobObj.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("Failed to get updated Job: %v, last status diff (-want,+got):\n%s", err, diff)
}
diff = cmp.Diff(wantStatus, gotJob.Status,
cmpopts.EquateEmpty(),
cmpopts.IgnoreFields(batchv1.JobStatus{}, "StartTime", "UncountedTerminatedPods", "CompletionTime"),
cmpopts.IgnoreFields(batchv1.JobCondition{}, "LastProbeTime", "LastTransitionTime", "Message"),
)
return diff == "", nil
}); err != nil {
t.Fatalf("Waiting for Job Status: %v\n, Status diff (-want,+got):\n%s", err, diff)
}
}
func validateJobPodsStatus(ctx context.Context, t testing.TB, clientSet clientset.Interface, jobObj *batchv1.Job, desired podsByStatus) {
t.Helper()
validateJobsPodsStatusOnly(ctx, t, clientSet, jobObj, desired)