Delay setting terminal Job conditions until all pods are terminal
Fix the integration test typecheck

Fix after rebase

# Conflicts:
#	pkg/controller/job/job_controller_test.go
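With this change the Job controller adds the interim conditions (SuccessCriteriaMet or FailureTarget) first and only appends the terminal Complete or Failed condition once all pods are terminal; the tests below therefore wait for the interim condition before the terminal one. As a minimal client-go sketch of how a consumer could observe that ordering (the package name, helper name, poll interval, and timeout are illustrative and not part of this change):

// Sketch only: wait until the Job reports the interim SuccessCriteriaMet
// condition, which with this change precedes the terminal Complete condition.
package jobwatch

import (
	"context"
	"time"

	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

func waitForInterimSuccess(ctx context.Context, c kubernetes.Interface, ns, name string) error {
	// Poll interval and timeout are arbitrary values chosen for this sketch.
	return wait.PollUntilContextTimeout(ctx, time.Second, 5*time.Minute, false,
		func(ctx context.Context) (bool, error) {
			job, err := c.BatchV1().Jobs(ns).Get(ctx, name, metav1.GetOptions{})
			if err != nil {
				return false, err
			}
			for _, cond := range job.Status.Conditions {
				// SuccessCriteriaMet signals the Job will complete once its pods terminate.
				if cond.Type == batchv1.JobSuccessCriteriaMet && cond.Status == v1.ConditionTrue {
					return true, nil
				}
			}
			return false, nil
		})
}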
@@ -21,7 +21,6 @@ import (
	"encoding/json"
	"fmt"
	"strconv"
	"time"

	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
@@ -83,6 +82,10 @@ var _ = SIGDescribe("Job", func() {
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)

ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)

ginkgo.By("Ensuring job reaches completions")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -130,6 +133,10 @@ var _ = SIGDescribe("Job", func() {
job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)

ginkgo.By("Awaiting for the job to have the interim failure condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonPodFailurePolicy)
framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)

ginkgo.By("Ensuring job fails")
err = e2ejob.WaitForJobFailed(f.ClientSet, f.Namespace.Name, job.Name)
framework.ExpectNoError(err, "failed to ensure job failure in namespace: %s", f.Namespace.Name)
@@ -168,6 +175,10 @@ var _ = SIGDescribe("Job", func() {
job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)

ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)

ginkgo.By("Ensuring job reaches completions")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -248,6 +259,10 @@ var _ = SIGDescribe("Job", func() {
err = e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodDelete)
framework.ExpectNoError(err, "failed to await for the pod to be deleted: %s/%s", pod.Name, pod.Namespace)

ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)

ginkgo.By("Ensuring job reaches completions")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -337,6 +352,10 @@ var _ = SIGDescribe("Job", func() {
err = e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodDelete)
framework.ExpectNoError(err, "failed to await for the pod to be deleted: %s/%s", pod.Name, pod.Namespace)

ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)

ginkgo.By("Ensuring job reaches completions")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -369,6 +388,10 @@ var _ = SIGDescribe("Job", func() {
job, err = e2ejob.UpdateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to update job in namespace: %s", f.Namespace.Name)

ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)

ginkgo.By("Waiting for job to complete")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -537,6 +560,10 @@ done`}
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)

ginkgo.By("Awaiting for the job to have the interim failure condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonFailedIndexes)
framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)

ginkgo.By("Awaiting for the job to fail as there are failed indexes")
err = e2ejob.WaitForJobFailed(f.ClientSet, f.Namespace.Name, job.Name)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -573,6 +600,10 @@ done`}
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)

ginkgo.By("Awaiting for the job to have the interim failure condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonMaxFailedIndexesExceeded)
framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)

ginkgo.By("Awaiting for the job to fail as the number of max failed indexes is exceeded")
err = e2ejob.WaitForJobFailed(f.ClientSet, f.Namespace.Name, job.Name)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -616,6 +647,10 @@ done`}
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)

ginkgo.By("Awaiting for the job to have the interim failure condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonFailedIndexes)
framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)

ginkgo.By("Awaiting for the job to fail as all indexes are failed")
err = e2ejob.WaitForJobFailed(f.ClientSet, f.Namespace.Name, job.Name)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -705,6 +740,10 @@ done`}
job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)

ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)

ginkgo.By("Ensuring job reaches completions")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, *job.Spec.Completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -720,8 +759,13 @@ done`}
job := e2ejob.NewTestJob("notTerminate", "exceed-active-deadline", v1.RestartPolicyNever, parallelism, completions, &activeDeadlineSeconds, backoffLimit)
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)

ginkgo.By("Awaiting for the job to have the interim failure condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonDeadlineExceeded)
framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)

ginkgo.By("Ensuring job past active deadline")
err = waitForJobFailure(ctx, f.ClientSet, f.Namespace.Name, job.Name, time.Duration(activeDeadlineSeconds+15)*time.Second, "DeadlineExceeded")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailed, batchv1.JobReasonDeadlineExceeded)
framework.ExpectNoError(err, "failed to ensure job past active deadline in namespace: %s", f.Namespace.Name)
})
@@ -823,9 +867,13 @@ done`}
job := e2ejob.NewTestJob("fail", "backofflimit", v1.RestartPolicyNever, 1, 1, nil, int32(backoff))
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
ginkgo.By("Ensuring job exceed backofflimit")

err = waitForJobFailure(ctx, f.ClientSet, f.Namespace.Name, job.Name, e2ejob.JobTimeout, "BackoffLimitExceeded")
ginkgo.By("Awaiting for the job to have the interim failure condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonBackoffLimitExceeded)
framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)

ginkgo.By("Ensuring job exceed backofflimit")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailed, batchv1.JobReasonBackoffLimitExceeded)
framework.ExpectNoError(err, "failed to ensure job exceed backofflimit in namespace: %s", f.Namespace.Name)

ginkgo.By(fmt.Sprintf("Checking that %d pod created and status is failed", backoff+1))
@@ -870,6 +918,10 @@ done`}
job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)

ginkgo.By("Awaiting for the job to have the interim success condition")
err = waitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)

ginkgo.By("Ensuring job reaches completions")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
@@ -1186,22 +1238,26 @@ func waitForJobEvent(ctx context.Context, config watchEventConfig) {
	}
}

// waitForJobFailure uses c to wait for up to timeout for the Job named jobName in namespace ns to fail.
func waitForJobFailure(ctx context.Context, c clientset.Interface, ns, jobName string, timeout time.Duration, reason string) error {
	return wait.Poll(framework.Poll, timeout, func() (bool, error) {
// waitForJobCondition waits for the specified Job to have the expected condition with the specific reason.
func waitForJobCondition(ctx context.Context, c clientset.Interface, ns, jobName string, cType batchv1.JobConditionType, reason string) error {
	err := wait.PollUntilContextTimeout(ctx, framework.Poll, JobTimeout, false, func(ctx context.Context) (bool, error) {
		curr, err := c.BatchV1().Jobs(ns).Get(ctx, jobName, metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		for _, c := range curr.Status.Conditions {
			if c.Type == batchv1.JobFailed && c.Status == v1.ConditionTrue {
				if reason == "" || reason == c.Reason {
			if c.Type == cType && c.Status == v1.ConditionTrue {
				if reason == c.Reason {
					return true, nil
				}
			}
		}
		return false, nil
	})
	if err != nil {
		return fmt.Errorf("waiting for Job %q to have the condition %q with reason: %q: %w", jobName, cType, reason, err)
	}
	return nil
}

func findConditionByType(list []batchv1.JobCondition, cType batchv1.JobConditionType) *batchv1.JobCondition {
@@ -29,6 +29,7 @@ import (
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	eventsv1 "k8s.io/api/events/v1"
@@ -1160,6 +1161,301 @@ func TestBackoffLimitPerIndex_JobPodsCreatedWithExponentialBackoff(t *testing.T)
	}
}

// TestDelayTerminalPhaseCondition tests the fix for Job controller to delay
// setting the terminal phase conditions (Failed and Complete) until all Pods
// are terminal. The fate of the Job is indicated by the interim Job conditions:
// FailureTarget, or SuccessCriteriaMet.
func TestDelayTerminalPhaseCondition(t *testing.T) {
	t.Cleanup(setDurationDuringTest(&jobcontroller.DefaultJobPodFailureBackOff, fastPodFailureBackoff))

	podTemplateSpec := v1.PodTemplateSpec{
		ObjectMeta: metav1.ObjectMeta{
			Finalizers: []string{"fake.example.com/blockDeletion"},
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name: "main-container",
					Image: "foo",
					ImagePullPolicy: v1.PullIfNotPresent,
					TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError,
				},
			},
		},
	}
	failOnePod := func(ctx context.Context, clientSet clientset.Interface, jobObj *batchv1.Job) {
		if _, err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodFailed, 1); err != nil {
			t.Fatalf("Failed setting phase %q on Job Pod: %v", v1.PodFailed, err)
		}
	}
	succeedOnePodAndScaleDown := func(ctx context.Context, clientSet clientset.Interface, jobObj *batchv1.Job) {
		// mark one pod as succeeded
		if err := setJobPhaseForIndex(ctx, clientSet, jobObj, v1.PodSucceeded, 0); err != nil {
			t.Fatalf("Failed setting phase %q on Job Pod: %v", v1.PodSucceeded, err)
		}
		jobClient := clientSet.BatchV1().Jobs(jobObj.Namespace)
		if _, err := updateJob(ctx, jobClient, jobObj.Name, func(j *batchv1.Job) {
			j.Spec.Parallelism = ptr.To[int32](1)
			j.Spec.Completions = ptr.To[int32](1)
		}); err != nil {
			t.Fatalf("Unexpected error when scaling down the job: %v", err)
		}
	}

	testCases := map[string]struct {
		enableJobManagedBy bool
		enableJobPodReplacementPolicy bool

		job batchv1.Job
		action func(context.Context, clientset.Interface, *batchv1.Job)
		wantInterimStatus *batchv1.JobStatus
		wantTerminalStatus batchv1.JobStatus
	}{
		"job backoff limit exceeded; JobPodReplacementPolicy and JobManagedBy disabled": {
			job: batchv1.Job{
				Spec: batchv1.JobSpec{
					Parallelism: ptr.To[int32](2),
					Completions: ptr.To[int32](2),
					Template: podTemplateSpec,
					BackoffLimit: ptr.To[int32](0),
				},
			},
			action: failOnePod,
			wantTerminalStatus: batchv1.JobStatus{
				Failed: 2,
				Ready: ptr.To[int32](0),
				Conditions: []batchv1.JobCondition{
					{
						Type: batchv1.JobFailed,
						Status: v1.ConditionTrue,
						Reason: batchv1.JobReasonBackoffLimitExceeded,
					},
				},
			},
		},
		"job backoff limit exceeded; JobPodReplacementPolicy enabled": {
			enableJobPodReplacementPolicy: true,
			job: batchv1.Job{
				Spec: batchv1.JobSpec{
					Parallelism: ptr.To[int32](2),
					Completions: ptr.To[int32](2),
					Template: podTemplateSpec,
					BackoffLimit: ptr.To[int32](0),
				},
			},
			action: failOnePod,
			wantInterimStatus: &batchv1.JobStatus{
				Failed: 2,
				Ready: ptr.To[int32](0),
				Terminating: ptr.To[int32](1),
				Conditions: []batchv1.JobCondition{
					{
						Type: batchv1.JobFailureTarget,
						Status: v1.ConditionTrue,
						Reason: batchv1.JobReasonBackoffLimitExceeded,
					},
				},
			},
			wantTerminalStatus: batchv1.JobStatus{
				Failed: 2,
				Ready: ptr.To[int32](0),
				Terminating: ptr.To[int32](0),
				Conditions: []batchv1.JobCondition{
					{
						Type: batchv1.JobFailureTarget,
						Status: v1.ConditionTrue,
						Reason: batchv1.JobReasonBackoffLimitExceeded,
					},
					{
						Type: batchv1.JobFailed,
						Status: v1.ConditionTrue,
						Reason: batchv1.JobReasonBackoffLimitExceeded,
					},
				},
			},
		},
		"job backoff limit exceeded; JobManagedBy enabled": {
			enableJobManagedBy: true,
			job: batchv1.Job{
				Spec: batchv1.JobSpec{
					Parallelism: ptr.To[int32](2),
					Completions: ptr.To[int32](2),
					Template: podTemplateSpec,
					BackoffLimit: ptr.To[int32](0),
				},
			},
			action: failOnePod,
			wantInterimStatus: &batchv1.JobStatus{
				Failed: 2,
				Ready: ptr.To[int32](0),
				Conditions: []batchv1.JobCondition{
					{
						Type: batchv1.JobFailureTarget,
						Status: v1.ConditionTrue,
						Reason: batchv1.JobReasonBackoffLimitExceeded,
					},
				},
			},
			wantTerminalStatus: batchv1.JobStatus{
				Failed: 2,
				Ready: ptr.To[int32](0),
				Conditions: []batchv1.JobCondition{
					{
						Type: batchv1.JobFailureTarget,
						Status: v1.ConditionTrue,
						Reason: batchv1.JobReasonBackoffLimitExceeded,
					},
					{
						Type: batchv1.JobFailed,
						Status: v1.ConditionTrue,
						Reason: batchv1.JobReasonBackoffLimitExceeded,
					},
				},
			},
		},
		"job scale down to meet completions; JobPodReplacementPolicy and JobManagedBy disabled": {
			job: batchv1.Job{
				Spec: batchv1.JobSpec{
					Parallelism: ptr.To[int32](2),
					Completions: ptr.To[int32](2),
					CompletionMode: ptr.To(batchv1.IndexedCompletion),
					Template: podTemplateSpec,
				},
			},
			action: succeedOnePodAndScaleDown,
			wantTerminalStatus: batchv1.JobStatus{
				Succeeded: 1,
				Ready: ptr.To[int32](0),
				CompletedIndexes: "0",
				Conditions: []batchv1.JobCondition{
					{
						Type: batchv1.JobComplete,
						Status: v1.ConditionTrue,
					},
				},
			},
		},
		"job scale down to meet completions; JobPodReplacementPolicy enabled": {
			enableJobPodReplacementPolicy: true,
			job: batchv1.Job{
				Spec: batchv1.JobSpec{
					Parallelism: ptr.To[int32](2),
					Completions: ptr.To[int32](2),
					CompletionMode: ptr.To(batchv1.IndexedCompletion),
					Template: podTemplateSpec,
				},
			},
			action: succeedOnePodAndScaleDown,
			wantInterimStatus: &batchv1.JobStatus{
				Succeeded: 1,
				Ready: ptr.To[int32](0),
				Terminating: ptr.To[int32](1),
				CompletedIndexes: "0",
				Conditions: []batchv1.JobCondition{
					{
						Type: batchv1.JobSuccessCriteriaMet,
						Status: v1.ConditionTrue,
					},
				},
			},
			wantTerminalStatus: batchv1.JobStatus{
				Succeeded: 1,
				Ready: ptr.To[int32](0),
				Terminating: ptr.To[int32](0),
				CompletedIndexes: "0",
				Conditions: []batchv1.JobCondition{
					{
						Type: batchv1.JobSuccessCriteriaMet,
						Status: v1.ConditionTrue,
					},
					{
						Type: batchv1.JobComplete,
						Status: v1.ConditionTrue,
					},
				},
			},
		},
		"job scale down to meet completions; JobManagedBy enabled": {
			enableJobManagedBy: true,
			job: batchv1.Job{
				Spec: batchv1.JobSpec{
					Parallelism: ptr.To[int32](2),
					Completions: ptr.To[int32](2),
					CompletionMode: ptr.To(batchv1.IndexedCompletion),
					Template: podTemplateSpec,
				},
			},
			action: succeedOnePodAndScaleDown,
			wantInterimStatus: &batchv1.JobStatus{
				Succeeded: 1,
				Ready: ptr.To[int32](0),
				CompletedIndexes: "0",
				Conditions: []batchv1.JobCondition{
					{
						Type: batchv1.JobSuccessCriteriaMet,
						Status: v1.ConditionTrue,
					},
				},
			},
			wantTerminalStatus: batchv1.JobStatus{
				Succeeded: 1,
				Ready: ptr.To[int32](0),
				CompletedIndexes: "0",
				Conditions: []batchv1.JobCondition{
					{
						Type: batchv1.JobSuccessCriteriaMet,
						Status: v1.ConditionTrue,
					},
					{
						Type: batchv1.JobComplete,
						Status: v1.ConditionTrue,
					},
				},
			},
		},
	}
	for name, test := range testCases {
		t.Run(name, func(t *testing.T) {
			resetMetrics()
			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)
			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobManagedBy, test.enableJobManagedBy)
			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.ElasticIndexedJob, true)

			closeFn, restConfig, clientSet, ns := setup(t, "delay-terminal-condition")
			t.Cleanup(closeFn)
			ctx, cancel := startJobControllerAndWaitForCaches(t, restConfig)
			t.Cleanup(cancel)

			jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &test.job)
			if err != nil {
				t.Fatalf("Error %q while creating the job %q", err, jobObj.Name)
			}
			t.Cleanup(func() { removePodsFinalizer(ctx, t, clientSet, ns.Name) })
			jobClient := clientSet.BatchV1().Jobs(jobObj.Namespace)

			waitForPodsToBeActive(ctx, t, jobClient, *jobObj.Spec.Parallelism, jobObj)

			test.action(ctx, clientSet, jobObj)
			if test.wantInterimStatus != nil {
				validateJobStatus(ctx, t, clientSet, jobObj, *test.wantInterimStatus)

				// Set terminal phase to all the remaining pods to simulate
				// Kubelet (or other components like PodGC).
				jobPods, err := getJobPods(ctx, t, clientSet, jobObj, func(s v1.PodStatus) bool {
					return (s.Phase == v1.PodPending || s.Phase == v1.PodRunning)
				})
				if err != nil {
					t.Fatalf("Failed to list Job Pods: %v", err)
				}
				if _, err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, len(jobPods)); err != nil {
					t.Fatalf("Failed setting phase %q on Job Pod: %v", v1.PodSucceeded, err)
				}
			}
			validateJobStatus(ctx, t, clientSet, jobObj, test.wantTerminalStatus)
		})
	}
}

// TestBackoffLimitPerIndex tests handling of job and its pods when
// backoff limit per index is used.
func TestBackoffLimitPerIndex(t *testing.T) {
@@ -2821,7 +3117,7 @@ func TestElasticIndexedJob(t *testing.T) {
jobUpdates: []jobUpdate{
	{
		completions: ptr.To[int32](0),
		wantTerminating: ptr.To[int32](3),
		wantTerminating: ptr.To[int32](0),
	},
},
},
@@ -3595,6 +3891,25 @@ func validateJobsPodsStatusOnlyWithTimeout(ctx context.Context, t testing.TB, cl
	}
}

func validateJobStatus(ctx context.Context, t testing.TB, clientSet clientset.Interface, jobObj *batchv1.Job, wantStatus batchv1.JobStatus) {
	t.Helper()
	diff := ""
	if err := wait.PollUntilContextTimeout(ctx, waitInterval, wait.ForeverTestTimeout, true, func(ctx context.Context) (bool, error) {
		gotJob, err := clientSet.BatchV1().Jobs(jobObj.Namespace).Get(ctx, jobObj.Name, metav1.GetOptions{})
		if err != nil {
			t.Fatalf("Failed to get updated Job: %v, last status diff (-want,+got):\n%s", err, diff)
		}
		diff = cmp.Diff(wantStatus, gotJob.Status,
			cmpopts.EquateEmpty(),
			cmpopts.IgnoreFields(batchv1.JobStatus{}, "StartTime", "UncountedTerminatedPods", "CompletionTime"),
			cmpopts.IgnoreFields(batchv1.JobCondition{}, "LastProbeTime", "LastTransitionTime", "Message"),
		)
		return diff == "", nil
	}); err != nil {
		t.Fatalf("Waiting for Job Status: %v\n, Status diff (-want,+got):\n%s", err, diff)
	}
}

func validateJobPodsStatus(ctx context.Context, t testing.TB, clientSet clientset.Interface, jobObj *batchv1.Job, desired podsByStatus) {
	t.Helper()
	validateJobsPodsStatusOnly(ctx, t, clientSet, jobObj, desired)