Merge pull request #125485 from mimowo/refactor-job-e2e-for-conformance
Split Job e2e test to make them possible targets for conformance promotion
@@ -173,74 +173,120 @@ var _ = SIGDescribe("Job", func() {
 		framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
 	})
 
-	// This test is using an indexed job. The pod corresponding to the 0th index
-	// creates a marker file on the host and runs 'forever' until evicted. We use
-	// the non-0-indexed pods to determine if the marker file is already
-	// created by the 0th indexed pod - the non-0-indexed pods fail and restart
-	// until the marker file is created (their potential failures are ignored
-	// based on the exit code). Once the marker file is created the 0th indexed
-	// pod is evicted (DisruptionTarget condition is added in the process),
-	// after restart it runs to successful completion.
-	// Steps:
-	// 1. Select a node to run all Job's pods to ensure the host marker file is accessible by all pods
-	// 2. Create the indexed job
-	// 3. Await for all non-0-indexed pods to succeed to ensure the marker file is created by the 0-indexed pod
-	// 4. Make sure the 0-indexed pod is running
-	// 5. Evict the 0-indexed pod
-	// 6. Await for the job to successfully complete
-	ginkgo.DescribeTable("Using a pod failure policy to not count some failures towards the backoffLimit",
-		func(ctx context.Context, policy *batchv1.PodFailurePolicy) {
-			mode := batchv1.IndexedCompletion
-
-			// We set the backoffLimit to 0 so that any pod failure would trigger
-			// job failure if not for the pod failure policy to ignore the failed
-			// pods from counting them towards the backoffLimit.
-			parallelism := int32(2)
-			completions := int32(4)
-			backoffLimit := int32(0)
-
-			ginkgo.By("Looking for a node to schedule job pods")
-			node, err := e2enode.GetRandomReadySchedulableNode(ctx, f.ClientSet)
-			framework.ExpectNoError(err)
-
-			ginkgo.By("Creating a job")
-			job := e2ejob.NewTestJobOnNode("notTerminateOnce", "pod-disruption-failure-ignore", v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit, node.Name)
-			job.Spec.CompletionMode = &mode
-			job.Spec.PodFailurePolicy = policy
-			job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
-			framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
-
-			ginkgo.By("Awaiting for all non 0-indexed pods to succeed to ensure the marker file is created")
-			err = e2ejob.WaitForJobPodsSucceeded(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions-1)
-			framework.ExpectNoError(err, "failed to await for all non 0-indexed pods to succeed for job: %s/%s", job.Name, job.Namespace)
-
-			ginkgo.By("Awaiting for the 0-indexed pod to be running")
-			err = e2ejob.WaitForJobPodsRunning(ctx, f.ClientSet, f.Namespace.Name, job.Name, 1)
-			framework.ExpectNoError(err, "failed to await for the 0-indexed pod to be running for the job: %s/%s", job.Name, job.Namespace)
-
-			pods, err := e2ejob.GetAllRunningJobPods(ctx, f.ClientSet, f.Namespace.Name, job.Name)
-			framework.ExpectNoError(err, "failed to get running pods for the job: %s/%s", job.Name, job.Namespace)
-			gomega.Expect(pods).To(gomega.HaveLen(1), "Exactly one running pod is expected")
-			pod := pods[0]
-			ginkgo.By(fmt.Sprintf("Evicting the running pod: %s/%s", pod.Name, pod.Namespace))
-			evictTarget := &policyv1.Eviction{
-				ObjectMeta: metav1.ObjectMeta{
-					Name:      pod.Name,
-					Namespace: pod.Namespace,
-				},
-			}
-			f.ClientSet.CoreV1().Pods(pod.Namespace).EvictV1(context.TODO(), evictTarget)
-			framework.ExpectNoError(err, "failed to evict the pod: %s/%s", pod.Name, pod.Namespace)
-
-			ginkgo.By(fmt.Sprintf("Awaiting for the pod: %s/%s to be deleted", pod.Name, pod.Namespace))
-			err = e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodDelete)
-			framework.ExpectNoError(err, "failed to await for the pod to be deleted: %s/%s", pod.Name, pod.Namespace)
-
-			ginkgo.By("Ensuring job reaches completions")
-			err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
-			framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
-		},
-		ginkgo.Entry("Ignore DisruptionTarget condition", &batchv1.PodFailurePolicy{
+	/*
+		Testname: Ensure pod failure policy allows to ignore failure for an evicted pod; matching on the exit code
+		Description: This test is using an indexed job. The pod corresponding to the 0th index
+		creates a marker file on the host and runs 'forever' until evicted. We use
+		the non-0-indexed pods to determine if the marker file is already
+		created by the 0th indexed pod - the non-0-indexed pods fail and restart
+		until the marker file is created (their potential failures are ignored
+		based on the exit code). Once the marker file is created the 0th indexed
+		pod is evicted (DisruptionTarget condition is added in the process),
+		after restart it runs to successful completion.
+		Steps:
+		1. Select a node to run all Job's pods to ensure the host marker file is accessible by all pods
+		2. Create the indexed job with pod failure policy which ignores failed pods with 137 exit code
+		3. Await for all non-0-indexed pods to succeed to ensure the marker file is created by the 0-indexed pod
+		4. Make sure the 0-indexed pod is running
+		5. Evict the 0-indexed pod, the failure is ignored as it matches the pod failure policy
+		6. Await for the job to successfully complete
+	*/
+	ginkgo.It("should allow to use a pod failure policy to ignore failure for an evicted pod; matching on the exit code", func(ctx context.Context) {
+		// We set the backoffLimit to 0 so that any pod failure would trigger
+		// job failure if not for the pod failure policy to ignore the failed
+		// pods from counting them towards the backoffLimit.
+		parallelism := int32(2)
+		completions := int32(4)
+		backoffLimit := int32(0)
+
+		ginkgo.By("Looking for a node to schedule job pods")
+		node, err := e2enode.GetRandomReadySchedulableNode(ctx, f.ClientSet)
+		framework.ExpectNoError(err)
+
+		ginkgo.By("Creating a job")
+		job := e2ejob.NewTestJobOnNode("notTerminateOnce", "evicted-pod-ignore-on-exit-code", v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit, node.Name)
+		job.Spec.CompletionMode = ptr.To(batchv1.IndexedCompletion)
+		job.Spec.PodFailurePolicy = &batchv1.PodFailurePolicy{
+			Rules: []batchv1.PodFailurePolicyRule{
+				{
+					// Ignore failures of the non 0-indexed pods which fail until the marker file is created
+					// And the 137 in the 0-indexed pod due to eviction.
+					Action: batchv1.PodFailurePolicyActionIgnore,
+					OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
+						Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
+						Values:   []int32{1, 137},
+					},
+				},
+			},
+		}
+		job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
+		framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
+
+		ginkgo.By("Awaiting for all non 0-indexed pods to succeed to ensure the marker file is created")
+		err = e2ejob.WaitForJobPodsSucceeded(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions-1)
+		framework.ExpectNoError(err, "failed to await for all non 0-indexed pods to succeed for job: %s/%s", job.Name, job.Namespace)
+
+		ginkgo.By("Awaiting for the 0-indexed pod to be running")
+		err = e2ejob.WaitForJobPodsRunning(ctx, f.ClientSet, f.Namespace.Name, job.Name, 1)
+		framework.ExpectNoError(err, "failed to await for the 0-indexed pod to be running for the job: %s/%s", job.Name, job.Namespace)
+
+		pods, err := e2ejob.GetAllRunningJobPods(ctx, f.ClientSet, f.Namespace.Name, job.Name)
+		framework.ExpectNoError(err, "failed to get running pods for the job: %s/%s", job.Name, job.Namespace)
+		gomega.Expect(pods).To(gomega.HaveLen(1), "Exactly one running pod is expected")
+		pod := pods[0]
+		ginkgo.By(fmt.Sprintf("Evicting the running pod: %s/%s", pod.Name, pod.Namespace))
+		evictTarget := &policyv1.Eviction{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      pod.Name,
+				Namespace: pod.Namespace,
+			},
+		}
+		err = f.ClientSet.CoreV1().Pods(pod.Namespace).EvictV1(ctx, evictTarget)
+		framework.ExpectNoError(err, "failed to evict the pod: %s/%s", pod.Name, pod.Namespace)
+
+		ginkgo.By(fmt.Sprintf("Awaiting for the pod: %s/%s to be deleted", pod.Name, pod.Namespace))
+		err = e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodDelete)
+		framework.ExpectNoError(err, "failed to await for the pod to be deleted: %s/%s", pod.Name, pod.Namespace)
+
+		ginkgo.By("Ensuring job reaches completions")
+		err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
+		framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
+	})
+
+	/*
+		Testname: Ensure pod failure policy allows to ignore failure for an evicted pod; matching on the DisruptionTarget condition
+		Description: This test is using an indexed job. The pod corresponding to the 0th index
+		creates a marker file on the host and runs 'forever' until evicted. We use
+		the non-0-indexed pods to determine if the marker file is already
+		created by the 0th indexed pod - the non-0-indexed pods fail and restart
+		until the marker file is created (their potential failures are ignored
+		based on the exit code). Once the marker file is created the 0th indexed
+		pod is evicted (DisruptionTarget condition is added in the process),
+		after restart it runs to successful completion.
+		Steps:
+		1. Select a node to run all Job's pods to ensure the host marker file is accessible by all pods
+		2. Create the indexed job with pod failure policy which ignores failed pods with DisruptionTarget condition
+		3. Await for all non-0-indexed pods to succeed to ensure the marker file is created by the 0-indexed pod
+		4. Make sure the 0-indexed pod is running
+		5. Evict the 0-indexed pod, the failure is ignored as it matches the pod failure policy
+		6. Await for the job to successfully complete
+	*/
+	ginkgo.It("should allow to use a pod failure policy to ignore failure for an evicted pod; matching on the DisruptionTarget condition", func(ctx context.Context) {
+		// We set the backoffLimit to 0 so that any pod failure would trigger
+		// job failure if not for the pod failure policy to ignore the failed
+		// pods from counting them towards the backoffLimit.
+		parallelism := int32(2)
+		completions := int32(4)
+		backoffLimit := int32(0)
+
+		ginkgo.By("Looking for a node to schedule job pods")
+		node, err := e2enode.GetRandomReadySchedulableNode(ctx, f.ClientSet)
+		framework.ExpectNoError(err)
+
+		ginkgo.By("Creating a job")
+		job := e2ejob.NewTestJobOnNode("notTerminateOnce", "evicted-pod-ignore-on-disruption-condition", v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit, node.Name)
+		job.Spec.CompletionMode = ptr.To(batchv1.IndexedCompletion)
+		job.Spec.PodFailurePolicy = &batchv1.PodFailurePolicy{
 			Rules: []batchv1.PodFailurePolicyRule{
 				{
 					// Ignore failures of the non 0-indexed pods which fail until the marker file is created
@@ -261,21 +307,40 @@ var _ = SIGDescribe("Job", func() {
 					},
 				},
 			},
-		}),
-		ginkgo.Entry("Ignore exit code 137", &batchv1.PodFailurePolicy{
-			Rules: []batchv1.PodFailurePolicyRule{
-				{
-					// Ignore failures of the non 0-indexed pods which fail until the marker file is created
-					// And the 137 in the 0-indexed pod due to eviction.
-					Action: batchv1.PodFailurePolicyActionIgnore,
-					OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
-						Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
-						Values:   []int32{1, 137},
-					},
-				},
+		}
+		job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
+		framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
+
+		ginkgo.By("Awaiting for all non 0-indexed pods to succeed to ensure the marker file is created")
+		err = e2ejob.WaitForJobPodsSucceeded(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions-1)
+		framework.ExpectNoError(err, "failed to await for all non 0-indexed pods to succeed for job: %s/%s", job.Name, job.Namespace)
+
+		ginkgo.By("Awaiting for the 0-indexed pod to be running")
+		err = e2ejob.WaitForJobPodsRunning(ctx, f.ClientSet, f.Namespace.Name, job.Name, 1)
+		framework.ExpectNoError(err, "failed to await for the 0-indexed pod to be running for the job: %s/%s", job.Name, job.Namespace)
+
+		pods, err := e2ejob.GetAllRunningJobPods(ctx, f.ClientSet, f.Namespace.Name, job.Name)
+		framework.ExpectNoError(err, "failed to get running pods for the job: %s/%s", job.Name, job.Namespace)
+		gomega.Expect(pods).To(gomega.HaveLen(1), "Exactly one running pod is expected")
+		pod := pods[0]
+		ginkgo.By(fmt.Sprintf("Evicting the running pod: %s/%s", pod.Name, pod.Namespace))
+		evictTarget := &policyv1.Eviction{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      pod.Name,
+				Namespace: pod.Namespace,
 			},
-		}),
-	)
+		}
+		err = f.ClientSet.CoreV1().Pods(pod.Namespace).EvictV1(ctx, evictTarget)
+		framework.ExpectNoError(err, "failed to evict the pod: %s/%s", pod.Name, pod.Namespace)
+
+		ginkgo.By(fmt.Sprintf("Awaiting for the pod: %s/%s to be deleted", pod.Name, pod.Namespace))
+		err = e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodDelete)
+		framework.ExpectNoError(err, "failed to await for the pod to be deleted: %s/%s", pod.Name, pod.Namespace)
+
+		ginkgo.By("Ensuring job reaches completions")
+		err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, completions)
+		framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
+	})
 
 	ginkgo.It("should not create pods when created in suspend state", func(ctx context.Context) {
 		parallelism := int32(2)
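Note: the OnPodConditions rule that the second test matches on sits in unchanged context between the two hunks, so it does not appear in the diff above. As an illustrative sketch only (not the exact lines from the test; the spec fragment, field values, and the standalone main are assumptions for the example), this is roughly how a batch/v1 JobSpec combines the ptr.To-based completion mode seen in the diff with a pod failure policy rule that ignores disruption-caused failures:

	package main

	import (
		"fmt"

		batchv1 "k8s.io/api/batch/v1"
		v1 "k8s.io/api/core/v1"
		"k8s.io/utils/ptr"
	)

	func main() {
		// Spec fragment only; a real Job also needs parallelism, completions and a pod template.
		spec := batchv1.JobSpec{
			// ptr.To replaces the older "mode := batchv1.IndexedCompletion; &mode" pattern.
			CompletionMode: ptr.To(batchv1.IndexedCompletion),
			BackoffLimit:   ptr.To(int32(0)),
			PodFailurePolicy: &batchv1.PodFailurePolicy{
				Rules: []batchv1.PodFailurePolicyRule{
					{
						// Do not count a pod terminated by disruption (e.g. eviction adds the
						// DisruptionTarget condition) towards the Job's backoffLimit.
						Action: batchv1.PodFailurePolicyActionIgnore,
						OnPodConditions: []batchv1.PodFailurePolicyOnPodConditionsPattern{
							{Type: v1.DisruptionTarget, Status: v1.ConditionTrue},
						},
					},
				},
			},
		}
		fmt.Printf("%+v\n", spec)
	}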