Merge pull request #111113 from mimowo/retriable-pod-failures-job-controller
Support handling of pod failures with respect to the configured rules
This commit is contained in:
@@ -100,6 +100,9 @@ func (jobStrategy) PrepareForCreate(ctx context.Context, obj runtime.Object) {
|
||||
} else {
|
||||
dropJobTrackingAnnotation(job)
|
||||
}
|
||||
if !utilfeature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
|
||||
job.Spec.PodFailurePolicy = nil
|
||||
}
|
||||
|
||||
pod.DropDisabledTemplateFields(&job.Spec.Template, nil)
|
||||
}
|
||||
@@ -133,6 +136,10 @@ func (jobStrategy) PrepareForUpdate(ctx context.Context, obj, old runtime.Object
|
||||
dropJobTrackingAnnotation(newJob)
|
||||
}
|
||||
|
||||
if !utilfeature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && oldJob.Spec.PodFailurePolicy == nil {
|
||||
newJob.Spec.PodFailurePolicy = nil
|
||||
}
|
||||
|
||||
pod.DropDisabledTemplateFields(&newJob.Spec.Template, &oldJob.Spec.Template)
|
||||
|
||||
// Any changes to the spec increment the generation number.
|
||||
|
@@ -40,6 +40,244 @@ import (
|
||||
|
||||
var ignoreErrValueDetail = cmpopts.IgnoreFields(field.Error{}, "BadValue", "Detail")
|
||||
|
||||
// TestJobStrategy_PrepareForUpdate tests various scenearios for PrepareForUpdate
|
||||
func TestJobStrategy_PrepareForUpdate(t *testing.T) {
|
||||
validSelector := getValidLabelSelector()
|
||||
validPodTemplateSpec := getValidPodTemplateSpecForSelector(validSelector)
|
||||
|
||||
podFailurePolicy := &batch.PodFailurePolicy{
|
||||
Rules: []batch.PodFailurePolicyRule{
|
||||
{
|
||||
Action: batch.PodFailurePolicyActionFailJob,
|
||||
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
|
||||
ContainerName: pointer.String("container-name"),
|
||||
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
|
||||
Values: []int32{1},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
updatedPodFailurePolicy := &batch.PodFailurePolicy{
|
||||
Rules: []batch.PodFailurePolicyRule{
|
||||
{
|
||||
Action: batch.PodFailurePolicyActionIgnore,
|
||||
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
|
||||
ContainerName: pointer.String("updated-container-name"),
|
||||
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
|
||||
Values: []int32{2},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cases := map[string]struct {
|
||||
enableJobPodFailurePolicy bool
|
||||
job batch.Job
|
||||
updatedJob batch.Job
|
||||
wantJob batch.Job
|
||||
}{
|
||||
"update job with a new field; updated when JobPodFailurePolicy enabled": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(0),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: nil,
|
||||
},
|
||||
},
|
||||
updatedJob: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(0),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: updatedPodFailurePolicy,
|
||||
},
|
||||
},
|
||||
wantJob: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(1),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: updatedPodFailurePolicy,
|
||||
},
|
||||
},
|
||||
},
|
||||
"update job with a new field; not updated when JobPodFailurePolicy disabled": {
|
||||
enableJobPodFailurePolicy: false,
|
||||
job: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(0),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: nil,
|
||||
},
|
||||
},
|
||||
updatedJob: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(0),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: updatedPodFailurePolicy,
|
||||
},
|
||||
},
|
||||
wantJob: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(0),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: nil,
|
||||
},
|
||||
},
|
||||
},
|
||||
"update pre-existing field; updated when JobPodFailurePolicy enabled": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(0),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: podFailurePolicy,
|
||||
},
|
||||
},
|
||||
updatedJob: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(0),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: updatedPodFailurePolicy,
|
||||
},
|
||||
},
|
||||
wantJob: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(1),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: updatedPodFailurePolicy,
|
||||
},
|
||||
},
|
||||
},
|
||||
"update pre-existing field; updated when JobPodFailurePolicy disabled": {
|
||||
enableJobPodFailurePolicy: false,
|
||||
job: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(0),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: podFailurePolicy,
|
||||
},
|
||||
},
|
||||
updatedJob: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(0),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: updatedPodFailurePolicy,
|
||||
},
|
||||
},
|
||||
wantJob: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(1),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: updatedPodFailurePolicy,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for name, tc := range cases {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodFailurePolicy, tc.enableJobPodFailurePolicy)()
|
||||
ctx := genericapirequest.NewDefaultContext()
|
||||
|
||||
Strategy.PrepareForUpdate(ctx, &tc.updatedJob, &tc.job)
|
||||
|
||||
if diff := cmp.Diff(tc.wantJob, tc.updatedJob); diff != "" {
|
||||
t.Errorf("Job pod failure policy (-want,+got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestJobStrategy_PrepareForUpdate tests various scenearios for PrepareForCreate
|
||||
func TestJobStrategy_PrepareForCreate(t *testing.T) {
|
||||
validSelector := getValidLabelSelector()
|
||||
validPodTemplateSpec := getValidPodTemplateSpecForSelector(validSelector)
|
||||
|
||||
podFailurePolicy := &batch.PodFailurePolicy{
|
||||
Rules: []batch.PodFailurePolicyRule{
|
||||
{
|
||||
Action: batch.PodFailurePolicyActionFailJob,
|
||||
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
|
||||
ContainerName: pointer.String("container-name"),
|
||||
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
|
||||
Values: []int32{1},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cases := map[string]struct {
|
||||
enableJobPodFailurePolicy bool
|
||||
job batch.Job
|
||||
wantJob batch.Job
|
||||
}{
|
||||
"create job with a new field; JobPodFailurePolicy enabled": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(0),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: podFailurePolicy,
|
||||
},
|
||||
},
|
||||
wantJob: batch.Job{
|
||||
ObjectMeta: getValidObjectMetaWithAnnotations(1, map[string]string{batchv1.JobTrackingFinalizer: ""}),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: podFailurePolicy,
|
||||
},
|
||||
},
|
||||
},
|
||||
"create job with a new field; JobPodFailurePolicy disabled": {
|
||||
enableJobPodFailurePolicy: false,
|
||||
job: batch.Job{
|
||||
ObjectMeta: getValidObjectMeta(0),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: podFailurePolicy,
|
||||
},
|
||||
},
|
||||
wantJob: batch.Job{
|
||||
ObjectMeta: getValidObjectMetaWithAnnotations(1, map[string]string{batchv1.JobTrackingFinalizer: ""}),
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validPodTemplateSpec,
|
||||
PodFailurePolicy: nil,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for name, tc := range cases {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodFailurePolicy, tc.enableJobPodFailurePolicy)()
|
||||
ctx := genericapirequest.NewDefaultContext()
|
||||
|
||||
Strategy.PrepareForCreate(ctx, &tc.job)
|
||||
|
||||
if diff := cmp.Diff(tc.wantJob, tc.job); diff != "" {
|
||||
t.Errorf("Job pod failure policy (-want,+got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(#111514): refactor by spliting into dedicated test functions
|
||||
func TestJobStrategy(t *testing.T) {
|
||||
cases := map[string]struct {
|
||||
trackingWithFinalizersEnabled bool
|
||||
@@ -589,3 +827,35 @@ func TestSelectableFieldLabelConversions(t *testing.T) {
|
||||
func completionModePtr(m batch.CompletionMode) *batch.CompletionMode {
|
||||
return &m
|
||||
}
|
||||
|
||||
func getValidObjectMeta(generation int64) metav1.ObjectMeta {
|
||||
return getValidObjectMetaWithAnnotations(generation, nil)
|
||||
}
|
||||
|
||||
func getValidObjectMetaWithAnnotations(generation int64, annotations map[string]string) metav1.ObjectMeta {
|
||||
return metav1.ObjectMeta{
|
||||
Name: "myjob",
|
||||
Namespace: metav1.NamespaceDefault,
|
||||
Generation: generation,
|
||||
Annotations: annotations,
|
||||
}
|
||||
}
|
||||
|
||||
func getValidLabelSelector() *metav1.LabelSelector {
|
||||
return &metav1.LabelSelector{
|
||||
MatchLabels: map[string]string{"a": "b"},
|
||||
}
|
||||
}
|
||||
|
||||
func getValidPodTemplateSpecForSelector(validSelector *metav1.LabelSelector) api.PodTemplateSpec {
|
||||
return api.PodTemplateSpec{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: validSelector.MatchLabels,
|
||||
},
|
||||
Spec: api.PodSpec{
|
||||
RestartPolicy: api.RestartPolicyOnFailure,
|
||||
DNSPolicy: api.DNSClusterFirst,
|
||||
Containers: []api.Container{{Name: "abc", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: api.TerminationMessageReadFile}},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user