Support handling of pod failures with respect to the specified rules

This commit is contained in:
Michal Wozniak
2022-08-04 08:21:32 +02:00
parent c8edeab234
commit bf9ce70de3
43 changed files with 5934 additions and 127 deletions

View File

@@ -18,6 +18,7 @@ package validation
import (
"archive/zip"
"fmt"
"io"
"os"
"path/filepath"
@@ -80,23 +81,85 @@ func getValidPodTemplateSpecForGenerated(selector *metav1.LabelSelector) api.Pod
Labels: selector.MatchLabels,
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyOnFailure,
DNSPolicy: api.DNSClusterFirst,
Containers: []api.Container{{Name: "abc", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: api.TerminationMessageReadFile}},
RestartPolicy: api.RestartPolicyOnFailure,
DNSPolicy: api.DNSClusterFirst,
Containers: []api.Container{{Name: "abc", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: api.TerminationMessageReadFile}},
InitContainers: []api.Container{{Name: "def", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: api.TerminationMessageReadFile}},
},
}
}
func TestValidateJob(t *testing.T) {
validJobObjectMeta := metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
}
validManualSelector := getValidManualSelector()
validPodTemplateSpecForManual := getValidPodTemplateSpecForManual(validManualSelector)
validGeneratedSelector := getValidGeneratedSelector()
validPodTemplateSpecForGenerated := getValidPodTemplateSpecForGenerated(validGeneratedSelector)
validPodTemplateSpecForGeneratedRestartPolicyNever := getValidPodTemplateSpecForGenerated(validGeneratedSelector)
validPodTemplateSpecForGeneratedRestartPolicyNever.Spec.RestartPolicy = api.RestartPolicyNever
successCases := map[string]struct {
opts JobValidationOptions
job batch.Job
}{
"valid pod failure policy": {
job: batch.Job{
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Status: api.ConditionTrue,
},
},
},
{
Action: batch.PodFailurePolicyActionFailJob,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.PodConditionType("CustomConditionType"),
Status: api.ConditionFalse,
},
},
},
{
Action: batch.PodFailurePolicyActionCount,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
ContainerName: pointer.String("abc"),
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{1, 2, 3},
},
},
{
Action: batch.PodFailurePolicyActionIgnore,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
ContainerName: pointer.String("def"),
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{4},
},
},
{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpNotIn,
Values: []int32{5, 6, 7},
},
},
},
},
},
},
},
"valid manual selector": {
job: batch.Job{
ObjectMeta: metav1.ObjectMeta{
@@ -185,6 +248,402 @@ func TestValidateJob(t *testing.T) {
negative := int32(-1)
negative64 := int64(-1)
errorCases := map[string]batch.Job{
`spec.podFailurePolicy.rules[0]: Invalid value: specifying one of OnExitCodes and OnPodConditions is required`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionFailJob,
},
},
},
},
},
`spec.podFailurePolicy.rules[0].onExitCodes.values[1]: Duplicate value: 11`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{11, 11},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[0].onExitCodes.values: Too many: 256: must have at most 255 items`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: func() (values []int32) {
tooManyValues := make([]int32, maxPodFailurePolicyOnExitCodesValues+1)
for i := range tooManyValues {
tooManyValues[i] = int32(i)
}
return tooManyValues
}(),
},
},
},
},
},
},
`spec.podFailurePolicy.rules: Too many: 21: must have at most 20 items`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: func() []batch.PodFailurePolicyRule {
tooManyRules := make([]batch.PodFailurePolicyRule, maxPodFailurePolicyRules+1)
for i := range tooManyRules {
tooManyRules[i] = batch.PodFailurePolicyRule{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{int32(i + 1)},
},
}
}
return tooManyRules
}(),
},
},
},
`spec.podFailurePolicy.rules[0].onPodConditions: Too many: 21: must have at most 20 items`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionFailJob,
OnPodConditions: func() []batch.PodFailurePolicyOnPodConditionsPattern {
tooManyPatterns := make([]batch.PodFailurePolicyOnPodConditionsPattern, maxPodFailurePolicyOnPodConditionsPatterns+1)
for i := range tooManyPatterns {
tooManyPatterns[i] = batch.PodFailurePolicyOnPodConditionsPattern{
Type: api.PodConditionType(fmt.Sprintf("CustomType_%d", i)),
Status: api.ConditionTrue,
}
}
return tooManyPatterns
}(),
},
},
},
},
},
`spec.podFailurePolicy.rules[0].onExitCodes.values[2]: Duplicate value: 13`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{12, 13, 13, 13},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[0].onExitCodes.values: Invalid value: []int32{19, 11}: must be ordered`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{19, 11},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[0].onExitCodes.values: Invalid value: []int32{}: at least one value is required`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[0].action: Required value: valid values: ["Count" "FailJob" "Ignore"]`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: "",
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{1, 2, 3},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[0].onExitCodes.operator: Required value: valid values: ["In" "NotIn"]`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: "",
Values: []int32{1, 2, 3},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[0]: Invalid value: specifying both OnExitCodes and OnPodConditions is not supported`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
ContainerName: pointer.String("abc"),
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{1, 2, 3},
},
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Status: api.ConditionTrue,
},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[0].onExitCodes.values[1]: Invalid value: 0: must not be 0 for the In operator`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{1, 0, 2},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[1].onExitCodes.containerName: Invalid value: "xyz": must be one of the container or initContainer names in the pod template`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
ContainerName: pointer.String("abc"),
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{1, 2, 3},
},
},
{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
ContainerName: pointer.String("xyz"),
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{5, 6, 7},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[0].action: Unsupported value: "UnknownAction": supported values: "Count", "FailJob", "Ignore"`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: "UnknownAction",
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
ContainerName: pointer.String("abc"),
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{1, 2, 3},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[0].onExitCodes.operator: Unsupported value: "UnknownOperator": supported values: "In", "NotIn"`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: "UnknownOperator",
Values: []int32{1, 2, 3},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[0].onPodConditions[0].status: Required value: valid values: ["False" "True" "Unknown"]`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[0].onPodConditions[0].status: Unsupported value: "UnknownStatus": supported values: "False", "True", "Unknown"`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Status: "UnknownStatus",
},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[0].onPodConditions[0].type: Invalid value: "": name part must be non-empty`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Status: api.ConditionTrue,
},
},
},
},
},
},
},
`spec.podFailurePolicy.rules[0].onPodConditions[0].type: Invalid value: "Invalid Condition Type": name part must consist of alphanumeric characters, '-', '_' or '.', and must start and end with an alphanumeric character (e.g. 'MyName', or 'my.name', or '123-abc', regex used for validation is '([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]')`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.PodConditionType("Invalid Condition Type"),
Status: api.ConditionTrue,
},
},
},
},
},
},
},
`spec.template.spec.restartPolicy: Invalid value: "OnFailure": only "Never" is supported when podFailurePolicy is specified`: {
ObjectMeta: validJobObjectMeta,
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: api.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: validGeneratedSelector.MatchLabels,
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyOnFailure,
DNSPolicy: api.DNSClusterFirst,
Containers: []api.Container{{Name: "abc", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: api.TerminationMessageReadFile}},
},
},
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{},
},
},
},
"spec.parallelism:must be greater than or equal to 0": {
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
@@ -388,6 +847,9 @@ func TestValidateJob(t *testing.T) {
func TestValidateJobUpdate(t *testing.T) {
validGeneratedSelector := getValidGeneratedSelector()
validPodTemplateSpecForGenerated := getValidPodTemplateSpecForGenerated(validGeneratedSelector)
validPodTemplateSpecForGeneratedRestartPolicyNever := getValidPodTemplateSpecForGenerated(validGeneratedSelector)
validPodTemplateSpecForGeneratedRestartPolicyNever.Spec.RestartPolicy = api.RestartPolicyNever
validNodeAffinity := &api.Affinity{
NodeAffinity: &api.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &api.NodeSelector{
@@ -491,6 +953,100 @@ func TestValidateJobUpdate(t *testing.T) {
Field: "spec.selector",
},
},
"add pod failure policy": {
old: batch.Job{
ObjectMeta: metav1.ObjectMeta{Name: "abc", Namespace: metav1.NamespaceDefault},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
},
},
update: func(job *batch.Job) {
job.Spec.PodFailurePolicy = &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Status: api.ConditionTrue,
},
},
},
},
}
},
err: &field.Error{
Type: field.ErrorTypeInvalid,
Field: "spec.podFailurePolicy",
},
},
"remove pod failure policy": {
old: batch.Job{
ObjectMeta: metav1.ObjectMeta{Name: "abc", Namespace: metav1.NamespaceDefault},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Status: api.ConditionTrue,
},
},
},
},
},
},
},
update: func(job *batch.Job) {
job.Spec.PodFailurePolicy.Rules = append(job.Spec.PodFailurePolicy.Rules, batch.PodFailurePolicyRule{
Action: batch.PodFailurePolicyActionCount,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Status: api.ConditionTrue,
},
},
})
},
err: &field.Error{
Type: field.ErrorTypeInvalid,
Field: "spec.podFailurePolicy",
},
},
"update pod failure policy": {
old: batch.Job{
ObjectMeta: metav1.ObjectMeta{Name: "abc", Namespace: metav1.NamespaceDefault},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGeneratedRestartPolicyNever,
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: api.AlphaNoCompatGuaranteeDisruptionTarget,
Status: api.ConditionTrue,
},
},
},
},
},
},
},
update: func(job *batch.Job) {
job.Spec.PodFailurePolicy = nil
},
err: &field.Error{
Type: field.ErrorTypeInvalid,
Field: "spec.podFailurePolicy",
},
},
"immutable pod template": {
old: batch.Job{
ObjectMeta: metav1.ObjectMeta{Name: "abc", Namespace: metav1.NamespaceDefault},