Support handling of pod failures with respect to the specified rules

This commit is contained in:
Michal Wozniak
2022-08-04 08:21:32 +02:00
parent c8edeab234
commit bf9ce70de3
43 changed files with 5934 additions and 127 deletions

View File

@@ -22,6 +22,7 @@ import (
"time"
"github.com/robfig/cron/v3"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
unversionedvalidation "k8s.io/apimachinery/pkg/apis/meta/v1/validation"
"k8s.io/apimachinery/pkg/labels"
@@ -39,6 +40,33 @@ import (
// .status.completedIndexes.
const maxParallelismForIndexedJob = 100000
// Upper bounds enforced when validating a Job's pod failure policy.
const (
// maxPodFailurePolicyRules is the maximum number of rules allowed in a
// pod failure policy.
maxPodFailurePolicyRules = 20
// maxPodFailurePolicyOnExitCodesValues is the maximum number of values
// allowed in an OnExitCodes requirement of a pod failure policy rule.
maxPodFailurePolicyOnExitCodesValues = 255
// maxPodFailurePolicyOnPodConditionsPatterns is the maximum number of
// patterns allowed in an OnPodConditions requirement of a pod failure
// policy rule.
maxPodFailurePolicyOnPodConditionsPatterns = 20
)
// Sets of accepted values for the enumerated fields of a pod failure policy.
// Validation reports any value outside the matching set as unsupported.
var (
	// supportedPodFailurePolicyActions holds the values accepted for the
	// action of a pod failure policy rule.
	supportedPodFailurePolicyActions = sets.NewString(
		string(batch.PodFailurePolicyActionCount),
		string(batch.PodFailurePolicyActionFailJob),
		string(batch.PodFailurePolicyActionIgnore),
	)

	// supportedPodFailurePolicyOnExitCodesOperator holds the values accepted
	// for the operator of an OnExitCodes requirement.
	supportedPodFailurePolicyOnExitCodesOperator = sets.NewString(
		string(batch.PodFailurePolicyOnExitCodesOpIn),
		string(batch.PodFailurePolicyOnExitCodesOpNotIn),
	)

	// supportedPodFailurePolicyOnPodConditionsStatus holds the values
	// accepted for the status of an OnPodConditions pattern.
	supportedPodFailurePolicyOnPodConditionsStatus = sets.NewString(
		string(v1.ConditionFalse),
		string(v1.ConditionTrue),
		string(v1.ConditionUnknown),
	)
)
// ValidateGeneratedSelector validates that the generated selector on a controller object matches the controller object
// metadata, and that the labels on the pod template are as generated.
//
@@ -168,6 +196,10 @@ func validateJobSpec(spec *batch.JobSpec, fldPath *field.Path, opts apivalidatio
}
}
if spec.PodFailurePolicy != nil {
allErrs = append(allErrs, validatePodFailurePolicy(spec, fldPath.Child("podFailurePolicy"))...)
}
allErrs = append(allErrs, apivalidation.ValidatePodTemplateSpec(&spec.Template, fldPath.Child("template"), opts)...)
// spec.Template.Spec.RestartPolicy can be defaulted as RestartPolicyAlways
@@ -179,10 +211,113 @@ func validateJobSpec(spec *batch.JobSpec, fldPath *field.Path, opts apivalidatio
} else if spec.Template.Spec.RestartPolicy != api.RestartPolicyOnFailure && spec.Template.Spec.RestartPolicy != api.RestartPolicyNever {
allErrs = append(allErrs, field.NotSupported(fldPath.Child("template", "spec", "restartPolicy"),
spec.Template.Spec.RestartPolicy, []string{string(api.RestartPolicyOnFailure), string(api.RestartPolicyNever)}))
} else if spec.PodFailurePolicy != nil && spec.Template.Spec.RestartPolicy != api.RestartPolicyNever {
allErrs = append(allErrs, field.Invalid(fldPath.Child("template", "spec", "restartPolicy"),
spec.Template.Spec.RestartPolicy, fmt.Sprintf("only %q is supported when podFailurePolicy is specified", api.RestartPolicyNever)))
}
return allErrs
}
// validatePodFailurePolicy validates spec.PodFailurePolicy and returns every
// problem found, with field paths rooted at fldPath.
//
// It reports a TooMany error when the number of rules exceeds
// maxPodFailurePolicyRules, then validates each rule individually. The names
// of all containers and init containers in the pod template are collected so
// that rules can be checked against them.
//
// spec.PodFailurePolicy is dereferenced without a nil check; the caller must
// only invoke this function when the policy is set.
func validatePodFailurePolicy(spec *batch.JobSpec, fldPath *field.Path) field.ErrorList {
	var allErrs field.ErrorList

	rules := spec.PodFailurePolicy.Rules
	rulesPath := fldPath.Child("rules")
	if ruleCount := len(rules); ruleCount > maxPodFailurePolicyRules {
		allErrs = append(allErrs, field.TooMany(rulesPath, ruleCount, maxPodFailurePolicyRules))
	}

	// Gather every container name a rule is allowed to reference.
	containerNames := sets.NewString()
	containers := spec.Template.Spec.Containers
	for i := range containers {
		containerNames.Insert(containers[i].Name)
	}
	initContainers := spec.Template.Spec.InitContainers
	for i := range initContainers {
		containerNames.Insert(initContainers[i].Name)
	}

	for i := range rules {
		ruleErrs := validatePodFailurePolicyRule(&rules[i], rulesPath.Index(i), containerNames)
		allErrs = append(allErrs, ruleErrs...)
	}
	return allErrs
}
// validatePodFailurePolicyRule validates a single pod failure policy rule and
// returns every problem found, with field paths rooted at rulePath.
//
// The action must be set and be one of supportedPodFailurePolicyActions.
// Whichever of OnExitCodes and OnPodConditions is present is validated, and
// exactly one of the two must be specified. containerNames is the set of
// names an OnExitCodes requirement may reference.
func validatePodFailurePolicyRule(rule *batch.PodFailurePolicyRule, rulePath *field.Path, containerNames sets.String) field.ErrorList {
	var allErrs field.ErrorList

	actionPath := rulePath.Child("action")
	switch {
	case rule.Action == "":
		allErrs = append(allErrs, field.Required(actionPath, fmt.Sprintf("valid values: %q", supportedPodFailurePolicyActions.List())))
	case !supportedPodFailurePolicyActions.Has(string(rule.Action)):
		allErrs = append(allErrs, field.NotSupported(actionPath, rule.Action, supportedPodFailurePolicyActions.List()))
	}

	hasExitCodes := rule.OnExitCodes != nil
	hasPodConditions := len(rule.OnPodConditions) > 0

	if hasExitCodes {
		allErrs = append(allErrs, validatePodFailurePolicyRuleOnExitCodes(rule.OnExitCodes, rulePath.Child("onExitCodes"), containerNames)...)
	}
	if hasPodConditions {
		allErrs = append(allErrs, validatePodFailurePolicyRuleOnPodConditions(rule.OnPodConditions, rulePath.Child("onPodConditions"))...)
	}

	// The two requirement kinds are mutually exclusive, and one is mandatory.
	switch {
	case hasExitCodes && hasPodConditions:
		allErrs = append(allErrs, field.Invalid(rulePath, field.OmitValueType{}, "specifying both OnExitCodes and OnPodConditions is not supported"))
	case !hasExitCodes && !hasPodConditions:
		allErrs = append(allErrs, field.Invalid(rulePath, field.OmitValueType{}, "specifying one of OnExitCodes and OnPodConditions is required"))
	}
	return allErrs
}
// validatePodFailurePolicyRuleOnPodConditions validates the OnPodConditions
// patterns of a pod failure policy rule and returns every problem found, with
// field paths rooted at onPodConditionsPath.
//
// It reports a TooMany error when there are more than
// maxPodFailurePolicyOnPodConditionsPatterns patterns. For each pattern, the
// type is checked with apivalidation.ValidateQualifiedName and the status must
// be set and be one of supportedPodFailurePolicyOnPodConditionsStatus.
func validatePodFailurePolicyRuleOnPodConditions(onPodConditions []batch.PodFailurePolicyOnPodConditionsPattern, onPodConditionsPath *field.Path) field.ErrorList {
	var allErrs field.ErrorList

	if patternCount := len(onPodConditions); patternCount > maxPodFailurePolicyOnPodConditionsPatterns {
		allErrs = append(allErrs, field.TooMany(onPodConditionsPath, patternCount, maxPodFailurePolicyOnPodConditionsPatterns))
	}

	for i := range onPodConditions {
		pattern := &onPodConditions[i]
		patternPath := onPodConditionsPath.Index(i)

		typeErrs := apivalidation.ValidateQualifiedName(string(pattern.Type), patternPath.Child("type"))
		allErrs = append(allErrs, typeErrs...)

		statusPath := patternPath.Child("status")
		switch {
		case pattern.Status == "":
			allErrs = append(allErrs, field.Required(statusPath, fmt.Sprintf("valid values: %q", supportedPodFailurePolicyOnPodConditionsStatus.List())))
		case !supportedPodFailurePolicyOnPodConditionsStatus.Has(string(pattern.Status)):
			allErrs = append(allErrs, field.NotSupported(statusPath, pattern.Status, supportedPodFailurePolicyOnPodConditionsStatus.List()))
		}
	}
	return allErrs
}
// validatePodFailurePolicyRuleOnExitCodes validates the OnExitCodes
// requirement of a pod failure policy rule and returns every problem found,
// with field paths rooted at onExitCodesPath.
//
// The checks, in reporting order:
//   - the operator must be set and be one of
//     supportedPodFailurePolicyOnExitCodesOperator;
//   - the container name, when set, must be present in containerNames;
//   - there must be at least one value and at most
//     maxPodFailurePolicyOnExitCodesValues values;
//   - no value may be 0 when the operator is In;
//   - no value may repeat;
//   - no value may be smaller than the value before it.
func validatePodFailurePolicyRuleOnExitCodes(onExitCode *batch.PodFailurePolicyOnExitCodesRequirement, onExitCodesPath *field.Path, containerNames sets.String) field.ErrorList {
	var allErrs field.ErrorList

	operatorPath := onExitCodesPath.Child("operator")
	switch {
	case onExitCode.Operator == "":
		allErrs = append(allErrs, field.Required(operatorPath, fmt.Sprintf("valid values: %q", supportedPodFailurePolicyOnExitCodesOperator.List())))
	case !supportedPodFailurePolicyOnExitCodesOperator.Has(string(onExitCode.Operator)):
		allErrs = append(allErrs, field.NotSupported(operatorPath, onExitCode.Operator, supportedPodFailurePolicyOnExitCodesOperator.List()))
	}

	if name := onExitCode.ContainerName; name != nil && !containerNames.Has(*name) {
		allErrs = append(allErrs, field.Invalid(onExitCodesPath.Child("containerName"), *name, "must be one of the container or initContainer names in the pod template"))
	}

	values := onExitCode.Values
	valuesPath := onExitCodesPath.Child("values")
	switch valueCount := len(values); {
	case valueCount == 0:
		allErrs = append(allErrs, field.Invalid(valuesPath, values, "at least one value is required"))
	case valueCount > maxPodFailurePolicyOnExitCodesValues:
		allErrs = append(allErrs, field.TooMany(valuesPath, valueCount, maxPodFailurePolicyOnExitCodesValues))
	}

	// Exit code 0 is only rejected for the In operator.
	rejectZero := onExitCode.Operator == batch.PodFailurePolicyOnExitCodesOpIn
	seen := sets.NewInt32()
	outOfOrder := false
	for i, code := range values {
		codePath := valuesPath.Index(i)
		if rejectZero && code == 0 {
			allErrs = append(allErrs, field.Invalid(codePath, code, "must not be 0 for the In operator"))
		}
		if seen.Has(code) {
			allErrs = append(allErrs, field.Duplicate(codePath, code))
		}
		// Inserting a value that is already present is a no-op.
		seen.Insert(code)
		if i > 0 && code < values[i-1] {
			outOfOrder = true
		}
	}
	// Ordering is reported once for the whole list, not once per offending value.
	if outOfOrder {
		allErrs = append(allErrs, field.Invalid(valuesPath, values, "must be ordered"))
	}
	return allErrs
}
// validateJobStatus validates a JobStatus and returns an ErrorList with any errors.
func validateJobStatus(status *batch.JobStatus, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
@@ -241,6 +376,7 @@ func ValidateJobSpecUpdate(spec, oldSpec batch.JobSpec, fldPath *field.Path, opt
allErrs = append(allErrs, apivalidation.ValidateImmutableField(spec.Selector, oldSpec.Selector, fldPath.Child("selector"))...)
allErrs = append(allErrs, validatePodTemplateUpdate(spec, oldSpec, fldPath, opts)...)
allErrs = append(allErrs, apivalidation.ValidateImmutableField(spec.CompletionMode, oldSpec.CompletionMode, fldPath.Child("completionMode"))...)
allErrs = append(allErrs, apivalidation.ValidateImmutableField(spec.PodFailurePolicy, oldSpec.PodFailurePolicy, fldPath.Child("podFailurePolicy"))...)
return allErrs
}