Support handling of pod failures with respect to the specified rules
This commit is contained in:
@@ -22,6 +22,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/robfig/cron/v3"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
unversionedvalidation "k8s.io/apimachinery/pkg/apis/meta/v1/validation"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
@@ -39,6 +40,33 @@ import (
|
||||
// .status.completedIndexes.
|
||||
const maxParallelismForIndexedJob = 100000
|
||||
|
||||
// Size limits enforced on a Job's pod failure policy during validation.
const (
	// maxPodFailurePolicyRules is the largest number of rules a pod failure
	// policy may contain.
	maxPodFailurePolicyRules = 20

	// maxPodFailurePolicyOnExitCodesValues is the largest number of values an
	// OnExitCodes requirement may list.
	maxPodFailurePolicyOnExitCodesValues = 255

	// maxPodFailurePolicyOnPodConditionsPatterns is the largest number of
	// patterns an OnPodConditions requirement may list.
	maxPodFailurePolicyOnPodConditionsPatterns = 20
)
|
||||
|
||||
var (
|
||||
supportedPodFailurePolicyActions sets.String = sets.NewString(
|
||||
string(batch.PodFailurePolicyActionCount),
|
||||
string(batch.PodFailurePolicyActionFailJob),
|
||||
string(batch.PodFailurePolicyActionIgnore))
|
||||
|
||||
supportedPodFailurePolicyOnExitCodesOperator sets.String = sets.NewString(
|
||||
string(batch.PodFailurePolicyOnExitCodesOpIn),
|
||||
string(batch.PodFailurePolicyOnExitCodesOpNotIn))
|
||||
|
||||
supportedPodFailurePolicyOnPodConditionsStatus sets.String = sets.NewString(
|
||||
string(v1.ConditionFalse),
|
||||
string(v1.ConditionTrue),
|
||||
string(v1.ConditionUnknown))
|
||||
)
|
||||
|
||||
// ValidateGeneratedSelector validates that the generated selector on a controller object match the controller object
|
||||
// metadata, and the labels on the pod template are as generated.
|
||||
//
|
||||
@@ -168,6 +196,10 @@ func validateJobSpec(spec *batch.JobSpec, fldPath *field.Path, opts apivalidatio
|
||||
}
|
||||
}
|
||||
|
||||
if spec.PodFailurePolicy != nil {
|
||||
allErrs = append(allErrs, validatePodFailurePolicy(spec, fldPath.Child("podFailurePolicy"))...)
|
||||
}
|
||||
|
||||
allErrs = append(allErrs, apivalidation.ValidatePodTemplateSpec(&spec.Template, fldPath.Child("template"), opts)...)
|
||||
|
||||
// spec.Template.Spec.RestartPolicy can be defaulted as RestartPolicyAlways
|
||||
@@ -179,10 +211,113 @@ func validateJobSpec(spec *batch.JobSpec, fldPath *field.Path, opts apivalidatio
|
||||
} else if spec.Template.Spec.RestartPolicy != api.RestartPolicyOnFailure && spec.Template.Spec.RestartPolicy != api.RestartPolicyNever {
|
||||
allErrs = append(allErrs, field.NotSupported(fldPath.Child("template", "spec", "restartPolicy"),
|
||||
spec.Template.Spec.RestartPolicy, []string{string(api.RestartPolicyOnFailure), string(api.RestartPolicyNever)}))
|
||||
} else if spec.PodFailurePolicy != nil && spec.Template.Spec.RestartPolicy != api.RestartPolicyNever {
|
||||
allErrs = append(allErrs, field.Invalid(fldPath.Child("template", "spec", "restartPolicy"),
|
||||
spec.Template.Spec.RestartPolicy, fmt.Sprintf("only %q is supported when podFailurePolicy is specified", api.RestartPolicyNever)))
|
||||
}
|
||||
return allErrs
|
||||
}
|
||||
|
||||
func validatePodFailurePolicy(spec *batch.JobSpec, fldPath *field.Path) field.ErrorList {
|
||||
var allErrs field.ErrorList
|
||||
rulesPath := fldPath.Child("rules")
|
||||
if len(spec.PodFailurePolicy.Rules) > maxPodFailurePolicyRules {
|
||||
allErrs = append(allErrs, field.TooMany(rulesPath, len(spec.PodFailurePolicy.Rules), maxPodFailurePolicyRules))
|
||||
}
|
||||
containerNames := sets.NewString()
|
||||
for _, containerSpec := range spec.Template.Spec.Containers {
|
||||
containerNames.Insert(containerSpec.Name)
|
||||
}
|
||||
for _, containerSpec := range spec.Template.Spec.InitContainers {
|
||||
containerNames.Insert(containerSpec.Name)
|
||||
}
|
||||
for i, rule := range spec.PodFailurePolicy.Rules {
|
||||
allErrs = append(allErrs, validatePodFailurePolicyRule(&rule, rulesPath.Index(i), containerNames)...)
|
||||
}
|
||||
return allErrs
|
||||
}
|
||||
|
||||
func validatePodFailurePolicyRule(rule *batch.PodFailurePolicyRule, rulePath *field.Path, containerNames sets.String) field.ErrorList {
|
||||
var allErrs field.ErrorList
|
||||
actionPath := rulePath.Child("action")
|
||||
if rule.Action == "" {
|
||||
allErrs = append(allErrs, field.Required(actionPath, fmt.Sprintf("valid values: %q", supportedPodFailurePolicyActions.List())))
|
||||
} else if !supportedPodFailurePolicyActions.Has(string(rule.Action)) {
|
||||
allErrs = append(allErrs, field.NotSupported(actionPath, rule.Action, supportedPodFailurePolicyActions.List()))
|
||||
}
|
||||
if rule.OnExitCodes != nil {
|
||||
allErrs = append(allErrs, validatePodFailurePolicyRuleOnExitCodes(rule.OnExitCodes, rulePath.Child("onExitCodes"), containerNames)...)
|
||||
}
|
||||
if len(rule.OnPodConditions) > 0 {
|
||||
allErrs = append(allErrs, validatePodFailurePolicyRuleOnPodConditions(rule.OnPodConditions, rulePath.Child("onPodConditions"))...)
|
||||
}
|
||||
if rule.OnExitCodes != nil && len(rule.OnPodConditions) > 0 {
|
||||
allErrs = append(allErrs, field.Invalid(rulePath, field.OmitValueType{}, "specifying both OnExitCodes and OnPodConditions is not supported"))
|
||||
}
|
||||
if rule.OnExitCodes == nil && len(rule.OnPodConditions) == 0 {
|
||||
allErrs = append(allErrs, field.Invalid(rulePath, field.OmitValueType{}, "specifying one of OnExitCodes and OnPodConditions is required"))
|
||||
}
|
||||
return allErrs
|
||||
}
|
||||
|
||||
func validatePodFailurePolicyRuleOnPodConditions(onPodConditions []batch.PodFailurePolicyOnPodConditionsPattern, onPodConditionsPath *field.Path) field.ErrorList {
|
||||
var allErrs field.ErrorList
|
||||
if len(onPodConditions) > maxPodFailurePolicyOnPodConditionsPatterns {
|
||||
allErrs = append(allErrs, field.TooMany(onPodConditionsPath, len(onPodConditions), maxPodFailurePolicyOnPodConditionsPatterns))
|
||||
}
|
||||
for j, pattern := range onPodConditions {
|
||||
patternPath := onPodConditionsPath.Index(j)
|
||||
statusPath := patternPath.Child("status")
|
||||
allErrs = append(allErrs, apivalidation.ValidateQualifiedName(string(pattern.Type), patternPath.Child("type"))...)
|
||||
if pattern.Status == "" {
|
||||
allErrs = append(allErrs, field.Required(statusPath, fmt.Sprintf("valid values: %q", supportedPodFailurePolicyOnPodConditionsStatus.List())))
|
||||
} else if !supportedPodFailurePolicyOnPodConditionsStatus.Has(string(pattern.Status)) {
|
||||
allErrs = append(allErrs, field.NotSupported(statusPath, pattern.Status, supportedPodFailurePolicyOnPodConditionsStatus.List()))
|
||||
}
|
||||
}
|
||||
return allErrs
|
||||
}
|
||||
|
||||
func validatePodFailurePolicyRuleOnExitCodes(onExitCode *batch.PodFailurePolicyOnExitCodesRequirement, onExitCodesPath *field.Path, containerNames sets.String) field.ErrorList {
|
||||
var allErrs field.ErrorList
|
||||
operatorPath := onExitCodesPath.Child("operator")
|
||||
if onExitCode.Operator == "" {
|
||||
allErrs = append(allErrs, field.Required(operatorPath, fmt.Sprintf("valid values: %q", supportedPodFailurePolicyOnExitCodesOperator.List())))
|
||||
} else if !supportedPodFailurePolicyOnExitCodesOperator.Has(string(onExitCode.Operator)) {
|
||||
allErrs = append(allErrs, field.NotSupported(operatorPath, onExitCode.Operator, supportedPodFailurePolicyOnExitCodesOperator.List()))
|
||||
}
|
||||
if onExitCode.ContainerName != nil && !containerNames.Has(*onExitCode.ContainerName) {
|
||||
allErrs = append(allErrs, field.Invalid(onExitCodesPath.Child("containerName"), *onExitCode.ContainerName, "must be one of the container or initContainer names in the pod template"))
|
||||
}
|
||||
valuesPath := onExitCodesPath.Child("values")
|
||||
if len(onExitCode.Values) == 0 {
|
||||
allErrs = append(allErrs, field.Invalid(valuesPath, onExitCode.Values, "at least one value is required"))
|
||||
} else if len(onExitCode.Values) > maxPodFailurePolicyOnExitCodesValues {
|
||||
allErrs = append(allErrs, field.TooMany(valuesPath, len(onExitCode.Values), maxPodFailurePolicyOnExitCodesValues))
|
||||
}
|
||||
isOrdered := true
|
||||
uniqueValues := sets.NewInt32()
|
||||
for j, exitCodeValue := range onExitCode.Values {
|
||||
valuePath := valuesPath.Index(j)
|
||||
if onExitCode.Operator == batch.PodFailurePolicyOnExitCodesOpIn && exitCodeValue == 0 {
|
||||
allErrs = append(allErrs, field.Invalid(valuePath, exitCodeValue, "must not be 0 for the In operator"))
|
||||
}
|
||||
if uniqueValues.Has(exitCodeValue) {
|
||||
allErrs = append(allErrs, field.Duplicate(valuePath, exitCodeValue))
|
||||
} else {
|
||||
uniqueValues.Insert(exitCodeValue)
|
||||
}
|
||||
if j > 0 && onExitCode.Values[j-1] > exitCodeValue {
|
||||
isOrdered = false
|
||||
}
|
||||
}
|
||||
if !isOrdered {
|
||||
allErrs = append(allErrs, field.Invalid(valuesPath, onExitCode.Values, "must be ordered"))
|
||||
}
|
||||
|
||||
return allErrs
|
||||
}
|
||||
|
||||
// validateJobStatus validates a JobStatus and returns an ErrorList with any errors.
|
||||
func validateJobStatus(status *batch.JobStatus, fldPath *field.Path) field.ErrorList {
|
||||
allErrs := field.ErrorList{}
|
||||
@@ -241,6 +376,7 @@ func ValidateJobSpecUpdate(spec, oldSpec batch.JobSpec, fldPath *field.Path, opt
|
||||
allErrs = append(allErrs, apivalidation.ValidateImmutableField(spec.Selector, oldSpec.Selector, fldPath.Child("selector"))...)
|
||||
allErrs = append(allErrs, validatePodTemplateUpdate(spec, oldSpec, fldPath, opts)...)
|
||||
allErrs = append(allErrs, apivalidation.ValidateImmutableField(spec.CompletionMode, oldSpec.CompletionMode, fldPath.Child("completionMode"))...)
|
||||
allErrs = append(allErrs, apivalidation.ValidateImmutableField(spec.PodFailurePolicy, oldSpec.PodFailurePolicy, fldPath.Child("podFailurePolicy"))...)
|
||||
return allErrs
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user