Merge pull request #126067 from tenzen-y/implement-job-success-policy-e2e
Graduate the JobSuccessPolicy to Beta
This commit is contained in:
@@ -988,7 +988,12 @@ func (jm *Controller) newSuccessCondition() *batch.JobCondition {
|
||||
if delayTerminalCondition() {
|
||||
cType = batch.JobSuccessCriteriaMet
|
||||
}
|
||||
return newCondition(cType, v1.ConditionTrue, "", "", jm.clock.Now())
|
||||
var reason, message string
|
||||
if feature.DefaultFeatureGate.Enabled(features.JobSuccessPolicy) {
|
||||
reason = batch.JobReasonCompletionsReached
|
||||
message = "Reached expected number of succeeded pods"
|
||||
}
|
||||
return newCondition(cType, v1.ConditionTrue, reason, message, jm.clock.Now())
|
||||
}
|
||||
|
||||
func delayTerminalCondition() bool {
|
||||
@@ -1419,7 +1424,7 @@ func (jm *Controller) recordJobFinished(job *batch.Job, finishedCond *batch.JobC
|
||||
jm.recorder.Event(job, v1.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached")
|
||||
}
|
||||
jm.recorder.Event(job, v1.EventTypeNormal, "Completed", "Job completed")
|
||||
metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded", "").Inc()
|
||||
metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded", finishedCond.Reason).Inc()
|
||||
} else {
|
||||
jm.recorder.Event(job, v1.EventTypeWarning, finishedCond.Reason, finishedCond.Message)
|
||||
metrics.JobFinishedNum.WithLabelValues(completionMode, "failed", finishedCond.Reason).Inc()
|
||||
|
||||
@@ -4991,6 +4991,45 @@ func TestSyncJobWithJobSuccessPolicy(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
"job without successPolicy; jobSuccessPolicy is enabled; job got SuccessCriteriaMet and Completion with CompletionsReached reason conditions": {
|
||||
enableJobSuccessPolicy: true,
|
||||
enableJobManagedBy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: validTypeMeta,
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
CompletionMode: ptr.To(batch.IndexedCompletion),
|
||||
Completions: ptr.To[int32](1),
|
||||
Parallelism: ptr.To[int32](1),
|
||||
BackoffLimit: ptr.To[int32](math.MaxInt32),
|
||||
},
|
||||
},
|
||||
pods: []v1.Pod{
|
||||
*buildPod().uid("a1").index("0").phase(v1.PodSucceeded).trackingFinalizer().Pod,
|
||||
},
|
||||
wantStatus: batch.JobStatus{
|
||||
Failed: 0,
|
||||
Succeeded: 1,
|
||||
CompletedIndexes: "0",
|
||||
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
|
||||
Conditions: []batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobSuccessCriteriaMet,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: batch.JobReasonCompletionsReached,
|
||||
Message: "Reached expected number of succeeded pods",
|
||||
},
|
||||
{
|
||||
Type: batch.JobComplete,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: batch.JobReasonCompletionsReached,
|
||||
Message: "Reached expected number of succeeded pods",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"when the JobSuccessPolicy is disabled, the Job never got SuccessCriteriaMet condition even if the Job has the successPolicy field": {
|
||||
job: batch.Job{
|
||||
TypeMeta: validTypeMeta,
|
||||
@@ -5132,12 +5171,16 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
||||
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
|
||||
Conditions: []batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobSuccessCriteriaMet,
|
||||
Status: v1.ConditionTrue,
|
||||
Type: batch.JobSuccessCriteriaMet,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: batch.JobReasonCompletionsReached,
|
||||
Message: "Reached expected number of succeeded pods",
|
||||
},
|
||||
{
|
||||
Type: batch.JobComplete,
|
||||
Status: v1.ConditionTrue,
|
||||
Type: batch.JobComplete,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: batch.JobReasonCompletionsReached,
|
||||
Message: "Reached expected number of succeeded pods",
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -7066,8 +7109,10 @@ func TestJobBackoffForOnFailure(t *testing.T) {
|
||||
expectedFailed: 0,
|
||||
expectedConditions: []batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobComplete,
|
||||
Status: v1.ConditionTrue,
|
||||
Type: batch.JobComplete,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: batch.JobReasonCompletionsReached,
|
||||
Message: "Reached expected number of succeeded pods",
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -7085,12 +7130,16 @@ func TestJobBackoffForOnFailure(t *testing.T) {
|
||||
expectedFailed: 0,
|
||||
expectedConditions: []batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobSuccessCriteriaMet,
|
||||
Status: v1.ConditionTrue,
|
||||
Type: batch.JobSuccessCriteriaMet,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: batch.JobReasonCompletionsReached,
|
||||
Message: "Reached expected number of succeeded pods",
|
||||
},
|
||||
{
|
||||
Type: batch.JobComplete,
|
||||
Status: v1.ConditionTrue,
|
||||
Type: batch.JobComplete,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: batch.JobReasonCompletionsReached,
|
||||
Message: "Reached expected number of succeeded pods",
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -7108,12 +7157,16 @@ func TestJobBackoffForOnFailure(t *testing.T) {
|
||||
expectedFailed: 0,
|
||||
expectedConditions: []batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobSuccessCriteriaMet,
|
||||
Status: v1.ConditionTrue,
|
||||
Type: batch.JobSuccessCriteriaMet,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: batch.JobReasonCompletionsReached,
|
||||
Message: "Reached expected number of succeeded pods",
|
||||
},
|
||||
{
|
||||
Type: batch.JobComplete,
|
||||
Status: v1.ConditionTrue,
|
||||
Type: batch.JobComplete,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: batch.JobReasonCompletionsReached,
|
||||
Message: "Reached expected number of succeeded pods",
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -55,12 +55,14 @@ var (
|
||||
},
|
||||
[]string{"completion_mode", "result", "action"},
|
||||
)
|
||||
// JobFinishedNum tracks the number of Jobs that finish. Empty reason label
|
||||
// is used to count successful jobs.
|
||||
// JobFinishedNum tracks the number of Jobs that finish.
|
||||
// TODO: Once the JobSuccessPolicy feature gate is removed, drop the note below about the empty ("") reason label.
|
||||
// When the JobSuccessPolicy feature gate is disabled, an empty reason label is used to count successful jobs.
|
||||
// Otherwise, the "CompletionsReached" reason label is used to count successful jobs.
|
||||
// Possible label values:
|
||||
// completion_mode: Indexed, NonIndexed
|
||||
// result: failed, succeeded
|
||||
// reason: "BackoffLimitExceeded", "DeadlineExceeded", "PodFailurePolicy", "FailedIndexes", "MaxFailedIndexesExceeded", ""
|
||||
// reason: "BackoffLimitExceeded", "DeadlineExceeded", "PodFailurePolicy", "FailedIndexes", "MaxFailedIndexesExceeded", "SuccessPolicy", "CompletionsReached", ""
|
||||
JobFinishedNum = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: JobControllerSubsystem,
|
||||
|
||||
Reference in New Issue
Block a user