Job: Extend the jobs_finished_total metric reason label with SuccessPolicy and CompletionsReached
Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>
This commit is contained in:
		@@ -1424,7 +1424,7 @@ func (jm *Controller) recordJobFinished(job *batch.Job, finishedCond *batch.JobC
 | 
				
			|||||||
			jm.recorder.Event(job, v1.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached")
 | 
								jm.recorder.Event(job, v1.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached")
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		jm.recorder.Event(job, v1.EventTypeNormal, "Completed", "Job completed")
 | 
							jm.recorder.Event(job, v1.EventTypeNormal, "Completed", "Job completed")
 | 
				
			||||||
		metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded", "").Inc()
 | 
							metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded", finishedCond.Reason).Inc()
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		jm.recorder.Event(job, v1.EventTypeWarning, finishedCond.Reason, finishedCond.Message)
 | 
							jm.recorder.Event(job, v1.EventTypeWarning, finishedCond.Reason, finishedCond.Message)
 | 
				
			||||||
		metrics.JobFinishedNum.WithLabelValues(completionMode, "failed", finishedCond.Reason).Inc()
 | 
							metrics.JobFinishedNum.WithLabelValues(completionMode, "failed", finishedCond.Reason).Inc()
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -55,12 +55,14 @@ var (
 | 
				
			|||||||
		},
 | 
							},
 | 
				
			||||||
		[]string{"completion_mode", "result", "action"},
 | 
							[]string{"completion_mode", "result", "action"},
 | 
				
			||||||
	)
 | 
						)
 | 
				
			||||||
	// JobFinishedNum tracks the number of Jobs that finish. Empty reason label
 | 
						// JobFinishedNum tracks the number of Jobs that finish.
 | 
				
			||||||
	// is used to count successful jobs.
 | 
						// TODO: Once the JobSuccessPolicy feature gate is removed, drop the remark about the empty ("") reason label from this comment.
 | 
				
			||||||
 | 
						// When the JobSuccessPolicy feature gate is disabled, an empty reason label is used to count successful jobs.
 | 
				
			||||||
 | 
						// Otherwise, the "SuccessPolicy" or "CompletionsReached" reason label is used to count successful jobs.
 | 
				
			||||||
	// Possible label values:
 | 
						// Possible label values:
 | 
				
			||||||
	//   completion_mode: Indexed, NonIndexed
 | 
						//   completion_mode: Indexed, NonIndexed
 | 
				
			||||||
	//   result:          failed, succeeded
 | 
						//   result:          failed, succeeded
 | 
				
			||||||
	//   reason:          "BackoffLimitExceeded", "DeadlineExceeded", "PodFailurePolicy", "FailedIndexes", "MaxFailedIndexesExceeded", ""
 | 
						//   reason:          "BackoffLimitExceeded", "DeadlineExceeded", "PodFailurePolicy", "FailedIndexes", "MaxFailedIndexesExceeded", "SuccessPolicy", "CompletionsReached", ""
 | 
				
			||||||
	JobFinishedNum = metrics.NewCounterVec(
 | 
						JobFinishedNum = metrics.NewCounterVec(
 | 
				
			||||||
		&metrics.CounterOpts{
 | 
							&metrics.CounterOpts{
 | 
				
			||||||
			Subsystem:      JobControllerSubsystem,
 | 
								Subsystem:      JobControllerSubsystem,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -547,7 +547,7 @@ func TestSuccessPolicy(t *testing.T) {
 | 
				
			|||||||
			wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
 | 
								wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
 | 
				
			||||||
			wantJobFinishedNumMetric: []metricLabelsWithValue{
 | 
								wantJobFinishedNumMetric: []metricLabelsWithValue{
 | 
				
			||||||
				{
 | 
									{
 | 
				
			||||||
					Labels: []string{"Indexed", "succeeded", ""},
 | 
										Labels: []string{"Indexed", "succeeded", "SuccessPolicy"},
 | 
				
			||||||
					Value:  1,
 | 
										Value:  1,
 | 
				
			||||||
				},
 | 
									},
 | 
				
			||||||
			},
 | 
								},
 | 
				
			||||||
@@ -587,6 +587,37 @@ func TestSuccessPolicy(t *testing.T) {
 | 
				
			|||||||
				},
 | 
									},
 | 
				
			||||||
			},
 | 
								},
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
 | 
							"job without successPolicy; incremented the jobs_finished_total metric with CompletionsReached reason": {
 | 
				
			||||||
 | 
								enableJobSuccessPolicy: true,
 | 
				
			||||||
 | 
								job: batchv1.Job{
 | 
				
			||||||
 | 
									Spec: batchv1.JobSpec{
 | 
				
			||||||
 | 
										Parallelism:    ptr.To[int32](1),
 | 
				
			||||||
 | 
										Completions:    ptr.To[int32](1),
 | 
				
			||||||
 | 
										CompletionMode: completionModePtr(batchv1.IndexedCompletion),
 | 
				
			||||||
 | 
										Template:       podTemplateSpec,
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
								podTerminations: []podTerminationWithExpectations{
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										index: 0,
 | 
				
			||||||
 | 
										status: v1.PodStatus{
 | 
				
			||||||
 | 
											Phase: v1.PodSucceeded,
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
										wantActive:           0,
 | 
				
			||||||
 | 
										wantFailed:           0,
 | 
				
			||||||
 | 
										wantSucceeded:        1,
 | 
				
			||||||
 | 
										wantCompletedIndexes: "0",
 | 
				
			||||||
 | 
										wantTerminating:      ptr.To[int32](0),
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
								wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
 | 
				
			||||||
 | 
								wantJobFinishedNumMetric: []metricLabelsWithValue{
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										Labels: []string{"Indexed", "succeeded", "CompletionsReached"},
 | 
				
			||||||
 | 
										Value:  1,
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
		"job with successPolicy with succeededIndexes; job has SuccessCriteriaMet and Complete conditions even if some indexes remain pending": {
 | 
							"job with successPolicy with succeededIndexes; job has SuccessCriteriaMet and Complete conditions even if some indexes remain pending": {
 | 
				
			||||||
			enableJobSuccessPolicy: true,
 | 
								enableJobSuccessPolicy: true,
 | 
				
			||||||
			job: batchv1.Job{
 | 
								job: batchv1.Job{
 | 
				
			||||||
@@ -629,7 +660,7 @@ func TestSuccessPolicy(t *testing.T) {
 | 
				
			|||||||
			wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
 | 
								wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
 | 
				
			||||||
			wantJobFinishedNumMetric: []metricLabelsWithValue{
 | 
								wantJobFinishedNumMetric: []metricLabelsWithValue{
 | 
				
			||||||
				{
 | 
									{
 | 
				
			||||||
					Labels: []string{"Indexed", "succeeded", ""},
 | 
										Labels: []string{"Indexed", "succeeded", "SuccessPolicy"},
 | 
				
			||||||
					Value:  1,
 | 
										Value:  1,
 | 
				
			||||||
				},
 | 
									},
 | 
				
			||||||
			},
 | 
								},
 | 
				
			||||||
@@ -676,7 +707,7 @@ func TestSuccessPolicy(t *testing.T) {
 | 
				
			|||||||
			wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
 | 
								wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
 | 
				
			||||||
			wantJobFinishedNumMetric: []metricLabelsWithValue{
 | 
								wantJobFinishedNumMetric: []metricLabelsWithValue{
 | 
				
			||||||
				{
 | 
									{
 | 
				
			||||||
					Labels: []string{"Indexed", "succeeded", ""},
 | 
										Labels: []string{"Indexed", "succeeded", "SuccessPolicy"},
 | 
				
			||||||
					Value:  1,
 | 
										Value:  1,
 | 
				
			||||||
				},
 | 
									},
 | 
				
			||||||
			},
 | 
								},
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user