Introduce the job_finished_indexes_total metric
This commit is contained in:
@@ -1842,8 +1842,16 @@ func recordJobPodFinished(logger klog.Logger, job *batch.Job, oldCounters batch.
|
||||
// in tandem, and now a previously completed index is
|
||||
// now out of range (i.e. index >= spec.Completions).
|
||||
if isIndexedJob(job) {
|
||||
completions := int(*job.Spec.Completions)
|
||||
if job.Status.CompletedIndexes != oldCounters.CompletedIndexes {
|
||||
diff = parseIndexesFromString(logger, job.Status.CompletedIndexes, int(*job.Spec.Completions)).total() - parseIndexesFromString(logger, oldCounters.CompletedIndexes, int(*job.Spec.Completions)).total()
|
||||
diff = indexesCount(logger, &job.Status.CompletedIndexes, completions) - indexesCount(logger, &oldCounters.CompletedIndexes, completions)
|
||||
}
|
||||
backoffLimitLabel := backoffLimitMetricsLabel(job)
|
||||
metrics.JobFinishedIndexesTotal.WithLabelValues(metrics.Succeeded, backoffLimitLabel).Add(float64(diff))
|
||||
if hasBackoffLimitPerIndex(job) && job.Status.FailedIndexes != oldCounters.FailedIndexes {
|
||||
if failedDiff := indexesCount(logger, job.Status.FailedIndexes, completions) - indexesCount(logger, oldCounters.FailedIndexes, completions); failedDiff > 0 {
|
||||
metrics.JobFinishedIndexesTotal.WithLabelValues(metrics.Failed, backoffLimitLabel).Add(float64(failedDiff))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
diff = int(job.Status.Succeeded) - int(oldCounters.Succeeded)
|
||||
@@ -1855,6 +1863,20 @@ func recordJobPodFinished(logger klog.Logger, job *batch.Job, oldCounters batch.
|
||||
metrics.JobPodsFinished.WithLabelValues(completionMode, metrics.Failed).Add(float64(diff))
|
||||
}
|
||||
|
||||
func indexesCount(logger klog.Logger, indexesStr *string, completions int) int {
|
||||
if indexesStr == nil {
|
||||
return 0
|
||||
}
|
||||
return parseIndexesFromString(logger, *indexesStr, completions).total()
|
||||
}
|
||||
|
||||
func backoffLimitMetricsLabel(job *batch.Job) string {
|
||||
if hasBackoffLimitPerIndex(job) {
|
||||
return "perIndex"
|
||||
}
|
||||
return "global"
|
||||
}
|
||||
|
||||
func recordJobPodFailurePolicyActions(job *batch.Job, podFailureCountByPolicyAction map[string]int) {
|
||||
for action, count := range podFailureCountByPolicyAction {
|
||||
metrics.PodFailuresHandledByFailurePolicy.WithLabelValues(action).Add(float64(count))
|
||||
|
@@ -114,6 +114,17 @@ var (
|
||||
that have the finalizer batch.kubernetes.io/job-tracking
|
||||
The event label can be "add" or "delete".`,
|
||||
}, []string{"event"})
|
||||
|
||||
// JobFinishedIndexesTotal is a counter vector recording the number of
// finished indexes, partitioned by the "status" and "backoffLimit" labels.
|
||||
JobFinishedIndexesTotal = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: JobControllerSubsystem,
|
||||
Name: "job_finished_indexes_total",
|
||||
Help: `The number of finished indexes. Possible values for the
|
||||
status label are: "succeeded", "failed". Possible values for the
|
||||
backoffLimit label are: "perIndex" and "global"`,
|
||||
},
|
||||
[]string{"status", "backoffLimit"})
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -158,5 +169,6 @@ func Register() {
|
||||
legacyregistry.MustRegister(JobPodsFinished)
|
||||
legacyregistry.MustRegister(PodFailuresHandledByFailurePolicy)
|
||||
legacyregistry.MustRegister(TerminatedPodsTrackingFinalizerTotal)
|
||||
legacyregistry.MustRegister(JobFinishedIndexesTotal)
|
||||
})
|
||||
}
|
||||
|
Reference in New Issue
Block a user