Merge pull request #85236 from liu-cong/metrics-bucket
Update bucket for scheduler framework latency histograms.
This commit is contained in:
		| @@ -159,7 +159,7 @@ var ( | |||||||
| 			StabilityLevel: metrics.ALPHA, | 			StabilityLevel: metrics.ALPHA, | ||||||
| 		}, | 		}, | ||||||
| 	) | 	) | ||||||
| 	SchedulingAlgorithmPremptionEvaluationDuration = metrics.NewHistogram( | 	SchedulingAlgorithmPreemptionEvaluationDuration = metrics.NewHistogram( | ||||||
| 		&metrics.HistogramOpts{ | 		&metrics.HistogramOpts{ | ||||||
| 			Subsystem:      SchedulerSubsystem, | 			Subsystem:      SchedulerSubsystem, | ||||||
| 			Name:           "scheduling_algorithm_preemption_evaluation_seconds", | 			Name:           "scheduling_algorithm_preemption_evaluation_seconds", | ||||||
| @@ -168,7 +168,7 @@ var ( | |||||||
| 			StabilityLevel: metrics.ALPHA, | 			StabilityLevel: metrics.ALPHA, | ||||||
| 		}, | 		}, | ||||||
| 	) | 	) | ||||||
| 	DeprecatedSchedulingAlgorithmPremptionEvaluationDuration = metrics.NewHistogram( | 	DeprecatedSchedulingAlgorithmPreemptionEvaluationDuration = metrics.NewHistogram( | ||||||
| 		&metrics.HistogramOpts{ | 		&metrics.HistogramOpts{ | ||||||
| 			Subsystem:      SchedulerSubsystem, | 			Subsystem:      SchedulerSubsystem, | ||||||
| 			Name:           "scheduling_algorithm_preemption_evaluation", | 			Name:           "scheduling_algorithm_preemption_evaluation", | ||||||
| @@ -228,9 +228,10 @@ var ( | |||||||
|  |  | ||||||
| 	PodSchedulingDuration = metrics.NewHistogram( | 	PodSchedulingDuration = metrics.NewHistogram( | ||||||
| 		&metrics.HistogramOpts{ | 		&metrics.HistogramOpts{ | ||||||
| 			Subsystem:      SchedulerSubsystem, | 			Subsystem: SchedulerSubsystem, | ||||||
| 			Name:           "pod_scheduling_duration_seconds", | 			Name:      "pod_scheduling_duration_seconds", | ||||||
| 			Help:           "E2e latency for a pod being scheduled which may include multiple scheduling attempts.", | 			Help:      "E2e latency for a pod being scheduled which may include multiple scheduling attempts.", | ||||||
|  | 			// Start with 1ms with the last bucket being [~16s, Inf) | ||||||
| 			Buckets:        metrics.ExponentialBuckets(0.001, 2, 15), | 			Buckets:        metrics.ExponentialBuckets(0.001, 2, 15), | ||||||
| 			StabilityLevel: metrics.ALPHA, | 			StabilityLevel: metrics.ALPHA, | ||||||
| 		}) | 		}) | ||||||
| @@ -246,20 +247,23 @@ var ( | |||||||
|  |  | ||||||
| 	FrameworkExtensionPointDuration = metrics.NewHistogramVec( | 	FrameworkExtensionPointDuration = metrics.NewHistogramVec( | ||||||
| 		&metrics.HistogramOpts{ | 		&metrics.HistogramOpts{ | ||||||
| 			Subsystem:      SchedulerSubsystem, | 			Subsystem: SchedulerSubsystem, | ||||||
| 			Name:           "framework_extension_point_duration_seconds", | 			Name:      "framework_extension_point_duration_seconds", | ||||||
| 			Help:           "Latency for running all plugins of a specific extension point.", | 			Help:      "Latency for running all plugins of a specific extension point.", | ||||||
| 			Buckets:        nil, | 			// Start with 0.1ms with the last bucket being [~200ms, Inf) | ||||||
|  | 			Buckets:        metrics.ExponentialBuckets(0.0001, 2, 12), | ||||||
| 			StabilityLevel: metrics.ALPHA, | 			StabilityLevel: metrics.ALPHA, | ||||||
| 		}, | 		}, | ||||||
| 		[]string{"extension_point", "status"}) | 		[]string{"extension_point", "status"}) | ||||||
|  |  | ||||||
| 	PluginExecutionDuration = metrics.NewHistogramVec( | 	PluginExecutionDuration = metrics.NewHistogramVec( | ||||||
| 		&metrics.HistogramOpts{ | 		&metrics.HistogramOpts{ | ||||||
| 			Subsystem:      SchedulerSubsystem, | 			Subsystem: SchedulerSubsystem, | ||||||
| 			Name:           "plugin_execution_duration_seconds", | 			Name:      "plugin_execution_duration_seconds", | ||||||
| 			Help:           "Duration for running a plugin at a specific extension point.", | 			Help:      "Duration for running a plugin at a specific extension point.", | ||||||
| 			Buckets:        nil, | 			// Start with 0.01ms with the last bucket being [~22ms, Inf). We use a small factor (1.5) | ||||||
|  | 			// so that we have better granularity since plugin latency is very sensitive. | ||||||
|  | 			Buckets:        metrics.ExponentialBuckets(0.00001, 1.5, 20), | ||||||
| 			StabilityLevel: metrics.ALPHA, | 			StabilityLevel: metrics.ALPHA, | ||||||
| 		}, | 		}, | ||||||
| 		[]string{"plugin", "extension_point", "status"}) | 		[]string{"plugin", "extension_point", "status"}) | ||||||
| @@ -304,8 +308,8 @@ var ( | |||||||
| 		DeprecatedSchedulingAlgorithmPredicateEvaluationDuration, | 		DeprecatedSchedulingAlgorithmPredicateEvaluationDuration, | ||||||
| 		SchedulingAlgorithmPriorityEvaluationDuration, | 		SchedulingAlgorithmPriorityEvaluationDuration, | ||||||
| 		DeprecatedSchedulingAlgorithmPriorityEvaluationDuration, | 		DeprecatedSchedulingAlgorithmPriorityEvaluationDuration, | ||||||
| 		SchedulingAlgorithmPremptionEvaluationDuration, | 		SchedulingAlgorithmPreemptionEvaluationDuration, | ||||||
| 		DeprecatedSchedulingAlgorithmPremptionEvaluationDuration, | 		DeprecatedSchedulingAlgorithmPreemptionEvaluationDuration, | ||||||
| 		PreemptionVictims, | 		PreemptionVictims, | ||||||
| 		PreemptionAttempts, | 		PreemptionAttempts, | ||||||
| 		pendingPods, | 		pendingPods, | ||||||
|   | |||||||
| @@ -624,8 +624,8 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) { | |||||||
| 				preemptionStartTime := time.Now() | 				preemptionStartTime := time.Now() | ||||||
| 				sched.preempt(schedulingCycleCtx, state, fwk, pod, fitError) | 				sched.preempt(schedulingCycleCtx, state, fwk, pod, fitError) | ||||||
| 				metrics.PreemptionAttempts.Inc() | 				metrics.PreemptionAttempts.Inc() | ||||||
| 				metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime)) | 				metrics.SchedulingAlgorithmPreemptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime)) | ||||||
| 				metrics.DeprecatedSchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime)) | 				metrics.DeprecatedSchedulingAlgorithmPreemptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime)) | ||||||
| 				metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime)) | 				metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime)) | ||||||
| 				metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime)) | 				metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime)) | ||||||
| 			} | 			} | ||||||
|   | |||||||
| @@ -48,7 +48,7 @@ var ( | |||||||
|  |  | ||||||
| // BenchmarkScheduling benchmarks the scheduling rate when the cluster has | // BenchmarkScheduling benchmarks the scheduling rate when the cluster has | ||||||
| // various quantities of nodes and scheduled pods. | // various quantities of nodes and scheduled pods. | ||||||
| func BenchmarkSchedulingV(b *testing.B) { | func BenchmarkScheduling(b *testing.B) { | ||||||
| 	tests := []struct{ nodes, existingPods, minPods int }{ | 	tests := []struct{ nodes, existingPods, minPods int }{ | ||||||
| 		{nodes: 100, existingPods: 0, minPods: 100}, | 		{nodes: 100, existingPods: 0, minPods: 100}, | ||||||
| 		{nodes: 100, existingPods: 1000, minPods: 100}, | 		{nodes: 100, existingPods: 1000, minPods: 100}, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Prow Robot
					Kubernetes Prow Robot