Merge pull request #52732 from shyamjvs/fix-metrics-perf-tests
Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Increase API latency threshold for cluster-scoped list calls. A recent change from @smarterclayton (https://github.com/kubernetes/kubernetes/pull/52237) added scope to apiserver metrics. As a result, our current threshold for list calls is no longer sufficient for all-namespace calls, which are now being measured separately from namespaced lists. For example (from our [last 5k run](https://k8s-gubernator.appspot.com/build/kubernetes-jenkins/logs/ci-kubernetes-e2e-gce-scale-performance/37)): ``` WARNING Top latency metric: {Resource:pods Subresource: Verb:LIST Scope:cluster Latency:{Perc50:4.498374s Perc90:7.548079s Perc99:8.169389s Perc100:0s} Count:1400} ``` cc @kubernetes/sig-scalability-misc @kubernetes/sig-api-machinery-misc @wojtek-t
This commit is contained in:
		| @@ -52,8 +52,10 @@ const ( | ||||
| 	apiCallLatencyThreshold time.Duration = 1 * time.Second | ||||
|  | ||||
| 	// We use a higher threshold for list apicalls if the cluster is big (i.e having > 500 nodes) | ||||
| 	// as list response sizes are bigger in general for big clusters. | ||||
| 	// as list response sizes are bigger in general for big clusters. We also use a higher threshold | ||||
| 	// for list calls at cluster scope (this includes non-namespaced and all-namespaced calls). | ||||
| 	apiListCallLatencyThreshold      time.Duration = 5 * time.Second | ||||
| 	apiClusterScopeListCallThreshold time.Duration = 10 * time.Second | ||||
| 	bigClusterNodeCountThreshold                   = 500 | ||||
|  | ||||
| 	// Cluster Autoscaler metrics names | ||||
| @@ -371,13 +373,19 @@ func HighLatencyRequests(c clientset.Interface, nodeCount int) (int, *APIRespons | ||||
| 	for i := range metrics.APICalls { | ||||
| 		latency := metrics.APICalls[i].Latency.Perc99 | ||||
| 		isListCall := (metrics.APICalls[i].Verb == "LIST") | ||||
| 		isClusterScopedCall := (metrics.APICalls[i].Scope == "cluster") | ||||
| 		isBad := false | ||||
| 		if latency > apiCallLatencyThreshold { | ||||
| 			if !isListCall || !isBigCluster || (latency > apiListCallLatencyThreshold) { | ||||
| 		latencyThreshold := apiCallLatencyThreshold | ||||
| 		if isListCall && isBigCluster { | ||||
| 			latencyThreshold = apiListCallLatencyThreshold | ||||
| 			if isClusterScopedCall { | ||||
| 				latencyThreshold = apiClusterScopeListCallThreshold | ||||
| 			} | ||||
| 		} | ||||
| 		if latency > latencyThreshold { | ||||
| 			isBad = true | ||||
| 			badMetrics++ | ||||
| 		} | ||||
| 		} | ||||
| 		if top > 0 || isBad { | ||||
| 			top-- | ||||
| 			prefix := "" | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Submit Queue
					Kubernetes Submit Queue