
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
	"context"
	"fmt"
	"math"
	"sort"
	"strconv"
	"strings"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/sets"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/kubernetes/pkg/master/ports"
	schedulermetric "k8s.io/kubernetes/pkg/scheduler/metrics"
	e2elog "k8s.io/kubernetes/test/e2e/framework/log"
	e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
	"k8s.io/kubernetes/test/e2e/system"

	"github.com/onsi/gomega"

	"github.com/prometheus/common/model"
)

const (
	// SingleCallTimeout is how long to try single API calls (like 'get' or 'list'). Used to prevent
	// transient failures from failing tests.
	// TODO: client should not apply this timeout to Watch calls. Increased from 30s until that is fixed.
	SingleCallTimeout = 5 * time.Minute

	// nodeStartupThreshold is a rough estimate of the time allocated for a pod to start on a node.
	nodeStartupThreshold = 4 * time.Second

	// We set a 1s threshold for API calls even in small clusters to avoid flakes.
	// The problem is that a long GC in a small cluster (with e.g. a 1-core master machine)
	// during a short test can consume a significant portion of CPU and effectively stall
	// all real work. Raising the threshold to 1s is within our SLO and avoids this.
	apiCallLatencyThreshold time.Duration = 1 * time.Second

	// We use a higher threshold for list API calls if the cluster is big (i.e. has more than
	// 500 nodes), since list response sizes are bigger in general for big clusters. We also
	// use a higher threshold for list calls at cluster scope (this includes non-namespaced
	// and all-namespace calls).
	apiListCallLatencyThreshold      time.Duration = 5 * time.Second
	apiClusterScopeListCallThreshold time.Duration = 10 * time.Second
	bigClusterNodeCountThreshold                   = 500
)
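
// schedulingLatencyMetricName is the fully qualified name of the scheduler's
// scheduling latency summary metric.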
var schedulingLatencyMetricName = model.LabelValue(schedulermetric.SchedulerSubsystem + "_" + schedulermetric.SchedulingLatencyName)
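
// readLatencyMetrics scrapes the apiserver's /metrics endpoint and aggregates
// request latency and request count samples into an APIResponsiveness summary.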
func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) {
	var a APIResponsiveness

	body, err := getMetrics(c)
	if err != nil {
		return nil, err
	}

	samples, err := extractMetricSamples(body)
	if err != nil {
		return nil, err
	}

	ignoredResources := sets.NewString("events")
	// TODO: figure out why we're getting non-capitalized proxy and fix this.
	ignoredVerbs := sets.NewString("WATCH", "WATCHLIST", "PROXY", "proxy", "CONNECT")

	for _, sample := range samples {
		// Example lines:
		// apiserver_request_latencies_summary{resource="namespaces",verb="LIST",quantile="0.99"} 908
		// apiserver_request_total{resource="pods",verb="LIST",client="kubectl",code="200",contentType="json"} 233
		if sample.Metric[model.MetricNameLabel] != "apiserver_request_latencies_summary" &&
			sample.Metric[model.MetricNameLabel] != "apiserver_request_total" {
			continue
		}

		resource := string(sample.Metric["resource"])
		subresource := string(sample.Metric["subresource"])
		verb := string(sample.Metric["verb"])
		scope := string(sample.Metric["scope"])
		if ignoredResources.Has(resource) || ignoredVerbs.Has(verb) {
			continue
		}
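
		// Latency summaries are reported by the apiserver in microseconds;
		// request totals are plain sample counts.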
		switch sample.Metric[model.MetricNameLabel] {
		case "apiserver_request_latencies_summary":
			latency := sample.Value
			quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64)
			if err != nil {
				return nil, err
			}
			a.addMetricRequestLatency(resource, subresource, verb, scope, quantile, time.Duration(int64(latency))*time.Microsecond)
		case "apiserver_request_total":
			count := sample.Value
			a.addMetricRequestCount(resource, subresource, verb, scope, int(count))
		}
	}

	return &a, nil
}

// HighLatencyRequests prints the top five summary metrics for request types by latency and
// returns the number of request types above the latency threshold. We use a higher threshold
// for list calls if nodeCount is above a given threshold (i.e. the cluster is big).
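//
// A typical (hypothetical) call from an e2e measurement test:
//
//	badRequests, metrics, err := HighLatencyRequests(c, nodeCount)
//	framework.ExpectNoError(err)
//	gomega.Expect(badRequests).To(gomega.BeNumerically("==", 0))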
func HighLatencyRequests(c clientset.Interface, nodeCount int) (int, *APIResponsiveness, error) {
	isBigCluster := (nodeCount > bigClusterNodeCountThreshold)
	metrics, err := readLatencyMetrics(c)
	if err != nil {
		return 0, metrics, err
	}
	sort.Sort(sort.Reverse(metrics))
	badMetrics := 0
	top := 5
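	// Log the top five request types by 99th-percentile latency, plus every
	// request type whose latency exceeded its threshold.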
	for i := range metrics.APICalls {
		latency := metrics.APICalls[i].Latency.Perc99
		isListCall := (metrics.APICalls[i].Verb == "LIST")
		isClusterScopedCall := (metrics.APICalls[i].Scope == "cluster")
		isBad := false
		latencyThreshold := apiCallLatencyThreshold
		if isListCall && isBigCluster {
			latencyThreshold = apiListCallLatencyThreshold
			if isClusterScopedCall {
				latencyThreshold = apiClusterScopeListCallThreshold
			}
		}
		if latency > latencyThreshold {
			isBad = true
			badMetrics++
		}
		if top > 0 || isBad {
			top--
			prefix := ""
			if isBad {
				prefix = "WARNING "
			}
			e2elog.Logf("%vTop latency metric: %+v", prefix, metrics.APICalls[i])
		}
	}
	return badMetrics, metrics, nil
}

// VerifyLatencyWithinThreshold verifies whether the 50th, 90th and 99th percentiles of a
// latency metric are within the expected threshold.
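//
// For example, with hypothetical thresholds:
//
//	threshold := LatencyMetric{Perc50: 10 * time.Second, Perc90: 15 * time.Second, Perc99: 20 * time.Second}
//	err := VerifyLatencyWithinThreshold(threshold, actualStartupLatency, "pod startup")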
func VerifyLatencyWithinThreshold(threshold, actual LatencyMetric, metricName string) error {
	if actual.Perc50 > threshold.Perc50 {
		return fmt.Errorf("too high %v latency 50th percentile: %v", metricName, actual.Perc50)
	}
	if actual.Perc90 > threshold.Perc90 {
		return fmt.Errorf("too high %v latency 90th percentile: %v", metricName, actual.Perc90)
	}
	if actual.Perc99 > threshold.Perc99 {
		return fmt.Errorf("too high %v latency 99th percentile: %v", metricName, actual.Perc99)
	}
	return nil
}

// ResetMetrics resets latency metrics in apiserver.
func ResetMetrics(c clientset.Interface) error {
	e2elog.Logf("Resetting latency metrics in apiserver...")
	body, err := c.CoreV1().RESTClient().Delete().AbsPath("/metrics").DoRaw()
	if err != nil {
		return err
	}
	if string(body) != "metrics reset\n" {
		return fmt.Errorf("unexpected response: %q", string(body))
	}
	return nil
}

// getMetrics retrieves metrics information from the apiserver's /metrics endpoint.
func getMetrics(c clientset.Interface) (string, error) {
	body, err := c.CoreV1().RESTClient().Get().AbsPath("/metrics").DoRaw()
	if err != nil {
		return "", err
	}
	return string(body), nil
}

// sendRestRequestToScheduler sends a GET or DELETE request to the kube-scheduler
// metrics endpoint and returns the response body.
func sendRestRequestToScheduler(c clientset.Interface, op, provider, cloudMasterName, masterHostname string) (string, error) {
	opUpper := strings.ToUpper(op)
	if opUpper != "GET" && opUpper != "DELETE" {
		return "", fmt.Errorf("unknown REST request")
	}

	nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
	// The following 4 lines are intended to replace framework.ExpectNoError(err).
	if err != nil {
		e2elog.Logf("Unexpected error occurred: %v", err)
	}
	gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred())
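
	// If a master node is registered with the apiserver, reach the scheduler
	// through the apiserver proxy; otherwise fall back to SSH below.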
	var masterRegistered = false
	for _, node := range nodes.Items {
		if system.DeprecatedMightBeMasterNode(node.Name) {
			masterRegistered = true
		}
	}

	var responseText string
	if masterRegistered {
		ctx, cancel := context.WithTimeout(context.Background(), SingleCallTimeout)
		defer cancel()

		body, err := c.CoreV1().RESTClient().Verb(opUpper).
			Context(ctx).
			Namespace(metav1.NamespaceSystem).
			Resource("pods").
			Name(fmt.Sprintf("kube-scheduler-%v:%v", cloudMasterName, ports.InsecureSchedulerPort)).
			SubResource("proxy").
			Suffix("metrics").
			Do().Raw()

		// The following 4 lines are intended to replace framework.ExpectNoError(err).
		if err != nil {
			e2elog.Logf("Unexpected error occurred: %v", err)
		}
		gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred())
		responseText = string(body)
	} else {
		// If the master is not registered, fall back to the old method of using SSH.
		if provider == "gke" || provider == "eks" {
			e2elog.Logf("Not grabbing scheduler metrics through master SSH: unsupported for %s", provider)
			return "", nil
		}

		cmd := "curl -X " + opUpper + " http://localhost:10251/metrics"
		sshResult, err := e2essh.SSH(cmd, masterHostname+":22", provider)
		if err != nil || sshResult.Code != 0 {
			return "", fmt.Errorf("unexpected error (code: %d) in ssh connection to master: %#v", sshResult.Code, err)
		}
		responseText = sshResult.Stdout
	}
	return responseText, nil
}

// getSchedulingLatency retrieves scheduler latency metrics.
func getSchedulingLatency(c clientset.Interface, provider, cloudMasterName, masterHostname string) (*SchedulingMetrics, error) {
	result := SchedulingMetrics{}
	data, err := sendRestRequestToScheduler(c, "GET", provider, cloudMasterName, masterHostname)
	if err != nil {
		return nil, err
	}

	samples, err := extractMetricSamples(data)
	if err != nil {
		return nil, err
	}

	for _, sample := range samples {
		if sample.Metric[model.MetricNameLabel] != schedulingLatencyMetricName {
			continue
		}

		var metric *LatencyMetric
		switch sample.Metric[schedulermetric.OperationLabel] {
		case schedulermetric.PredicateEvaluation:
			metric = &result.PredicateEvaluationLatency
		case schedulermetric.PriorityEvaluation:
			metric = &result.PriorityEvaluationLatency
		case schedulermetric.PreemptionEvaluation:
			metric = &result.PreemptionEvaluationLatency
		case schedulermetric.Binding:
			metric = &result.BindingLatency
		}
		if metric == nil {
			continue
		}

		quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64)
		if err != nil {
			return nil, err
		}
		setQuantile(metric, quantile, time.Duration(int64(float64(sample.Value)*float64(time.Second))))
	}
	return &result, nil
}

// VerifySchedulerLatency verifies (currently just by logging them) the scheduling latencies.
func VerifySchedulerLatency(c clientset.Interface, provider, cloudMasterName, masterHostname string) (*SchedulingMetrics, error) {
	latency, err := getSchedulingLatency(c, provider, cloudMasterName, masterHostname)
	if err != nil {
		return nil, err
	}
	return latency, nil
}

// ResetSchedulerMetrics sends a DELETE request to kube-scheduler for resetting metrics.
func ResetSchedulerMetrics(c clientset.Interface, provider, cloudMasterName, masterHostname string) error {
	responseText, err := sendRestRequestToScheduler(c, "DELETE", provider, cloudMasterName, masterHostname)
	if err != nil {
		return fmt.Errorf("unexpected response: %q, %v", responseText, err)
	}
	return nil
}

// PodLatencyData encapsulates pod startup latency information.
type PodLatencyData struct {
	// Name of the pod
	Name string
	// Node this pod was running on
	Node string
	// Latency information related to pod startup time
	Latency time.Duration
}

// LatencySlice is an array of PodLatencyData which encapsulates pod startup latency information.
type LatencySlice []PodLatencyData

func (a LatencySlice) Len() int           { return len(a) }
func (a LatencySlice) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a LatencySlice) Less(i, j int) bool { return a[i].Latency < a[j].Latency }

// ExtractLatencyMetrics returns latency metrics for each percentile (50th, 90th and 99th).
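// The input is assumed to be sorted in ascending latency order (e.g. via
// sort.Sort(LatencySlice(latencies))), since each percentile is read by index.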
func ExtractLatencyMetrics(latencies []PodLatencyData) LatencyMetric {
	length := len(latencies)
	perc50 := latencies[int(math.Ceil(float64(length*50)/100))-1].Latency
	perc90 := latencies[int(math.Ceil(float64(length*90)/100))-1].Latency
	perc99 := latencies[int(math.Ceil(float64(length*99)/100))-1].Latency
	perc100 := latencies[length-1].Latency
	return LatencyMetric{Perc50: perc50, Perc90: perc90, Perc99: perc99, Perc100: perc100}
}

// LogSuspiciousLatency logs metrics/docker errors from all nodes that had slow startup times.
// If latencyDataLag is nil then it will be populated from latencyData.
func LogSuspiciousLatency(latencyData []PodLatencyData, latencyDataLag []PodLatencyData, nodeCount int, c clientset.Interface) {
	if latencyDataLag == nil {
		latencyDataLag = latencyData
	}
	for _, l := range latencyData {
		if l.Latency > nodeStartupThreshold {
			HighLatencyKubeletOperations(c, 1*time.Second, l.Node, e2elog.Logf)
		}
	}
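	// Approximate throughput as nodeCount pods started within the time taken by
	// the slowest pod in latencyDataLag.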
e2elog.Logf("Approx throughput: %v pods/min",
|
|
float64(nodeCount)/(latencyDataLag[len(latencyDataLag)-1].Latency.Minutes()))
|
|
}
|
|
|
|
// PrintLatencies outputs latencies to log with readable format.
|
|
func PrintLatencies(latencies []PodLatencyData, header string) {
|
|
metrics := ExtractLatencyMetrics(latencies)
|
|
e2elog.Logf("10%% %s: %v", header, latencies[(len(latencies)*9)/10:])
|
|
e2elog.Logf("perc50: %v, perc90: %v, perc99: %v", metrics.Perc50, metrics.Perc90, metrics.Perc99)
|
|
}
|