delete unused func in latencies.go
This commit is contained in:
		@@ -27,17 +27,14 @@ go_library(
 | 
				
			|||||||
        "//pkg/kubelet/dockershim/metrics:go_default_library",
 | 
					        "//pkg/kubelet/dockershim/metrics:go_default_library",
 | 
				
			||||||
        "//pkg/kubelet/metrics:go_default_library",
 | 
					        "//pkg/kubelet/metrics:go_default_library",
 | 
				
			||||||
        "//pkg/master/ports:go_default_library",
 | 
					        "//pkg/master/ports:go_default_library",
 | 
				
			||||||
        "//pkg/scheduler/metrics:go_default_library",
 | 
					 | 
				
			||||||
        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
 | 
					        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
 | 
				
			||||||
        "//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
 | 
					        "//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
 | 
				
			||||||
        "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
 | 
					        "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
 | 
				
			||||||
        "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
 | 
					        "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
 | 
				
			||||||
        "//staging/src/k8s.io/component-base/metrics/testutil:go_default_library",
 | 
					        "//staging/src/k8s.io/component-base/metrics/testutil:go_default_library",
 | 
				
			||||||
        "//test/e2e/framework/log:go_default_library",
 | 
					        "//test/e2e/framework/log:go_default_library",
 | 
				
			||||||
        "//test/e2e/framework/ssh:go_default_library",
 | 
					 | 
				
			||||||
        "//test/e2e/perftype:go_default_library",
 | 
					        "//test/e2e/perftype:go_default_library",
 | 
				
			||||||
        "//test/e2e/system:go_default_library",
 | 
					        "//test/e2e/system:go_default_library",
 | 
				
			||||||
        "//vendor/github.com/onsi/gomega:go_default_library",
 | 
					 | 
				
			||||||
        "//vendor/k8s.io/klog:go_default_library",
 | 
					        "//vendor/k8s.io/klog:go_default_library",
 | 
				
			||||||
    ],
 | 
					    ],
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -18,7 +18,6 @@ package metrics
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import (
 | 
					import (
 | 
				
			||||||
	"fmt"
 | 
						"fmt"
 | 
				
			||||||
	"time"
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	e2eperftype "k8s.io/kubernetes/test/e2e/perftype"
 | 
						e2eperftype "k8s.io/kubernetes/test/e2e/perftype"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
@@ -61,51 +60,6 @@ func (a *APIResponsiveness) Less(i, j int) bool {
 | 
				
			|||||||
	return a.APICalls[i].Latency.Perc99 < a.APICalls[j].Latency.Perc99
 | 
						return a.APICalls[i].Latency.Perc99 < a.APICalls[j].Latency.Perc99
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Set request latency for a particular quantile in the APICall metric entry (creating one if necessary).
 | 
					 | 
				
			||||||
// 0 <= quantile <=1 (e.g. 0.95 is 95%tile, 0.5 is median)
 | 
					 | 
				
			||||||
// Only 0.5, 0.9 and 0.99 quantiles are supported.
 | 
					 | 
				
			||||||
func (a *APIResponsiveness) addMetricRequestLatency(resource, subresource, verb, scope string, quantile float64, latency time.Duration) {
 | 
					 | 
				
			||||||
	for i, apicall := range a.APICalls {
 | 
					 | 
				
			||||||
		if apicall.Resource == resource && apicall.Subresource == subresource && apicall.Verb == verb && apicall.Scope == scope {
 | 
					 | 
				
			||||||
			a.APICalls[i] = setQuantileAPICall(apicall, quantile, latency)
 | 
					 | 
				
			||||||
			return
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	apicall := setQuantileAPICall(APICall{Resource: resource, Subresource: subresource, Verb: verb, Scope: scope}, quantile, latency)
 | 
					 | 
				
			||||||
	a.APICalls = append(a.APICalls, apicall)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// 0 <= quantile <=1 (e.g. 0.95 is 95%tile, 0.5 is median)
 | 
					 | 
				
			||||||
// Only 0.5, 0.9 and 0.99 quantiles are supported.
 | 
					 | 
				
			||||||
func setQuantileAPICall(apicall APICall, quantile float64, latency time.Duration) APICall {
 | 
					 | 
				
			||||||
	setQuantile(&apicall.Latency, quantile, latency)
 | 
					 | 
				
			||||||
	return apicall
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Only 0.5, 0.9 and 0.99 quantiles are supported.
 | 
					 | 
				
			||||||
func setQuantile(metric *LatencyMetric, quantile float64, latency time.Duration) {
 | 
					 | 
				
			||||||
	switch quantile {
 | 
					 | 
				
			||||||
	case 0.5:
 | 
					 | 
				
			||||||
		metric.Perc50 = latency
 | 
					 | 
				
			||||||
	case 0.9:
 | 
					 | 
				
			||||||
		metric.Perc90 = latency
 | 
					 | 
				
			||||||
	case 0.99:
 | 
					 | 
				
			||||||
		metric.Perc99 = latency
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Add request count to the APICall metric entry (creating one if necessary).
 | 
					 | 
				
			||||||
func (a *APIResponsiveness) addMetricRequestCount(resource, subresource, verb, scope string, count int) {
 | 
					 | 
				
			||||||
	for i, apicall := range a.APICalls {
 | 
					 | 
				
			||||||
		if apicall.Resource == resource && apicall.Subresource == subresource && apicall.Verb == verb && apicall.Scope == scope {
 | 
					 | 
				
			||||||
			a.APICalls[i].Count += count
 | 
					 | 
				
			||||||
			return
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	apicall := APICall{Resource: resource, Subresource: subresource, Verb: verb, Count: count, Scope: scope}
 | 
					 | 
				
			||||||
	a.APICalls = append(a.APICalls, apicall)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// currentAPICallMetricsVersion is the current apicall performance metrics version. We should
 | 
					// currentAPICallMetricsVersion is the current apicall performance metrics version. We should
 | 
				
			||||||
// bump up the version each time we make incompatible change to the metrics.
 | 
					// bump up the version each time we make incompatible change to the metrics.
 | 
				
			||||||
const currentAPICallMetricsVersion = "v1"
 | 
					const currentAPICallMetricsVersion = "v1"
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -17,25 +17,11 @@ limitations under the License.
 | 
				
			|||||||
package metrics
 | 
					package metrics
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import (
 | 
					import (
 | 
				
			||||||
	"context"
 | 
					 | 
				
			||||||
	"fmt"
 | 
						"fmt"
 | 
				
			||||||
	"math"
 | 
						"math"
 | 
				
			||||||
	"sort"
 | 
					 | 
				
			||||||
	"strconv"
 | 
					 | 
				
			||||||
	"strings"
 | 
					 | 
				
			||||||
	"time"
 | 
						"time"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 | 
					 | 
				
			||||||
	"k8s.io/apimachinery/pkg/util/sets"
 | 
					 | 
				
			||||||
	clientset "k8s.io/client-go/kubernetes"
 | 
					 | 
				
			||||||
	"k8s.io/component-base/metrics/testutil"
 | 
					 | 
				
			||||||
	"k8s.io/kubernetes/pkg/master/ports"
 | 
					 | 
				
			||||||
	schedulermetric "k8s.io/kubernetes/pkg/scheduler/metrics"
 | 
					 | 
				
			||||||
	e2elog "k8s.io/kubernetes/test/e2e/framework/log"
 | 
						e2elog "k8s.io/kubernetes/test/e2e/framework/log"
 | 
				
			||||||
	e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
 | 
					 | 
				
			||||||
	"k8s.io/kubernetes/test/e2e/system"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	"github.com/onsi/gomega"
 | 
					 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const (
 | 
					const (
 | 
				
			||||||
@@ -43,119 +29,8 @@ const (
 | 
				
			|||||||
	// transient failures from failing tests.
 | 
						// transient failures from failing tests.
 | 
				
			||||||
	// TODO: client should not apply this timeout to Watch calls. Increased from 30s until that is fixed.
 | 
						// TODO: client should not apply this timeout to Watch calls. Increased from 30s until that is fixed.
 | 
				
			||||||
	SingleCallTimeout = 5 * time.Minute
 | 
						SingleCallTimeout = 5 * time.Minute
 | 
				
			||||||
 | 
					 | 
				
			||||||
	// nodeStartupThreshold is a rough estimate of the time allocated for a pod to start on a node.
 | 
					 | 
				
			||||||
	nodeStartupThreshold = 4 * time.Second
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// We are setting 1s threshold for apicalls even in small clusters to avoid flakes.
 | 
					 | 
				
			||||||
	// The problem is that if long GC is happening in small clusters (where we have e.g.
 | 
					 | 
				
			||||||
	// 1-core master machines) and tests are pretty short, it may consume significant
 | 
					 | 
				
			||||||
	// portion of CPU and basically stop all the real work.
 | 
					 | 
				
			||||||
	// Increasing threshold to 1s is within our SLO and should solve this problem.
 | 
					 | 
				
			||||||
	apiCallLatencyThreshold time.Duration = 1 * time.Second
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// We use a higher threshold for list apicalls if the cluster is big (i.e having > 500 nodes)
 | 
					 | 
				
			||||||
	// as list response sizes are bigger in general for big clusters. We also use a higher threshold
 | 
					 | 
				
			||||||
	// for list calls at cluster scope (this includes non-namespaced and all-namespaced calls).
 | 
					 | 
				
			||||||
	apiListCallLatencyThreshold      time.Duration = 5 * time.Second
 | 
					 | 
				
			||||||
	apiClusterScopeListCallThreshold time.Duration = 10 * time.Second
 | 
					 | 
				
			||||||
	bigClusterNodeCountThreshold                   = 500
 | 
					 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
var schedulingLatencyMetricName = schedulermetric.SchedulerSubsystem + "_" + schedulermetric.SchedulingLatencyName
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) {
 | 
					 | 
				
			||||||
	var a APIResponsiveness
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	body, err := getMetrics(c)
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		return nil, err
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	samples, err := testutil.ExtractMetricSamples(body)
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		return nil, err
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	ignoredResources := sets.NewString("events")
 | 
					 | 
				
			||||||
	// TODO: figure out why we're getting non-capitalized proxy and fix this.
 | 
					 | 
				
			||||||
	ignoredVerbs := sets.NewString("WATCH", "WATCHLIST", "PROXY", "proxy", "CONNECT")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	for _, sample := range samples {
 | 
					 | 
				
			||||||
		// Example line:
 | 
					 | 
				
			||||||
		// apiserver_request_latencies_summary{resource="namespaces",verb="LIST",quantile="0.99"} 908
 | 
					 | 
				
			||||||
		// apiserver_request_total{resource="pods",verb="LIST",client="kubectl",code="200",contentType="json"} 233
 | 
					 | 
				
			||||||
		if sample.Metric[testutil.MetricNameLabel] != "apiserver_request_latencies_summary" &&
 | 
					 | 
				
			||||||
			sample.Metric[testutil.MetricNameLabel] != "apiserver_request_total" {
 | 
					 | 
				
			||||||
			continue
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		resource := string(sample.Metric["resource"])
 | 
					 | 
				
			||||||
		subresource := string(sample.Metric["subresource"])
 | 
					 | 
				
			||||||
		verb := string(sample.Metric["verb"])
 | 
					 | 
				
			||||||
		scope := string(sample.Metric["scope"])
 | 
					 | 
				
			||||||
		if ignoredResources.Has(resource) || ignoredVerbs.Has(verb) {
 | 
					 | 
				
			||||||
			continue
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		switch sample.Metric[testutil.MetricNameLabel] {
 | 
					 | 
				
			||||||
		case "apiserver_request_latencies_summary":
 | 
					 | 
				
			||||||
			latency := sample.Value
 | 
					 | 
				
			||||||
			quantile, err := strconv.ParseFloat(string(sample.Metric[testutil.QuantileLabel]), 64)
 | 
					 | 
				
			||||||
			if err != nil {
 | 
					 | 
				
			||||||
				return nil, err
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			a.addMetricRequestLatency(resource, subresource, verb, scope, quantile, time.Duration(int64(latency))*time.Microsecond)
 | 
					 | 
				
			||||||
		case "apiserver_request_total":
 | 
					 | 
				
			||||||
			count := sample.Value
 | 
					 | 
				
			||||||
			a.addMetricRequestCount(resource, subresource, verb, scope, int(count))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return &a, err
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// HighLatencyRequests prints top five summary metrics for request types with latency and returns
 | 
					 | 
				
			||||||
// number of such request types above threshold. We use a higher threshold for
 | 
					 | 
				
			||||||
// list calls if nodeCount is above a given threshold (i.e. cluster is big).
 | 
					 | 
				
			||||||
func HighLatencyRequests(c clientset.Interface, nodeCount int) (int, *APIResponsiveness, error) {
 | 
					 | 
				
			||||||
	isBigCluster := (nodeCount > bigClusterNodeCountThreshold)
 | 
					 | 
				
			||||||
	metrics, err := readLatencyMetrics(c)
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		return 0, metrics, err
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	sort.Sort(sort.Reverse(metrics))
 | 
					 | 
				
			||||||
	badMetrics := 0
 | 
					 | 
				
			||||||
	top := 5
 | 
					 | 
				
			||||||
	for i := range metrics.APICalls {
 | 
					 | 
				
			||||||
		latency := metrics.APICalls[i].Latency.Perc99
 | 
					 | 
				
			||||||
		isListCall := (metrics.APICalls[i].Verb == "LIST")
 | 
					 | 
				
			||||||
		isClusterScopedCall := (metrics.APICalls[i].Scope == "cluster")
 | 
					 | 
				
			||||||
		isBad := false
 | 
					 | 
				
			||||||
		latencyThreshold := apiCallLatencyThreshold
 | 
					 | 
				
			||||||
		if isListCall && isBigCluster {
 | 
					 | 
				
			||||||
			latencyThreshold = apiListCallLatencyThreshold
 | 
					 | 
				
			||||||
			if isClusterScopedCall {
 | 
					 | 
				
			||||||
				latencyThreshold = apiClusterScopeListCallThreshold
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		if latency > latencyThreshold {
 | 
					 | 
				
			||||||
			isBad = true
 | 
					 | 
				
			||||||
			badMetrics++
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		if top > 0 || isBad {
 | 
					 | 
				
			||||||
			top--
 | 
					 | 
				
			||||||
			prefix := ""
 | 
					 | 
				
			||||||
			if isBad {
 | 
					 | 
				
			||||||
				prefix = "WARNING "
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			e2elog.Logf("%vTop latency metric: %+v", prefix, metrics.APICalls[i])
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return badMetrics, metrics, nil
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// VerifyLatencyWithinThreshold verifies whether 50, 90 and 99th percentiles of a latency metric are
 | 
					// VerifyLatencyWithinThreshold verifies whether 50, 90 and 99th percentiles of a latency metric are
 | 
				
			||||||
// within the expected threshold.
 | 
					// within the expected threshold.
 | 
				
			||||||
func VerifyLatencyWithinThreshold(threshold, actual LatencyMetric, metricName string) error {
 | 
					func VerifyLatencyWithinThreshold(threshold, actual LatencyMetric, metricName string) error {
 | 
				
			||||||
@@ -171,147 +46,6 @@ func VerifyLatencyWithinThreshold(threshold, actual LatencyMetric, metricName st
 | 
				
			|||||||
	return nil
 | 
						return nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// ResetMetrics resets latency metrics in apiserver.
 | 
					 | 
				
			||||||
func ResetMetrics(c clientset.Interface) error {
 | 
					 | 
				
			||||||
	e2elog.Logf("Resetting latency metrics in apiserver...")
 | 
					 | 
				
			||||||
	body, err := c.CoreV1().RESTClient().Delete().AbsPath("/metrics").DoRaw()
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		return err
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	if string(body) != "metrics reset\n" {
 | 
					 | 
				
			||||||
		return fmt.Errorf("Unexpected response: %q", string(body))
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return nil
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Retrieves metrics information.
 | 
					 | 
				
			||||||
func getMetrics(c clientset.Interface) (string, error) {
 | 
					 | 
				
			||||||
	body, err := c.CoreV1().RESTClient().Get().AbsPath("/metrics").DoRaw()
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		return "", err
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return string(body), nil
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Sends REST request to kube scheduler metrics
 | 
					 | 
				
			||||||
func sendRestRequestToScheduler(c clientset.Interface, op, provider, cloudMasterName, masterHostname string) (string, error) {
 | 
					 | 
				
			||||||
	opUpper := strings.ToUpper(op)
 | 
					 | 
				
			||||||
	if opUpper != "GET" && opUpper != "DELETE" {
 | 
					 | 
				
			||||||
		return "", fmt.Errorf("Unknown REST request")
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
 | 
					 | 
				
			||||||
	// The following 4 lines are intended to replace framework.ExpectNoError(err)
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		e2elog.Logf("Unexpected error occurred: %v", err)
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred())
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	var masterRegistered = false
 | 
					 | 
				
			||||||
	for _, node := range nodes.Items {
 | 
					 | 
				
			||||||
		if system.DeprecatedMightBeMasterNode(node.Name) {
 | 
					 | 
				
			||||||
			masterRegistered = true
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	var responseText string
 | 
					 | 
				
			||||||
	if masterRegistered {
 | 
					 | 
				
			||||||
		ctx, cancel := context.WithTimeout(context.Background(), SingleCallTimeout)
 | 
					 | 
				
			||||||
		defer cancel()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		body, err := c.CoreV1().RESTClient().Verb(opUpper).
 | 
					 | 
				
			||||||
			Context(ctx).
 | 
					 | 
				
			||||||
			Namespace(metav1.NamespaceSystem).
 | 
					 | 
				
			||||||
			Resource("pods").
 | 
					 | 
				
			||||||
			Name(fmt.Sprintf("kube-scheduler-%v:%v", cloudMasterName, ports.InsecureSchedulerPort)).
 | 
					 | 
				
			||||||
			SubResource("proxy").
 | 
					 | 
				
			||||||
			Suffix("metrics").
 | 
					 | 
				
			||||||
			Do().Raw()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		// The following 4 lines are intended to replace
 | 
					 | 
				
			||||||
		// framework.ExpectNoError(err).
 | 
					 | 
				
			||||||
		if err != nil {
 | 
					 | 
				
			||||||
			e2elog.Logf("Unexpected error occurred: %v", err)
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred())
 | 
					 | 
				
			||||||
		responseText = string(body)
 | 
					 | 
				
			||||||
	} else {
 | 
					 | 
				
			||||||
		// If master is not registered fall back to old method of using SSH.
 | 
					 | 
				
			||||||
		if provider == "gke" || provider == "eks" {
 | 
					 | 
				
			||||||
			e2elog.Logf("Not grabbing scheduler metrics through master SSH: unsupported for %s", provider)
 | 
					 | 
				
			||||||
			return "", nil
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		cmd := "curl -X " + opUpper + " http://localhost:10251/metrics"
 | 
					 | 
				
			||||||
		sshResult, err := e2essh.SSH(cmd, masterHostname+":22", provider)
 | 
					 | 
				
			||||||
		if err != nil || sshResult.Code != 0 {
 | 
					 | 
				
			||||||
			return "", fmt.Errorf("unexpected error (code: %d) in ssh connection to master: %#v", sshResult.Code, err)
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		responseText = sshResult.Stdout
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return responseText, nil
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Retrieves scheduler latency metrics.
 | 
					 | 
				
			||||||
func getSchedulingLatency(c clientset.Interface, provider, cloudMasterName, masterHostname string) (*SchedulingMetrics, error) {
 | 
					 | 
				
			||||||
	result := SchedulingMetrics{}
 | 
					 | 
				
			||||||
	data, err := sendRestRequestToScheduler(c, "GET", provider, cloudMasterName, masterHostname)
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		return nil, err
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	samples, err := testutil.ExtractMetricSamples(data)
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		return nil, err
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	for _, sample := range samples {
 | 
					 | 
				
			||||||
		if string(sample.Metric[testutil.MetricNameLabel]) != schedulingLatencyMetricName {
 | 
					 | 
				
			||||||
			continue
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		var metric *LatencyMetric
 | 
					 | 
				
			||||||
		switch sample.Metric[schedulermetric.OperationLabel] {
 | 
					 | 
				
			||||||
		case schedulermetric.PredicateEvaluation:
 | 
					 | 
				
			||||||
			metric = &result.PredicateEvaluationLatency
 | 
					 | 
				
			||||||
		case schedulermetric.PriorityEvaluation:
 | 
					 | 
				
			||||||
			metric = &result.PriorityEvaluationLatency
 | 
					 | 
				
			||||||
		case schedulermetric.PreemptionEvaluation:
 | 
					 | 
				
			||||||
			metric = &result.PreemptionEvaluationLatency
 | 
					 | 
				
			||||||
		case schedulermetric.Binding:
 | 
					 | 
				
			||||||
			metric = &result.BindingLatency
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		if metric == nil {
 | 
					 | 
				
			||||||
			continue
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		quantile, err := strconv.ParseFloat(string(sample.Metric[testutil.QuantileLabel]), 64)
 | 
					 | 
				
			||||||
		if err != nil {
 | 
					 | 
				
			||||||
			return nil, err
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		setQuantile(metric, quantile, time.Duration(int64(float64(sample.Value)*float64(time.Second))))
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return &result, nil
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// VerifySchedulerLatency verifies (currently just by logging them) the scheduling latencies.
 | 
					 | 
				
			||||||
func VerifySchedulerLatency(c clientset.Interface, provider, cloudMasterName, masterHostname string) (*SchedulingMetrics, error) {
 | 
					 | 
				
			||||||
	latency, err := getSchedulingLatency(c, provider, cloudMasterName, masterHostname)
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		return nil, err
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return latency, nil
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// ResetSchedulerMetrics sends a DELETE request to kube-scheduler for resetting metrics.
 | 
					 | 
				
			||||||
func ResetSchedulerMetrics(c clientset.Interface, provider, cloudMasterName, masterHostname string) error {
 | 
					 | 
				
			||||||
	responseText, err := sendRestRequestToScheduler(c, "DELETE", provider, cloudMasterName, masterHostname)
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		return fmt.Errorf("Unexpected response: %q, %v", responseText, err)
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return nil
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// PodLatencyData encapsulates pod startup latency information.
 | 
					// PodLatencyData encapsulates pod startup latency information.
 | 
				
			||||||
type PodLatencyData struct {
 | 
					type PodLatencyData struct {
 | 
				
			||||||
	// Name of the pod
 | 
						// Name of the pod
 | 
				
			||||||
@@ -339,21 +73,6 @@ func ExtractLatencyMetrics(latencies []PodLatencyData) LatencyMetric {
 | 
				
			|||||||
	return LatencyMetric{Perc50: perc50, Perc90: perc90, Perc99: perc99, Perc100: perc100}
 | 
						return LatencyMetric{Perc50: perc50, Perc90: perc90, Perc99: perc99, Perc100: perc100}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// LogSuspiciousLatency logs metrics/docker errors from all nodes that had slow startup times
 | 
					 | 
				
			||||||
// If latencyDataLag is nil then it will be populated from latencyData
 | 
					 | 
				
			||||||
func LogSuspiciousLatency(latencyData []PodLatencyData, latencyDataLag []PodLatencyData, nodeCount int, c clientset.Interface) {
 | 
					 | 
				
			||||||
	if latencyDataLag == nil {
 | 
					 | 
				
			||||||
		latencyDataLag = latencyData
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	for _, l := range latencyData {
 | 
					 | 
				
			||||||
		if l.Latency > nodeStartupThreshold {
 | 
					 | 
				
			||||||
			HighLatencyKubeletOperations(c, 1*time.Second, l.Node, e2elog.Logf)
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	e2elog.Logf("Approx throughput: %v pods/min",
 | 
					 | 
				
			||||||
		float64(nodeCount)/(latencyDataLag[len(latencyDataLag)-1].Latency.Minutes()))
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// PrintLatencies outputs latencies to log with readable format.
 | 
					// PrintLatencies outputs latencies to log with readable format.
 | 
				
			||||||
func PrintLatencies(latencies []PodLatencyData, header string) {
 | 
					func PrintLatencies(latencies []PodLatencyData, header string) {
 | 
				
			||||||
	metrics := ExtractLatencyMetrics(latencies)
 | 
						metrics := ExtractLatencyMetrics(latencies)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user