Add a flag that makes tests gather metrics from all running components after each test finishes.
This commit is contained in:
		| @@ -113,6 +113,7 @@ from-literal | ||||
| func-dest | ||||
| fuzz-iters | ||||
| gather-logs-sizes | ||||
| gather-metrics-at-teardown | ||||
| gather-resource-usage | ||||
| gce-project | ||||
| gce-service-account | ||||
|   | ||||
| @@ -42,8 +42,6 @@ var KnownApiServerMetrics = map[string][]string{ | ||||
| 	"etcd_request_latencies_summary":                 {"operation", "type", "quantile"}, | ||||
| 	"etcd_request_latencies_summary_count":           {"operation", "type"}, | ||||
| 	"etcd_request_latencies_summary_sum":             {"operation", "type"}, | ||||
| 	"get_token_count":                                {}, | ||||
| 	"get_token_fail_count":                           {}, | ||||
| 	"rest_client_request_latency_microseconds":       {"url", "verb", "quantile"}, | ||||
| 	"rest_client_request_latency_microseconds_count": {"url", "verb"}, | ||||
| 	"rest_client_request_latency_microseconds_sum":   {"url", "verb"}, | ||||
|   | ||||
| @@ -29,38 +29,55 @@ import ( | ||||
| ) | ||||
|  | ||||
| var CommonMetrics = map[string][]string{ | ||||
| 	"process_start_time_seconds":    {}, | ||||
| 	"process_resident_memory_bytes": {}, | ||||
| 	"process_virtual_memory_bytes":  {}, | ||||
| 	"process_cpu_seconds_total":     {}, | ||||
| 	"process_max_fds":               {}, | ||||
| 	"process_open_fds":              {}, | ||||
|  | ||||
| 	"http_request_size_bytes":                  {"handler", "quantile"}, | ||||
| 	"http_request_size_bytes_count":            {"handler"}, | ||||
| 	"http_request_size_bytes_sum":              {"handler"}, | ||||
| 	"http_request_duration_microseconds":       {"handler", "quantile"}, | ||||
| 	"http_request_duration_microseconds_count": {"handler"}, | ||||
| 	"http_request_duration_microseconds_sum":   {"handler"}, | ||||
| 	"http_requests_total":                      {"handler", "method", "code"}, | ||||
|  | ||||
| 	"http_response_size_bytes":       {"handler", "quantile"}, | ||||
| 	"http_response_size_bytes_count": {"handler"}, | ||||
| 	"http_response_size_bytes_sum":   {"handler"}, | ||||
|  | ||||
| 	"ssh_tunnel_open_fail_count": {}, | ||||
| 	"ssh_tunnel_open_count":      {}, | ||||
|  | ||||
| 	"get_token_count":                          {}, | ||||
| 	"get_token_fail_count":                     {}, | ||||
| 	"go_gc_duration_seconds":                   {"quantile"}, | ||||
| 	"go_gc_duration_seconds_count":             {}, | ||||
| 	"go_gc_duration_seconds_sum":               {}, | ||||
| 	"go_goroutines":                            {}, | ||||
|  | ||||
| 	"http_request_duration_microseconds":       {"handler", "quantile"}, | ||||
| 	"http_request_duration_microseconds_count": {"handler"}, | ||||
| 	"http_request_duration_microseconds_sum":   {"handler"}, | ||||
| 	"http_request_size_bytes":                  {"handler", "quantile"}, | ||||
| 	"http_request_size_bytes_count":            {"handler"}, | ||||
| 	"http_request_size_bytes_sum":              {"handler"}, | ||||
| 	"http_requests_total":                      {"handler", "method", "code"}, | ||||
| 	"http_response_size_bytes":                 {"handler", "quantile"}, | ||||
| 	"http_response_size_bytes_count":           {"handler"}, | ||||
| 	"http_response_size_bytes_sum":             {"handler"}, | ||||
| 	"kubernetes_build_info":                    {"major", "minor", "gitCommit", "gitTreeState", "gitVersion"}, | ||||
| 	"process_cpu_seconds_total":                {}, | ||||
| 	"process_max_fds":                          {}, | ||||
| 	"process_open_fds":                         {}, | ||||
| 	"process_resident_memory_bytes":            {}, | ||||
| 	"process_start_time_seconds":               {}, | ||||
| 	"process_virtual_memory_bytes":             {}, | ||||
| 	"ssh_tunnel_open_count":                    {}, | ||||
| 	"ssh_tunnel_open_fail_count":               {}, | ||||
| } | ||||
|  | ||||
| type Metrics map[string]model.Samples | ||||
|  | ||||
| func PrintSample(sample *model.Sample) string { | ||||
| 	buf := make([]string, 0) | ||||
| 	// "id" is a VERY special label. For 'normal' containers it's useless, but it's necessary | ||||
| 	// for 'system' containers (e.g. /docker-daemon, /kubelet, etc.). A container is treated | ||||
| 	// as 'normal' when a "kubernetes_container_name" label is present, and only then is "id" | ||||
| 	// dropped from the output. It's hacky but it works... | ||||
| 	_, normalContainer := sample.Metric["kubernetes_container_name"] | ||||
| 	for k, v := range sample.Metric { | ||||
| 		if strings.HasPrefix(string(k), "__") || KubeletMetricsLabelsToSkip.Has(string(k)) { | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		if string(k) == "id" && normalContainer { | ||||
| 			continue | ||||
| 		} | ||||
| 		buf = append(buf, fmt.Sprintf("%v=%v", string(k), v)) | ||||
| 	} | ||||
| 	return fmt.Sprintf("[%v] = %v", strings.Join(buf, ","), sample.Value) | ||||
| } | ||||
|  | ||||
| func NewMetrics() Metrics { | ||||
| 	result := make(Metrics) | ||||
| 	for metric := range CommonMetrics { | ||||
|   | ||||
| @@ -61,8 +61,6 @@ var KnownKubeletMetrics = map[string][]string{ | ||||
| 	"container_spec_memory_swap_limit_bytes":                 {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"}, | ||||
| 	"container_start_time_seconds":                           {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"}, | ||||
| 	"container_tasks_state":                                  {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name", "state"}, | ||||
| 	"get_token_count":                                        {}, | ||||
| 	"get_token_fail_count":                                   {}, | ||||
| 	"kubelet_container_manager_latency_microseconds":         {"operation_type", "quantile"}, | ||||
| 	"kubelet_container_manager_latency_microseconds_count":   {"operation_type"}, | ||||
| 	"kubelet_container_manager_latency_microseconds_sum":     {"operation_type"}, | ||||
| @@ -98,6 +96,12 @@ var KnownKubeletMetrics = map[string][]string{ | ||||
| 	"rest_client_request_status_codes":                       {"code", "host", "method"}, | ||||
| } | ||||
|  | ||||
| var KubeletMetricsLabelsToSkip = sets.NewString( | ||||
| 	"kubernetes_namespace", | ||||
| 	"image", | ||||
| 	"name", | ||||
| ) | ||||
|  | ||||
| type KubeletMetrics Metrics | ||||
|  | ||||
| func NewKubeletMetrics() KubeletMetrics { | ||||
|   | ||||
| @@ -90,6 +90,7 @@ func init() { | ||||
| 	flag.BoolVar(&testContext.CleanStart, "clean-start", false, "If true, purge all namespaces except default and system before running tests. This serves to cleanup test namespaces from failed/interrupted e2e runs in a long-lived cluster.") | ||||
| 	flag.BoolVar(&testContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", false, "If set to true framework will be monitoring resource usage of system add-ons in (some) e2e tests.") | ||||
| 	flag.BoolVar(&testContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.") | ||||
| 	flag.BoolVar(&testContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", false, "If set to true framwork will gather metrics from all components after each test.") | ||||
| } | ||||
|  | ||||
| func TestE2E(t *testing.T) { | ||||
|   | ||||
| @@ -26,6 +26,7 @@ import ( | ||||
| 	"k8s.io/kubernetes/pkg/api" | ||||
| 	client "k8s.io/kubernetes/pkg/client/unversioned" | ||||
| 	"k8s.io/kubernetes/pkg/fields" | ||||
| 	"k8s.io/kubernetes/pkg/metrics" | ||||
|  | ||||
| 	. "github.com/onsi/ginkgo" | ||||
| 	. "github.com/onsi/gomega" | ||||
| @@ -152,6 +153,38 @@ func (f *Framework) afterEach() { | ||||
| 		close(f.logsSizeCloseChannel) | ||||
| 		f.logsSizeWaitGroup.Wait() | ||||
| 	} | ||||
|  | ||||
| 	if testContext.GatherMetricsAfterTest { | ||||
| 		// TODO: enable Scheduler and ControllerManager metrics grabbing when Master's Kubelet will be registered. | ||||
| 		grabber, err := metrics.NewMetricsGrabber(f.Client, true, false, false, true) | ||||
| 		if err != nil { | ||||
| 			Logf("Failed to create MetricsGrabber. Skipping metrics gathering.") | ||||
| 		} else { | ||||
| 			received, err := grabber.Grab(nil) | ||||
| 			if err != nil { | ||||
| 				Logf("MetricsGrabber failed grab metrics. Skipping metrics gathering.") | ||||
| 			} else { | ||||
| 				buf := bytes.Buffer{} | ||||
| 				for interestingMetric := range InterestingApiServerMetrics { | ||||
| 					buf.WriteString(fmt.Sprintf("For %v:\n", interestingMetric)) | ||||
| 					for _, sample := range received.ApiServerMetrics[interestingMetric] { | ||||
| 						buf.WriteString(fmt.Sprintf("\t%v\n", metrics.PrintSample(sample))) | ||||
| 					} | ||||
| 				} | ||||
| 				for kubelet, grabbed := range received.KubeletMetrics { | ||||
| 					buf.WriteString(fmt.Sprintf("For %v:\n", kubelet)) | ||||
| 					for interestingMetric := range InterestingKubeletMetrics { | ||||
| 						buf.WriteString(fmt.Sprintf("\tFor %v:\n", interestingMetric)) | ||||
| 						for _, sample := range grabbed[interestingMetric] { | ||||
| 							buf.WriteString(fmt.Sprintf("\t\t%v\n", metrics.PrintSample(sample))) | ||||
| 						} | ||||
| 					} | ||||
| 				} | ||||
| 				Logf("%v", buf.String()) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Paranoia-- prevent reuse! | ||||
| 	f.Namespace = nil | ||||
| 	f.Client = nil | ||||
|   | ||||
| @@ -46,6 +46,48 @@ const ( | ||||
| 	apiCallLatencyLargeThreshold  time.Duration = 1 * time.Second | ||||
| ) | ||||
|  | ||||
| var InterestingApiServerMetrics = sets.NewString( | ||||
| 	"apiserver_request_count", | ||||
| 	"apiserver_request_latencies_bucket", | ||||
| 	"etcd_helper_cache_entry_count", | ||||
| 	"etcd_helper_cache_hit_count", | ||||
| 	"etcd_helper_cache_miss_count", | ||||
| 	"etcd_request_cache_add_latencies_summary", | ||||
| 	"etcd_request_cache_get_latencies_summary", | ||||
| 	"etcd_request_latencies_summary", | ||||
| 	"go_gc_duration_seconds", | ||||
| 	"go_goroutines", | ||||
| 	"process_cpu_seconds_total", | ||||
| 	"process_open_fds", | ||||
| 	"process_resident_memory_bytes", | ||||
| 	"process_start_time_seconds", | ||||
| 	"process_virtual_memory_bytes", | ||||
| ) | ||||
|  | ||||
| var InterestingKubeletMetrics = sets.NewString( | ||||
| 	"container_cpu_system_seconds_total", | ||||
| 	"container_cpu_user_seconds_total", | ||||
| 	"container_fs_io_time_weighted_seconds_total", | ||||
| 	"container_memory_usage_bytes", | ||||
| 	"container_spec_cpu_shares", | ||||
| 	"container_start_time_seconds", | ||||
| 	"go_gc_duration_seconds", | ||||
| 	"go_goroutines", | ||||
| 	"kubelet_container_manager_latency_microseconds", | ||||
| 	"kubelet_docker_errors", | ||||
| 	"kubelet_docker_operations_latency_microseconds", | ||||
| 	"kubelet_generate_pod_status_latency_microseconds", | ||||
| 	"kubelet_pod_start_latency_microseconds", | ||||
| 	"kubelet_pod_worker_latency_microseconds", | ||||
| 	"kubelet_pod_worker_start_latency_microseconds", | ||||
| 	"kubelet_sync_pods_latency_microseconds", | ||||
| 	"process_cpu_seconds_total", | ||||
| 	"process_open_fds", | ||||
| 	"process_resident_memory_bytes", | ||||
| 	"process_start_time_seconds", | ||||
| 	"process_virtual_memory_bytes", | ||||
| ) | ||||
|  | ||||
| // Dashboard metrics | ||||
| type LatencyMetric struct { | ||||
| 	Perc50 time.Duration `json:"Perc50"` | ||||
|   | ||||
| @@ -153,6 +153,7 @@ type TestContextType struct { | ||||
| 	// It will read the data every 30 seconds from all Nodes and print summary during afterEach. | ||||
| 	GatherKubeSystemResourceUsageData bool | ||||
| 	GatherLogsSizes                   bool | ||||
| 	GatherMetricsAfterTest            bool | ||||
| } | ||||
|  | ||||
| var testContext TestContextType | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 gmarek
					gmarek