Merge pull request #30568 from coufon/node_density_and_performance_test
Automatic merge from submit-queue

Add tag [Benchmark] to node e2e tests where performance limits are not verified. This PR adds a new tag "[Benchmark]" to the density and resource-usage node e2e tests; performance limits are not verified at the end of benchmark tests.
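The mechanism behind the new tag is simple: the regular and [Benchmark] contexts share the same measurement helpers (runDensityBatchTest, runDensitySeqTest, runResourceUsageTest in the diff below), and a trailing boolean decides whether the collected data is also checked against limits. As a rough orientation before the diff, here is a minimal, self-contained sketch of that gating pattern; the function and variable names are illustrative, not the test's own code.

package main

import (
	"fmt"
	"time"
)

// reportLatency always logs what was measured; it only enforces the limit
// when isVerify is true. Benchmark-tagged tests would pass isVerify=false,
// so they still produce data but never fail on performance limits.
func reportLatency(lags []time.Duration, limit time.Duration, isVerify bool) error {
	for _, l := range lags {
		fmt.Println("pod e2e startup latency:", l)
	}
	if !isVerify {
		return nil // benchmark mode: collect and log only
	}
	for _, l := range lags {
		if l > limit {
			return fmt.Errorf("latency %v exceeds limit %v", l, limit)
		}
	}
	return nil
}

func main() {
	lags := []time.Duration{2 * time.Second, 3 * time.Second}
	fmt.Println(reportLatency(lags, 2500*time.Millisecond, false)) // <nil>: logged only
	fmt.Println(reportLatency(lags, 2500*time.Millisecond, true))  // error: 3s exceeds limit
}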
		| @@ -49,31 +49,36 @@ const ( | ||||
|  | ||||
| var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { | ||||
| 	const ( | ||||
| 		// the data collection time of `resource collector' and the standalone cadvisor | ||||
| 		// is not synchronized. Therefore `resource collector' may miss data or | ||||
| 		// The data collection time of resource collector and the standalone cadvisor | ||||
| 		// is not synchronized, so resource collector may miss data or | ||||
| 		// collect duplicated data | ||||
| 		monitoringInterval    = 500 * time.Millisecond | ||||
| 		sleepBeforeCreatePods = 30 * time.Second | ||||
| 		containerStatsPollingPeriod = 500 * time.Millisecond | ||||
| 	) | ||||
|  | ||||
| 	var ( | ||||
| 		ns       string | ||||
| 		nodeName string | ||||
| 		rc       *ResourceCollector | ||||
| 	) | ||||
|  | ||||
| 	f := framework.NewDefaultFramework("density-test") | ||||
| 	podType := "density_test_pod" | ||||
|  | ||||
| 	BeforeEach(func() { | ||||
| 		ns = f.Namespace.Name | ||||
| 		nodeName = framework.TestContext.NodeName | ||||
| 		// Start a standalone cadvisor pod using 'createSync'; the pod is running when it returns | ||||
| 		createCadvisorPod(f) | ||||
| 		// Resource collector monitors fine-grained CPU/memory usage via a standalone cAdvisor with | ||||
| 		// 1s housekeeping interval | ||||
| 		rc = NewResourceCollector(containerStatsPollingPeriod) | ||||
| 	}) | ||||
|  | ||||
| 	AfterEach(func() { | ||||
| 	}) | ||||
|  | ||||
| 	Context("create a batch of pods", func() { | ||||
| 		// TODO(coufon): add more tests and the values are generous, set more precise limits after benchmark | ||||
| 		// TODO(coufon): the values are generous, set more precise limits with benchmark data | ||||
| 		// and add more tests | ||||
| 		dTests := []densityTest{ | ||||
| 			{ | ||||
| 				podsNr:   10, | ||||
| @@ -101,102 +106,55 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { | ||||
| 			itArg := testArg | ||||
| 			It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", | ||||
| 				itArg.podsNr, itArg.interval), func() { | ||||
| 				var ( | ||||
| 					mutex      = &sync.Mutex{} | ||||
| 					watchTimes = make(map[string]unversioned.Time, 0) | ||||
| 					stopCh     = make(chan struct{}) | ||||
| 				) | ||||
|  | ||||
| 				// create specifications of the test pods | ||||
| 				pods := newTestPods(itArg.podsNr, ImageRegistry[pauseImage], podType) | ||||
| 				batchLag, e2eLags := runDensityBatchTest(f, rc, itArg) | ||||
|  | ||||
| 				// start a standalone cadvisor pod | ||||
| 				// it uses `createSync', so the pod is running when it returns | ||||
| 				createCadvisorPod(f) | ||||
|  | ||||
| 				// `resource collector' monitors fine-grained CPU/memory usage via a standalone cAdvisor with | ||||
| 				// 1s housekeeping interval | ||||
| 				rc := NewResourceCollector(monitoringInterval) | ||||
|  | ||||
| 				// the controller watches the change of pod status | ||||
| 				controller := newInformerWatchPod(f, mutex, watchTimes, podType) | ||||
| 				go controller.Run(stopCh) | ||||
|  | ||||
| 				// Zhou: In tests we see that kubelet starts while it is busy with something; as a result `syncLoop' | ||||
| 				// does not respond to pod creation immediately. Creating the first pod has a delay of around 5s. | ||||
| 				// The node status has been `ready' so `wait and check node being ready' does not help here. | ||||
| 				// Now wait here for a grace period to have `syncLoop' be ready | ||||
| 				time.Sleep(sleepBeforeCreatePods) | ||||
|  | ||||
| 				// the density test only monitors the overhead of creating pod | ||||
| 				// or start earliest and call `rc.Reset()' here to clear the buffer | ||||
| 				rc.Start() | ||||
|  | ||||
| 				By("Creating a batch of pods") | ||||
| 				// it returns a map[`pod name']`creation time' as the creation timestamps | ||||
| 				createTimes := createBatchPodWithRateControl(f, pods, itArg.interval) | ||||
|  | ||||
| 				By("Waiting for all Pods to be observed by the watch...") | ||||
| 				// checks every 10s until all pods are running; it times out after 10min | ||||
| 				Eventually(func() bool { | ||||
| 					return len(watchTimes) == itArg.podsNr | ||||
| 				}, 10*time.Minute, 10*time.Second).Should(BeTrue()) | ||||
|  | ||||
| 				if len(watchTimes) < itArg.podsNr { | ||||
| 					framework.Failf("Timeout reached waiting for all Pods to be observed by the watch.") | ||||
| 				} | ||||
|  | ||||
| 				// stop the watching controller, and the resource collector | ||||
| 				close(stopCh) | ||||
| 				rc.Stop() | ||||
|  | ||||
| 				// data analysis | ||||
| 				var ( | ||||
| 					firstCreate unversioned.Time | ||||
| 					lastRunning unversioned.Time | ||||
| 					init        = true | ||||
| 					e2eLags     = make([]framework.PodLatencyData, 0) | ||||
| 				) | ||||
|  | ||||
| 				for name, create := range createTimes { | ||||
| 					watch, ok := watchTimes[name] | ||||
| 					Expect(ok).To(Equal(true)) | ||||
|  | ||||
| 					e2eLags = append(e2eLags, | ||||
| 						framework.PodLatencyData{Name: name, Latency: watch.Time.Sub(create.Time)}) | ||||
|  | ||||
| 					if !init { | ||||
| 						if firstCreate.Time.After(create.Time) { | ||||
| 							firstCreate = create | ||||
| 						} | ||||
| 						if lastRunning.Time.Before(watch.Time) { | ||||
| 							lastRunning = watch | ||||
| 						} | ||||
| 					} else { | ||||
| 						init = false | ||||
| 						firstCreate, lastRunning = create, watch | ||||
| 					} | ||||
| 				} | ||||
|  | ||||
| 				sort.Sort(framework.LatencySlice(e2eLags)) | ||||
|  | ||||
| 				// verify latency | ||||
| 				By("Verifying latency") | ||||
| 				verifyLatency(lastRunning.Time.Sub(firstCreate.Time), e2eLags, itArg) | ||||
| 				printAndVerifyLatency(batchLag, e2eLags, itArg, true) | ||||
|  | ||||
| 				// verify resource | ||||
| 				By("Verifying resource") | ||||
| 				verifyResource(f, itArg.cpuLimits, itArg.memLimits, rc) | ||||
| 				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true) | ||||
| 			}) | ||||
| 		} | ||||
| 	}) | ||||
|  | ||||
| 	Context("create a batch of pods [Benchmark]", func() { | ||||
| 		dTests := []densityTest{ | ||||
| 			{ | ||||
| 				podsNr:   10, | ||||
| 				interval: 0 * time.Millisecond, | ||||
| 			}, | ||||
| 			{ | ||||
| 				podsNr:   35, | ||||
| 				interval: 0 * time.Millisecond, | ||||
| 			}, | ||||
| 			{ | ||||
| 				podsNr:   105, | ||||
| 				interval: 0 * time.Millisecond, | ||||
| 			}, | ||||
| 		} | ||||
|  | ||||
| 		for _, testArg := range dTests { | ||||
| 			itArg := testArg | ||||
| 			It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", | ||||
| 				itArg.podsNr, itArg.interval), func() { | ||||
|  | ||||
| 				batchLag, e2eLags := runDensityBatchTest(f, rc, itArg) | ||||
|  | ||||
| 				By("Verifying latency") | ||||
| 				printAndVerifyLatency(batchLag, e2eLags, itArg, false) | ||||
|  | ||||
| 				By("Verifying resource") | ||||
| 				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, false) | ||||
| 			}) | ||||
| 		} | ||||
| 	}) | ||||
|  | ||||
| 	Context("create a sequence of pods", func() { | ||||
| 		// TODO(coufon): add more tests and the values are generous, set more precise limits after benchmark | ||||
| 		dTests := []densityTest{ | ||||
| 			{ | ||||
| 				podsNr:   10, | ||||
| 				bgPodsNr: 10, | ||||
| 				bgPodsNr: 50, | ||||
| 				cpuLimits: framework.ContainersCPUSummary{ | ||||
| 					stats.SystemContainerKubelet: {0.50: 0.20, 0.95: 0.25}, | ||||
| 					stats.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60}, | ||||
| @@ -217,34 +175,46 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { | ||||
| 			itArg := testArg | ||||
| 			It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", | ||||
| 				itArg.podsNr, itArg.bgPodsNr), func() { | ||||
| 				bgPods := newTestPods(itArg.bgPodsNr, ImageRegistry[pauseImage], "background_pod") | ||||
| 				testPods := newTestPods(itArg.podsNr, ImageRegistry[pauseImage], podType) | ||||
|  | ||||
| 				createCadvisorPod(f) | ||||
| 				rc := NewResourceCollector(monitoringInterval) | ||||
| 				batchlag, e2eLags := runDensitySeqTest(f, rc, itArg) | ||||
|  | ||||
| 				By("Creating a batch of background pods") | ||||
| 				// CreateBatch is synchronized | ||||
| 				// all pods are running when it returns | ||||
| 				f.PodClient().CreateBatch(bgPods) | ||||
|  | ||||
| 				time.Sleep(sleepBeforeCreatePods) | ||||
|  | ||||
| 				// starting resource monitoring | ||||
| 				rc.Start() | ||||
|  | ||||
| 				// do a sequential creation of pod (back to back) | ||||
| 				batchlag, e2eLags := createBatchPodSequential(f, testPods) | ||||
|  | ||||
| 				rc.Stop() | ||||
|  | ||||
| 				// verify latency | ||||
| 				By("Verifying latency") | ||||
| 				verifyLatency(batchlag, e2eLags, itArg) | ||||
| 				printAndVerifyLatency(batchlag, e2eLags, itArg, true) | ||||
|  | ||||
| 				// verify resource | ||||
| 				By("Verifying resource") | ||||
| 				verifyResource(f, itArg.cpuLimits, itArg.memLimits, rc) | ||||
| 				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true) | ||||
| 			}) | ||||
| 		} | ||||
| 	}) | ||||
|  | ||||
| 	Context("create a sequence of pods [Benchmark]", func() { | ||||
| 		dTests := []densityTest{ | ||||
| 			{ | ||||
| 				podsNr:   10, | ||||
| 				bgPodsNr: 50, | ||||
| 			}, | ||||
| 			{ | ||||
| 				podsNr:   30, | ||||
| 				bgPodsNr: 50, | ||||
| 			}, | ||||
| 			{ | ||||
| 				podsNr:   50, | ||||
| 				bgPodsNr: 50, | ||||
| 			}, | ||||
| 		} | ||||
|  | ||||
| 		for _, testArg := range dTests { | ||||
| 			itArg := testArg | ||||
| 			It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", | ||||
| 				itArg.podsNr, itArg.bgPodsNr), func() { | ||||
|  | ||||
| 				batchlag, e2eLags := runDensitySeqTest(f, rc, itArg) | ||||
|  | ||||
| 				By("Verifying latency") | ||||
| 				printAndVerifyLatency(batchlag, e2eLags, itArg, false) | ||||
|  | ||||
| 				By("Verifying resource") | ||||
| 				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, false) | ||||
| 			}) | ||||
| 		} | ||||
| 	}) | ||||
| @@ -257,14 +227,116 @@ type densityTest struct { | ||||
| 	bgPodsNr int | ||||
| 	// interval between creating pod (rate control) | ||||
| 	interval time.Duration | ||||
| 	// resource bound | ||||
| 	// performance limits | ||||
| 	cpuLimits            framework.ContainersCPUSummary | ||||
| 	memLimits            framework.ResourceUsagePerContainer | ||||
| 	podStartupLimits     framework.LatencyMetric | ||||
| 	podBatchStartupLimit time.Duration | ||||
| } | ||||
|  | ||||
| // it creates a batch of pods concurrently, using one goroutine for each creation. | ||||
| // runDensityBatchTest runs the density batch pod creation test | ||||
| func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest) (time.Duration, []framework.PodLatencyData) { | ||||
| 	const ( | ||||
| 		podType               = "density_test_pod" | ||||
| 		sleepBeforeCreatePods = 30 * time.Second | ||||
| 	) | ||||
| 	var ( | ||||
| 		mutex      = &sync.Mutex{} | ||||
| 		watchTimes = make(map[string]unversioned.Time, 0) | ||||
| 		stopCh     = make(chan struct{}) | ||||
| 	) | ||||
|  | ||||
| 	// create test pod data structure | ||||
| 	pods := newTestPods(testArg.podsNr, ImageRegistry[pauseImage], podType) | ||||
|  | ||||
| 	// the controller watches the change of pod status | ||||
| 	controller := newInformerWatchPod(f, mutex, watchTimes, podType) | ||||
| 	go controller.Run(stopCh) | ||||
| 	defer close(stopCh) | ||||
|  | ||||
| 	// TODO(coufon): in the test we found that kubelet starts while it is busy with something; as a result 'syncLoop' | ||||
| 	// does not respond to pod creation immediately. Creating the first pod has a delay of around 5s. | ||||
| 	// The node status has already been 'ready', so 'wait and check node being ready' does not help here. | ||||
| 	// Now wait here for a grace period to let 'syncLoop' be ready | ||||
| 	time.Sleep(sleepBeforeCreatePods) | ||||
|  | ||||
| 	rc.Start() | ||||
| 	defer rc.Stop() | ||||
|  | ||||
| 	By("Creating a batch of pods") | ||||
| 	// It returns a map['pod name']'creation time' containing the creation timestamps | ||||
| 	createTimes := createBatchPodWithRateControl(f, pods, testArg.interval) | ||||
|  | ||||
| 	By("Waiting for all Pods to be observed by the watch...") | ||||
|  | ||||
| 	Eventually(func() bool { | ||||
| 		return len(watchTimes) == testArg.podsNr | ||||
| 	}, 10*time.Minute, 10*time.Second).Should(BeTrue()) | ||||
|  | ||||
| 	if len(watchTimes) < testArg.podsNr { | ||||
| 		framework.Failf("Timeout reached waiting for all Pods to be observed by the watch.") | ||||
| 	} | ||||
|  | ||||
| 	// Analyze results | ||||
| 	var ( | ||||
| 		firstCreate unversioned.Time | ||||
| 		lastRunning unversioned.Time | ||||
| 		init        = true | ||||
| 		e2eLags     = make([]framework.PodLatencyData, 0) | ||||
| 	) | ||||
|  | ||||
| 	for name, create := range createTimes { | ||||
| 		watch, ok := watchTimes[name] | ||||
| 		Expect(ok).To(Equal(true)) | ||||
|  | ||||
| 		e2eLags = append(e2eLags, | ||||
| 			framework.PodLatencyData{Name: name, Latency: watch.Time.Sub(create.Time)}) | ||||
|  | ||||
| 		if !init { | ||||
| 			if firstCreate.Time.After(create.Time) { | ||||
| 				firstCreate = create | ||||
| 			} | ||||
| 			if lastRunning.Time.Before(watch.Time) { | ||||
| 				lastRunning = watch | ||||
| 			} | ||||
| 		} else { | ||||
| 			init = false | ||||
| 			firstCreate, lastRunning = create, watch | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	sort.Sort(framework.LatencySlice(e2eLags)) | ||||
| 	batchLag := lastRunning.Time.Sub(firstCreate.Time) | ||||
|  | ||||
| 	return batchLag, e2eLags | ||||
| } | ||||
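To make the analysis step of runDensityBatchTest concrete: batch lag is the span from the earliest creation timestamp to the latest observed-Running timestamp, while each pod's e2e lag is its own Running time minus its own creation time. A tiny standalone sketch with made-up timestamps (not data from a real run) illustrates the bookkeeping:

package main

import (
	"fmt"
	"sort"
	"time"
)

func main() {
	base := time.Now()
	// Hypothetical data: when each pod was created and when the watch saw it Running.
	createTimes := map[string]time.Time{
		"pod-a": base,
		"pod-b": base.Add(1 * time.Second),
		"pod-c": base.Add(2 * time.Second),
	}
	watchTimes := map[string]time.Time{
		"pod-a": base.Add(3 * time.Second),
		"pod-b": base.Add(5 * time.Second),
		"pod-c": base.Add(4 * time.Second),
	}

	var lags []time.Duration
	firstCreate, lastRunning := createTimes["pod-a"], watchTimes["pod-a"]
	for name, create := range createTimes {
		watch := watchTimes[name]
		lags = append(lags, watch.Sub(create))
		if create.Before(firstCreate) {
			firstCreate = create
		}
		if watch.After(lastRunning) {
			lastRunning = watch
		}
	}
	sort.Slice(lags, func(i, j int) bool { return lags[i] < lags[j] })

	fmt.Println("e2e lags:", lags)                          // [2s 3s 4s]
	fmt.Println("batch lag:", lastRunning.Sub(firstCreate)) // 5s
}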
|  | ||||
| // runDensitySeqTest runs the density sequential pod creation test | ||||
| func runDensitySeqTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest) (time.Duration, []framework.PodLatencyData) { | ||||
| 	const ( | ||||
| 		podType               = "density_test_pod" | ||||
| 		sleepBeforeCreatePods = 30 * time.Second | ||||
| 	) | ||||
| 	bgPods := newTestPods(testArg.bgPodsNr, ImageRegistry[pauseImage], "background_pod") | ||||
| 	testPods := newTestPods(testArg.podsNr, ImageRegistry[pauseImage], podType) | ||||
|  | ||||
| 	By("Creating a batch of background pods") | ||||
|  | ||||
| 	// CreateBatch is synchronized; all pods are running when it returns | ||||
| 	f.PodClient().CreateBatch(bgPods) | ||||
|  | ||||
| 	time.Sleep(sleepBeforeCreatePods) | ||||
|  | ||||
| 	rc.Start() | ||||
| 	defer rc.Stop() | ||||
|  | ||||
| 	// create pods sequentially (back-to-back) | ||||
| 	batchlag, e2eLags := createBatchPodSequential(f, testPods) | ||||
|  | ||||
| 	return batchlag, e2eLags | ||||
| } | ||||
|  | ||||
| // createBatchPodWithRateControl creates a batch of pods concurrently, using one goroutine for each creation. | ||||
| // between creations there is an interval for throughput control | ||||
| func createBatchPodWithRateControl(f *framework.Framework, pods []*api.Pod, interval time.Duration) map[string]unversioned.Time { | ||||
| 	createTimes := make(map[string]unversioned.Time) | ||||
| @@ -286,7 +358,7 @@ func checkPodDeleted(f *framework.Framework, podName string) error { | ||||
| 	return errors.New("Pod Not Deleted") | ||||
| } | ||||
|  | ||||
| // get prometheus metric `pod start latency' from kubelet | ||||
| // getPodStartLatency gets the Prometheus metric 'pod start latency' from kubelet | ||||
| func getPodStartLatency(node string) (framework.KubeletLatencyMetrics, error) { | ||||
| 	latencyMetrics := framework.KubeletLatencyMetrics{} | ||||
| 	ms, err := metrics.GrabKubeletMetricsWithoutProxy(node) | ||||
| @@ -367,32 +439,6 @@ func newInformerWatchPod(f *framework.Framework, mutex *sync.Mutex, watchTimes m | ||||
| 	return controller | ||||
| } | ||||
|  | ||||
| // verifyLatency verifies whether pod creation latency satisfies the limit. | ||||
| func verifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, testArg densityTest) { | ||||
| 	framework.PrintLatencies(e2eLags, "worst client e2e total latencies") | ||||
|  | ||||
| 	// Zhou: do not trust `kubelet' metrics since they are not reset! | ||||
| 	latencyMetrics, _ := getPodStartLatency(kubeletAddr) | ||||
| 	framework.Logf("Kubelet Prometheus metrics (not reset):\n%s", framework.PrettyPrintJSON(latencyMetrics)) | ||||
|  | ||||
| 	// check whether e2e pod startup time is acceptable. | ||||
| 	podCreateLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLags)} | ||||
| 	framework.Logf("Pod create latency: %s", framework.PrettyPrintJSON(podCreateLatency)) | ||||
| 	framework.ExpectNoError(verifyPodStartupLatency(testArg.podStartupLimits, podCreateLatency.Latency)) | ||||
|  | ||||
| 	// check batch pod creation latency | ||||
| 	if testArg.podBatchStartupLimit > 0 { | ||||
| 		Expect(batchLag <= testArg.podBatchStartupLimit).To(Equal(true), "Batch creation startup time %v exceed limit %v", | ||||
| 			batchLag, testArg.podBatchStartupLimit) | ||||
| 	} | ||||
|  | ||||
| 	// calculate and log throughput | ||||
| 	throughputBatch := float64(testArg.podsNr) / batchLag.Minutes() | ||||
| 	framework.Logf("Batch creation throughput is %.1f pods/min", throughputBatch) | ||||
| 	throughputSequential := 1.0 / e2eLags[len(e2eLags)-1].Latency.Minutes() | ||||
| 	framework.Logf("Sequential creation throughput is %.1f pods/min", throughputSequential) | ||||
| } | ||||
|  | ||||
| // createBatchPodSequential creates pods back-to-back in sequence. | ||||
| func createBatchPodSequential(f *framework.Framework, pods []*api.Pod) (time.Duration, []framework.PodLatencyData) { | ||||
| 	batchStartTime := unversioned.Now() | ||||
| @@ -401,9 +447,38 @@ func createBatchPodSequential(f *framework.Framework, pods []*api.Pod) (time.Dur | ||||
| 		create := unversioned.Now() | ||||
| 		f.PodClient().CreateSync(pod) | ||||
| 		e2eLags = append(e2eLags, | ||||
| 			framework.PodLatencyData{Name: pod.ObjectMeta.Name, Latency: unversioned.Now().Time.Sub(create.Time)}) | ||||
| 			framework.PodLatencyData{Name: pod.Name, Latency: unversioned.Now().Time.Sub(create.Time)}) | ||||
| 	} | ||||
| 	batchLag := unversioned.Now().Time.Sub(batchStartTime.Time) | ||||
| 	sort.Sort(framework.LatencySlice(e2eLags)) | ||||
| 	return batchLag, e2eLags | ||||
| } | ||||
|  | ||||
| // printAndVerifyLatency prints pod creation latency and, when isVerify is true, verifies that it satisfies the limits. | ||||
| func printAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, testArg densityTest, isVerify bool) { | ||||
| 	framework.PrintLatencies(e2eLags, "worst client e2e total latencies") | ||||
|  | ||||
| 	// TODO(coufon): do not trust `kubelet' metrics since they are not reset! | ||||
| 	latencyMetrics, _ := getPodStartLatency(kubeletAddr) | ||||
| 	framework.Logf("Kubelet Prometheus metrics (not reset):\n%s", framework.PrettyPrintJSON(latencyMetrics)) | ||||
|  | ||||
| 	// check whether e2e pod startup time is acceptable. | ||||
| 	podCreateLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLags)} | ||||
| 	framework.Logf("Pod create latency: %s", framework.PrettyPrintJSON(podCreateLatency)) | ||||
|  | ||||
| 	// calculate and log throughput | ||||
| 	throughputBatch := float64(testArg.podsNr) / batchLag.Minutes() | ||||
| 	framework.Logf("Batch creation throughput is %.1f pods/min", throughputBatch) | ||||
| 	throughputSequential := 1.0 / e2eLags[len(e2eLags)-1].Latency.Minutes() | ||||
| 	framework.Logf("Sequential creation throughput is %.1f pods/min", throughputSequential) | ||||
|  | ||||
| 	if isVerify { | ||||
| 		framework.ExpectNoError(verifyPodStartupLatency(testArg.podStartupLimits, podCreateLatency.Latency)) | ||||
|  | ||||
| 		// check batch pod creation latency | ||||
| 		if testArg.podBatchStartupLimit > 0 { | ||||
| 			Expect(batchLag <= testArg.podBatchStartupLimit).To(Equal(true), "Batch creation startup time %v exceed limit %v", | ||||
| 				batchLag, testArg.podBatchStartupLimit) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
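The two throughput figures logged above are simple derived values: batch throughput divides the pod count by the batch lag, and sequential throughput is the reciprocal of the worst (last after sorting) per-pod latency. A small sketch with illustrative numbers (not measurements from this PR):

package main

import (
	"fmt"
	"time"
)

func main() {
	// Hypothetical batch run: 105 pods, all Running 3m30s after the first create.
	podsNr := 105
	batchLag := 3*time.Minute + 30*time.Second
	fmt.Printf("Batch creation throughput is %.1f pods/min\n",
		float64(podsNr)/batchLag.Minutes()) // 30.0 pods/min

	// Hypothetical worst per-pod e2e latency of 4s implies the sequential rate.
	worst := 4 * time.Second
	fmt.Printf("Sequential creation throughput is %.1f pods/min\n",
		1.0/worst.Minutes()) // 15.0 pods/min
}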
|   | ||||
| @@ -461,7 +461,7 @@ func (r *ResourceCollector) GetResourceSeriesWithLabels(labels map[string]string | ||||
| 	return seriesPerContainer | ||||
| } | ||||
|  | ||||
| // Zhou: code for getting container name of docker, copied from pkg/kubelet/cm/container_manager_linux.go | ||||
| // Code for getting container name of docker, copied from pkg/kubelet/cm/container_manager_linux.go | ||||
| // since they are not exposed | ||||
| const ( | ||||
| 	kubeletProcessName    = "kubelet" | ||||
|   | ||||
| @@ -35,12 +35,6 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() { | ||||
| 	const ( | ||||
| 		// Interval to poll /stats/container on a node | ||||
| 		containerStatsPollingPeriod = 10 * time.Second | ||||
| 		// The monitoring time for one test. | ||||
| 		monitoringTime = 10 * time.Minute | ||||
| 		// The periodic reporting period. | ||||
| 		reportingPeriod = 5 * time.Minute | ||||
|  | ||||
| 		sleepAfterCreatePods = 10 * time.Second | ||||
| 	) | ||||
|  | ||||
| 	var ( | ||||
| @@ -54,6 +48,12 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() { | ||||
| 	BeforeEach(func() { | ||||
| 		ns = f.Namespace.Name | ||||
| 		om = framework.NewRuntimeOperationMonitor(f.Client) | ||||
| 		// The test collects resource usage from a standalone cAdvisor pod. | ||||
| 		// The cAdvisor built into Kubelet has a housekeeping interval of 10s, which is too long to | ||||
| 		// show resource usage spikes, but shortening that interval increases the overhead | ||||
| 		// of kubelet. Hence we use a standalone cAdvisor pod. | ||||
| 		createCadvisorPod(f) | ||||
| 		rc = NewResourceCollector(containerStatsPollingPeriod) | ||||
| 	}) | ||||
|  | ||||
| 	AfterEach(func() { | ||||
| @@ -67,13 +67,11 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() { | ||||
| 	Context("regular resource usage tracking", func() { | ||||
| 		rTests := []resourceTest{ | ||||
| 			{ | ||||
| 				podsPerNode: 10, | ||||
| 				pods: 10, | ||||
| 				cpuLimits: framework.ContainersCPUSummary{ | ||||
| 					stats.SystemContainerKubelet: {0.50: 0.25, 0.95: 0.30}, | ||||
| 					stats.SystemContainerRuntime: {0.50: 0.30, 0.95: 0.40}, | ||||
| 				}, | ||||
| 				// We set the memory limits generously because the distribution | ||||
| 				// of the addon pods affects the memory usage on each node. | ||||
| 				memLimits: framework.ResourceUsagePerContainer{ | ||||
| 					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024}, | ||||
| 					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024}, | ||||
| @@ -84,70 +82,95 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() { | ||||
| 		for _, testArg := range rTests { | ||||
| 			itArg := testArg | ||||
|  | ||||
| 			podsPerNode := itArg.podsPerNode | ||||
| 			name := fmt.Sprintf("resource tracking for %d pods per node", podsPerNode) | ||||
|  | ||||
| 			It(name, func() { | ||||
| 				// The test collects resource usage from a standalone Cadvisor pod. | ||||
| 				// The cAdvisor of Kubelet has a housekeeping interval of 10s, which is too long to | ||||
| 				// show the resource usage spikes. But changing its interval increases the overhead | ||||
| 				// of kubelet. Hence we use a Cadvisor pod. | ||||
| 				createCadvisorPod(f) | ||||
| 				rc = NewResourceCollector(containerStatsPollingPeriod) | ||||
| 				rc.Start() | ||||
|  | ||||
| 				By("Creating a batch of Pods") | ||||
| 				pods := newTestPods(podsPerNode, ImageRegistry[pauseImage], "test_pod") | ||||
| 				for _, pod := range pods { | ||||
| 					f.PodClient().CreateSync(pod) | ||||
| 				} | ||||
|  | ||||
| 				// wait for a while to let the node be steady | ||||
| 				time.Sleep(sleepAfterCreatePods) | ||||
|  | ||||
| 				// Log once and flush the stats. | ||||
| 				rc.LogLatest() | ||||
| 				rc.Reset() | ||||
|  | ||||
| 				By("Start monitoring resource usage") | ||||
| 				// Periodically dump the cpu summary until the deadline is met. | ||||
| 				// Note that without calling framework.ResourceMonitor.Reset(), the stats | ||||
| 				// would occupy increasingly more memory. This should be fine | ||||
| 				// for the current test duration, but we should reclaim the | ||||
| 				// entries if we plan to monitor longer (e.g., 8 hours). | ||||
| 				deadline := time.Now().Add(monitoringTime) | ||||
| 				for time.Now().Before(deadline) { | ||||
| 					timeLeft := deadline.Sub(time.Now()) | ||||
| 					framework.Logf("Still running...%v left", timeLeft) | ||||
| 					if timeLeft < reportingPeriod { | ||||
| 						time.Sleep(timeLeft) | ||||
| 					} else { | ||||
| 						time.Sleep(reportingPeriod) | ||||
| 					} | ||||
| 					logPods(f.Client) | ||||
| 				} | ||||
|  | ||||
| 				rc.Stop() | ||||
|  | ||||
| 				By("Reporting overall resource usage") | ||||
| 				logPods(f.Client) | ||||
|  | ||||
| 			It(fmt.Sprintf("resource tracking for %d pods per node", itArg.pods), func() { | ||||
| 				runResourceUsageTest(f, rc, itArg) | ||||
| 				// Log and verify resource usage | ||||
| 				verifyResource(f, itArg.cpuLimits, itArg.memLimits, rc) | ||||
| 				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true) | ||||
| 			}) | ||||
| 		} | ||||
| 	}) | ||||
|  | ||||
| 	Context("regular resource usage tracking [Benchmark]", func() { | ||||
| 		rTests := []resourceTest{ | ||||
| 			{ | ||||
| 				pods: 10, | ||||
| 			}, | ||||
| 			{ | ||||
| 				pods: 35, | ||||
| 			}, | ||||
| 			{ | ||||
| 				pods: 105, | ||||
| 			}, | ||||
| 		} | ||||
|  | ||||
| 		for _, testArg := range rTests { | ||||
| 			itArg := testArg | ||||
|  | ||||
| 			It(fmt.Sprintf("resource tracking for %d pods per node", itArg.pods), func() { | ||||
| 				runResourceUsageTest(f, rc, itArg) | ||||
| 				// Log and verify resource usage | ||||
| 				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true) | ||||
| 			}) | ||||
| 		} | ||||
| 	}) | ||||
| }) | ||||
|  | ||||
| type resourceTest struct { | ||||
| 	podsPerNode int | ||||
| 	cpuLimits   framework.ContainersCPUSummary | ||||
| 	memLimits   framework.ResourceUsagePerContainer | ||||
| 	pods      int | ||||
| 	cpuLimits framework.ContainersCPUSummary | ||||
| 	memLimits framework.ResourceUsagePerContainer | ||||
| } | ||||
|  | ||||
| // verifyResource verifies whether resource usage satisfies the limit. | ||||
| func verifyResource(f *framework.Framework, cpuLimits framework.ContainersCPUSummary, | ||||
| 	memLimits framework.ResourceUsagePerContainer, rc *ResourceCollector) { | ||||
| // runResourceUsageTest runs the resource usage test | ||||
| func runResourceUsageTest(f *framework.Framework, rc *ResourceCollector, testArg resourceTest) { | ||||
| 	const ( | ||||
| 		// The monitoring time for one test | ||||
| 		monitoringTime = 10 * time.Minute | ||||
| 		// The periodic reporting period | ||||
| 		reportingPeriod = 5 * time.Minute | ||||
| 		// sleep for an interval here to measure steady data | ||||
| 		sleepAfterCreatePods = 10 * time.Second | ||||
| 	) | ||||
|  | ||||
| 	rc.Start() | ||||
| 	defer rc.Stop() | ||||
|  | ||||
| 	By("Creating a batch of Pods") | ||||
| 	pods := newTestPods(testArg.pods, ImageRegistry[pauseImage], "test_pod") | ||||
| 	f.PodClient().CreateBatch(pods) | ||||
|  | ||||
| 	// wait for a while to let the node be steady | ||||
| 	time.Sleep(sleepAfterCreatePods) | ||||
|  | ||||
| 	// Log once and flush the stats. | ||||
| 	rc.LogLatest() | ||||
| 	rc.Reset() | ||||
|  | ||||
| 	By("Start monitoring resource usage") | ||||
| 	// Periodically dump the cpu summary until the deadline is met. | ||||
| 	// Note that without calling framework.ResourceMonitor.Reset(), the stats | ||||
| 	// would occupy increasingly more memory. This should be fine | ||||
| 	// for the current test duration, but we should reclaim the | ||||
| 	// entries if we plan to monitor longer (e.g., 8 hours). | ||||
| 	deadline := time.Now().Add(monitoringTime) | ||||
| 	for time.Now().Before(deadline) { | ||||
| 		timeLeft := deadline.Sub(time.Now()) | ||||
| 		framework.Logf("Still running...%v left", timeLeft) | ||||
| 		if timeLeft < reportingPeriod { | ||||
| 			time.Sleep(timeLeft) | ||||
| 		} else { | ||||
| 			time.Sleep(reportingPeriod) | ||||
| 		} | ||||
| 		logPods(f.Client) | ||||
| 	} | ||||
|  | ||||
| 	By("Reporting overall resource usage") | ||||
| 	logPods(f.Client) | ||||
| } | ||||
|  | ||||
| // printAndVerifyResource prints the resource usage as perf data and verifies whether resource usage satisfies the limit. | ||||
| func printAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimits framework.ContainersCPUSummary, | ||||
| 	memLimits framework.ResourceUsagePerContainer, isVerify bool) { | ||||
| 	nodeName := framework.TestContext.NodeName | ||||
|  | ||||
| 	// Obtain memory PerfData | ||||
| @@ -158,20 +181,22 @@ func verifyResource(f *framework.Framework, cpuLimits framework.ContainersCPUSum | ||||
| 	usagePerNode := make(framework.ResourceUsagePerNode) | ||||
| 	usagePerNode[nodeName] = usagePerContainer | ||||
|  | ||||
| 	// Obtain cpu PerfData | ||||
| 	// Obtain CPU PerfData | ||||
| 	cpuSummary := rc.GetCPUSummary() | ||||
| 	framework.Logf("%s", formatCPUSummary(cpuSummary)) | ||||
|  | ||||
| 	cpuSummaryPerNode := make(framework.NodesCPUSummary) | ||||
| 	cpuSummaryPerNode[nodeName] = cpuSummary | ||||
|  | ||||
| 	// Log resource usage | ||||
| 	// Print resource usage | ||||
| 	framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode)) | ||||
| 	framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode)) | ||||
|  | ||||
| 	// Verify resource usage | ||||
| 	verifyMemoryLimits(f.Client, memLimits, usagePerNode) | ||||
| 	verifyCPULimits(cpuLimits, cpuSummaryPerNode) | ||||
| 	if isVerify { | ||||
| 		verifyMemoryLimits(f.Client, memLimits, usagePerNode) | ||||
| 		verifyCPULimits(cpuLimits, cpuSummaryPerNode) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func verifyMemoryLimits(c *client.Client, expected framework.ResourceUsagePerContainer, actual framework.ResourceUsagePerNode) { | ||||
|   | ||||