diff --git a/hack/jenkins/e2e.sh b/hack/jenkins/e2e.sh
index 6aa3137d0a5..f436d0f608d 100755
--- a/hack/jenkins/e2e.sh
+++ b/hack/jenkins/e2e.sh
@@ -112,8 +112,14 @@ GKE_REQUIRED_SKIP_TESTS=(
     "Shell"
     "Daemon\sset"
     "Deployment"
+    "experimental\sresource\susage\stracking" # Expect --max-pods=100
 )
 
+# Tests which cannot be run on AWS.
+AWS_REQUIRED_SKIP_TESTS=(
+    "experimental\sresource\susage\stracking" # Expect --max-pods=100
+)
+
 # The following tests are known to be flaky, and are thus run only in their own
 # -flaky- build variants.
 GCE_FLAKY_TESTS=(
@@ -301,6 +307,7 @@ case ${JOB_NAME} in
          ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
          ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
          ${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
+         ${AWS_REQUIRED_SKIP_TESTS[@]:+${AWS_REQUIRED_SKIP_TESTS[@]}} \
          )"}
    : ${ENABLE_DEPLOYMENTS:=true}
    # Override AWS defaults.
@@ -439,6 +446,7 @@ case ${JOB_NAME} in
          ${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}} \
          ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
          ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
+         ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
          )"}
    ;;
 
@@ -455,6 +463,7 @@ case ${JOB_NAME} in
          ${REBOOT_SKIP_TESTS[@]:+${REBOOT_SKIP_TESTS[@]}} \
          ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
          ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
+         ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
          )"}
    ;;
 
@@ -499,6 +508,7 @@ case ${JOB_NAME} in
          ${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}} \
          ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
          ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
+         ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
          )"}
    ;;
 
diff --git a/test/e2e/kubelet_perf.go b/test/e2e/kubelet_perf.go
index dad28b9a5f8..ff4281e8d1d 100644
--- a/test/e2e/kubelet_perf.go
+++ b/test/e2e/kubelet_perf.go
@@ -50,8 +50,53 @@ func logPodsOnNodes(c *client.Client, nodeNames []string) {
 	}
 }
 
+func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames sets.String, resourceMonitor *resourceMonitor) {
+	numNodes := nodeNames.Len()
+	totalPods := podsPerNode * numNodes
+	By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
+	rcName := fmt.Sprintf("resource%d-%s", totalPods, string(util.NewUUID()))
+
+	// TODO: Use a more realistic workload
+	Expect(RunRC(RCConfig{
+		Client:    framework.Client,
+		Name:      rcName,
+		Namespace: framework.Namespace.Name,
+		Image:     "gcr.io/google_containers/pause:go",
+		Replicas:  totalPods,
+	})).NotTo(HaveOccurred())
+
+	// Log once and flush the stats.
+	resourceMonitor.LogLatest()
+	resourceMonitor.Reset()
+
+	By("Start monitoring resource usage")
+	// Periodically dump the cpu summary until the deadline is met.
+	// Note that without calling resourceMonitor.Reset(), the stats
+	// would occupy increasingly more memory. This should be fine
+	// for the current test duration, but we should reclaim the
+	// entries if we plan to monitor longer (e.g., 8 hours).
+	deadline := time.Now().Add(monitoringTime)
+	for time.Now().Before(deadline) {
+		timeLeft := deadline.Sub(time.Now())
+		Logf("Still running...%v left", timeLeft)
+		if timeLeft < reportingPeriod {
+			time.Sleep(timeLeft)
+		} else {
+			time.Sleep(reportingPeriod)
+		}
+		logPodsOnNodes(framework.Client, nodeNames.List())
+	}
+
+	By("Reporting overall resource usage")
+	logPodsOnNodes(framework.Client, nodeNames.List())
+	resourceMonitor.LogCPUSummary()
+	resourceMonitor.LogLatest()
+
+	By("Deleting the RC")
+	DeleteRC(framework.Client, framework.Namespace.Name, rcName)
+}
+
 var _ = Describe("Kubelet", func() {
-	var numNodes int
 	var nodeNames sets.String
 	framework := NewFramework("kubelet-perf")
 	var resourceMonitor *resourceMonitor
@@ -59,7 +104,6 @@ var _ = Describe("Kubelet", func() {
 	BeforeEach(func() {
 		nodes, err := framework.Client.Nodes().List(labels.Everything(), fields.Everything())
 		expectNoError(err)
-		numNodes = len(nodes.Items)
 		nodeNames = sets.NewString()
 		for _, node := range nodes.Items {
 			nodeNames.Insert(node.Name)
@@ -72,59 +116,25 @@ var _ = Describe("Kubelet", func() {
 		resourceMonitor.Stop()
 	})
 
-	Describe("resource usage tracking", func() {
-		density := []int{0, 50}
+	Describe("regular resource usage tracking", func() {
+		density := []int{0, 35}
 		for i := range density {
 			podsPerNode := density[i]
 			name := fmt.Sprintf(
 				"over %v with %d pods per node.", monitoringTime, podsPerNode)
 			It(name, func() {
-				// Skip this test for GKE.
-				// TODO: Re-activate this for GKE
-				SkipIfProviderIs("gke")
-
-				totalPods := podsPerNode * numNodes
-				By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
-				rcName := fmt.Sprintf("resource%d-%s", totalPods, string(util.NewUUID()))
-
-				// TODO: Use a more realistic workload
-				Expect(RunRC(RCConfig{
-					Client:    framework.Client,
-					Name:      rcName,
-					Namespace: framework.Namespace.Name,
-					Image:     "gcr.io/google_containers/pause:go",
-					Replicas:  totalPods,
-				})).NotTo(HaveOccurred())
-
-				// Log once and flush the stats.
-				resourceMonitor.LogLatest()
-				resourceMonitor.Reset()
-
-				By("Start monitoring resource usage")
-				// Periodically dump the cpu summary until the deadline is met.
-				// Note that without calling resourceMonitor.Reset(), the stats
-				// would occupy increasingly more memory. This should be fine
-				// for the current test duration, but we should reclaim the
-				// entries if we plan to monitor longer (e.g., 8 hours).
-				deadline := time.Now().Add(monitoringTime)
-				for time.Now().Before(deadline) {
-					timeLeft := deadline.Sub(time.Now())
-					Logf("Still running...%v left", timeLeft)
-					if timeLeft < reportingPeriod {
-						time.Sleep(timeLeft)
-					} else {
-						time.Sleep(reportingPeriod)
-					}
-					logPodsOnNodes(framework.Client, nodeNames.List())
-				}
-
-				By("Reporting overall resource usage")
-				logPodsOnNodes(framework.Client, nodeNames.List())
-				resourceMonitor.LogCPUSummary()
-				resourceMonitor.LogLatest()
-
-				By("Deleting the RC")
-				DeleteRC(framework.Client, framework.Namespace.Name, rcName)
+				runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
+			})
+		}
+	})
+	Describe("experimental resource usage tracking", func() {
+		density := []int{50}
+		for i := range density {
+			podsPerNode := density[i]
+			name := fmt.Sprintf(
+				"over %v with %d pods per node.", monitoringTime, podsPerNode)
+			It(name, func() {
+				runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
 			})
 		}
 	})