modify resource_collector.go to get container names of kubelet and docker dynamically
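The change replaces the hard-coded systemContainers map (kubelet -> "kubelet", runtime -> "docker-daemon") with container names resolved at runtime from each process's cgroup, as the diff below shows. A minimal standalone sketch of the same idea (a hypothetical example, not part of the commit; it assumes a Linux node where pidof is available and kubelet/docker run as host processes):

package main

import (
	"fmt"
	"io/ioutil"
	"os/exec"
	"strconv"
	"strings"
)

// pidOf returns the first pid reported by `pidof name`.
func pidOf(name string) (int, error) {
	out, err := exec.Command("pidof", name).Output()
	if err != nil {
		return 0, fmt.Errorf("pidof %q failed: %v", name, err)
	}
	fields := strings.Fields(string(out))
	if len(fields) == 0 {
		return 0, fmt.Errorf("no pid found for %q", name)
	}
	return strconv.Atoi(fields[0])
}

// cgroupOf reads /proc/<pid>/cgroup and returns the path of the cpu hierarchy,
// e.g. "/kubelet" or "/system.slice/docker.service".
func cgroupOf(pid int) (string, error) {
	data, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid))
	if err != nil {
		return "", err
	}
	for _, line := range strings.Split(strings.TrimSpace(string(data)), "\n") {
		// each line looks like "4:cpu,cpuacct:/system.slice/docker.service"
		parts := strings.SplitN(line, ":", 3)
		if len(parts) != 3 {
			continue
		}
		for _, ctrl := range strings.Split(parts[1], ",") {
			if ctrl == "cpu" {
				return parts[2], nil
			}
		}
	}
	return "", fmt.Errorf("no cpu cgroup found for pid %d", pid)
}

func main() {
	for _, proc := range []string{"kubelet", "docker"} {
		pid, err := pidOf(proc)
		if err != nil {
			fmt.Println(err)
			continue
		}
		cg, err := cgroupOf(pid)
		if err != nil {
			fmt.Println(err)
			continue
		}
		fmt.Printf("%s (pid %d) -> cgroup %s\n", proc, pid, cg)
	}
}

The commit's actual helpers (getContainerNameForProcess, getPidsForProcess, getContainer) additionally honor a pid file and use the runc cgroups parser; see the diff of resource_collector.go below.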
@@ -1,9 +1,12 @@
/*
Copyright 2016 The Kubernetes Authors.
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -42,15 +45,13 @@ const (
	kubeletAddr = "localhost:10255"
)

var _ = framework.KubeDescribe("Density", func() {
var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
	const (
		// the data collection time of `resource collector' and the standalone cadvisor
		// is not synchronizated. Therefore `resource collector' may miss data or
		// collect duplicated data
		monitoringInterval    = 500 * time.Millisecond
		sleepBeforeEach       = 30 * time.Second
		sleepBeforeCreatePods = 30 * time.Second
		sleepAfterDeletePods  = 60 * time.Second
	)

	var (
@@ -67,7 +68,6 @@ var _ = framework.KubeDescribe("Density", func() {
	})

	AfterEach(func() {
		time.Sleep(sleepAfterDeletePods)
	})

	Context("create a batch of pods", func() {
@@ -76,41 +76,21 @@ var _ = framework.KubeDescribe("Density", func() {
				podsNr:   10,
				interval: 0 * time.Millisecond,
				cpuLimits: framework.ContainersCPUSummary{
					stats.SystemContainerKubelet: {0.50: 0.10, 0.95: 0.20},
					stats.SystemContainerRuntime: {0.50: 0.10, 0.95: 0.50},
					stats.SystemContainerKubelet: {0.50: 0.20, 0.95: 0.30},
					stats.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60},
				},
				memLimits: framework.ResourceUsagePerContainer{
					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 250 * 1024 * 1024},
					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
				},
				// percentile limit of single pod startup latency
				podStartupLimits: framework.LatencyMetric{
					Perc50: 7 * time.Second,
					Perc90: 10 * time.Second,
					Perc99: 15 * time.Second,
					Perc50: 10 * time.Second,
					Perc90: 15 * time.Second,
					Perc99: 20 * time.Second,
				},
				// upbound of startup latency of a batch of pods
				podBatchStartupLimit: 20 * time.Second,
			},
			{
				podsNr:   30,
				interval: 0 * time.Millisecond,
				cpuLimits: framework.ContainersCPUSummary{
					stats.SystemContainerKubelet: {0.50: 0.10, 0.95: 0.35},
					stats.SystemContainerRuntime: {0.50: 0.10, 0.95: 0.70},
				},
				memLimits: framework.ResourceUsagePerContainer{
					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
				},
				// percentile limit of single pod startup latency
				podStartupLimits: framework.LatencyMetric{
					Perc50: 30 * time.Second,
					Perc90: 35 * time.Second,
					Perc99: 40 * time.Second,
				},
				// upbound of startup latency of a batch of pods
				podBatchStartupLimit: 90 * time.Second,
				podBatchStartupLimit: 25 * time.Second,
			},
		}
@@ -139,7 +119,7 @@ var _ = framework.KubeDescribe("Density", func() {
				controller := newInformerWatchPod(f, mutex, watchTimes, podType)
				go controller.Run(stopCh)

				// Zhou: In test we see kubelet starts while it is busy on sth, as a result `syncLoop'
				// Zhou: In test we see kubelet starts while it is busy on something, as a result `syncLoop'
				// does not response to pod creation immediately. Creating the first pod has a delay around 5s.
				// The node status has been `ready' so `wait and check node being ready' does not help here.
				// Now wait here for a grace period to have `syncLoop' be ready
@@ -153,14 +133,14 @@ var _ = framework.KubeDescribe("Density", func() {
				// it returns a map[`pod name']`creation time' as the creation timestamps
				createTimes := createBatchPodWithRateControl(f, pods, itArg.interval)

				By("Waiting for all Pods begin observed by the watch...")
				// checks every 10s util all pods are running. it timeouts ater 10min
				By("Waiting for all Pods to be observed by the watch...")
				// checks every 10s util all pods are running. it times out ater 10min
				Eventually(func() bool {
					return len(watchTimes) == itArg.podsNr
				}, 10*time.Minute, 10*time.Second).Should(BeTrue())

				if len(watchTimes) < itArg.podsNr {
					framework.Failf("Timeout reached waiting for all Pods being observed by the watch.")
					framework.Failf("Timeout reached waiting for all Pods to be observed by the watch.")
				}

				// stop the watching controller, and the resource collector
@@ -204,18 +184,6 @@ var _ = framework.KubeDescribe("Density", func() {
				// verify resource
				By("Verifying resource")
				verifyResource(f, testArg, rm)

				// delete pods
				By("Deleting a batch of pods")
				deleteBatchPod(f, pods)

				// tear down cadvisor
				Expect(f.Client.Pods(ns).Delete(cadvisorPodName, api.NewDeleteOptions(30))).
					NotTo(HaveOccurred())

				Eventually(func() error {
					return checkPodDeleted(f, cadvisorPodName)
				}, 10*time.Minute, time.Second*3).Should(BeNil())
			})
		}
	})
@@ -226,34 +194,17 @@ var _ = framework.KubeDescribe("Density", func() {
				podsNr:   10,
				bgPodsNr: 10,
				cpuLimits: framework.ContainersCPUSummary{
					stats.SystemContainerKubelet: {0.50: 0.10, 0.95: 0.12},
					stats.SystemContainerRuntime: {0.50: 0.16, 0.95: 0.20},
					stats.SystemContainerKubelet: {0.50: 0.20, 0.95: 0.25},
					stats.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60},
				},
				memLimits: framework.ResourceUsagePerContainer{
					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
				},
				podStartupLimits: framework.LatencyMetric{
					Perc50: 1500 * time.Millisecond,
					Perc90: 2500 * time.Millisecond,
					Perc99: 3500 * time.Millisecond,
				},
			},
			{
				podsNr:   10,
				bgPodsNr: 30,
				cpuLimits: framework.ContainersCPUSummary{
					stats.SystemContainerKubelet: {0.50: 0.12, 0.95: 0.15},
					stats.SystemContainerRuntime: {0.50: 0.22, 0.95: 0.27},
				},
				memLimits: framework.ResourceUsagePerContainer{
					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
				},
				podStartupLimits: framework.LatencyMetric{
					Perc50: 1500 * time.Millisecond,
					Perc90: 2500 * time.Millisecond,
					Perc99: 3500 * time.Millisecond,
					Perc50: 3000 * time.Millisecond,
					Perc90: 4000 * time.Millisecond,
					Perc99: 5000 * time.Millisecond,
				},
			},
		}
@@ -273,7 +224,7 @@ var _ = framework.KubeDescribe("Density", func() {
				// all pods are running when it returns
				f.PodClient().CreateBatch(bgPods)

				//time.Sleep(sleepBeforeCreatePods)
				time.Sleep(sleepBeforeCreatePods)

				// starting resource monitoring
				rm.Start()
@@ -290,18 +241,6 @@ var _ = framework.KubeDescribe("Density", func() {
				// verify resource
				By("Verifying resource")
				verifyResource(f, testArg, rm)

				// delete pods
				By("Deleting a batch of pods")
				deleteBatchPod(f, append(bgPods, testPods...))

				// tear down cadvisor
				Expect(f.Client.Pods(ns).Delete(cadvisorPodName, api.NewDeleteOptions(30))).
					NotTo(HaveOccurred())

				Eventually(func() error {
					return checkPodDeleted(f, cadvisorPodName)
				}, 10*time.Minute, time.Second*3).Should(BeNil())
			})
		}
	})
@@ -309,7 +248,8 @@ var _ = framework.KubeDescribe("Density", func() {

type DensityTest struct {
	// number of pods
	podsNr   int
	podsNr int
	// number of background pods
	bgPodsNr int
	// interval between creating pod (rate control)
	interval time.Duration

@@ -239,6 +239,10 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
		}
	} else {
		cmdArgs = append(cmdArgs, getKubeletServerBin())
		cmdArgs = append(cmdArgs,
			"--kubelet-cgroups=/kubelet",
			"--runtime-cgroups=/docker-daemon",
		)
	}
	cmdArgs = append(cmdArgs,
		"--api-servers", "http://127.0.0.1:8080",
@@ -252,9 +256,6 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
		"--file-check-frequency", "10s", // Check file frequently so tests won't wait too long
		"--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
		"--pod-cidr=10.180.0.0/24", // Assign a fixed CIDR to the node because there is no node controller.
		"--cgroup-root=/",
		"--runtime-cgroups=/docker-daemon",
		"--kubelet-cgroups=/kubelet",
	)
	if es.cgroupsPerQOS {
		cmdArgs = append(cmdArgs,

@@ -1,13 +1,16 @@
/*
Copyright 2016 The Kubernetes Authors.
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing perissions and
See the License for the specific language governing permissions and
limitations under the License.
*/

@@ -16,7 +19,12 @@ package e2e_node
import (
	"bytes"
	"fmt"
	"io/ioutil"
	"log"
	"os"
	"os/exec"
	"sort"
	"strconv"
	"strings"
	"sync"
	"text/tabwriter"
@@ -24,10 +32,12 @@ import (

	cadvisorclient "github.com/google/cadvisor/client/v2"
	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
	"github.com/opencontainers/runc/libcontainer/cgroups"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
	"k8s.io/kubernetes/pkg/labels"
	"k8s.io/kubernetes/pkg/util"
	"k8s.io/kubernetes/pkg/util/runtime"
	"k8s.io/kubernetes/pkg/util/wait"
	"k8s.io/kubernetes/test/e2e/framework"

@@ -39,15 +49,12 @@ const (
	cadvisorImageName = "google/cadvisor:latest"
	cadvisorPodName   = "cadvisor"
	cadvisorPort      = 8090
	// housekeeping interval of Cadvisor (second)
	houseKeepingInterval = 1
)

var (
	systemContainers = map[string]string{
		//"root": "/",
		//stats.SystemContainerMisc: "misc"
		stats.SystemContainerKubelet: "kubelet",
		stats.SystemContainerRuntime: "docker-daemon",
	}
	systemContainers map[string]string
)

type ResourceCollector struct {
@@ -69,6 +76,18 @@ func NewResourceCollector(interval time.Duration) *ResourceCollector {
}

func (r *ResourceCollector) Start() {
	// Get the cgroup containers for kubelet and docker
	kubeletContainer, err := getContainerNameForProcess(kubeletProcessName, "")
	dockerContainer, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
	if err == nil {
		systemContainers = map[string]string{
			stats.SystemContainerKubelet: kubeletContainer,
			stats.SystemContainerRuntime: dockerContainer,
		}
	} else {
		framework.Failf("Failed to get docker container name in test-e2e-node resource collector.")
	}

	wait.Poll(1*time.Second, 1*time.Minute, func() (bool, error) {
		var err error
		r.client, err = cadvisorclient.NewClient(fmt.Sprintf("http://localhost:%d/", cadvisorPort))
@@ -123,7 +142,7 @@ func (r *ResourceCollector) collectStats(oldStatsMap map[string]*cadvisorapiv2.C
			framework.Logf("Error getting container stats, err: %v", err)
			return
		}
		cStats, ok := ret["/"+name]
		cStats, ok := ret[name]
		if !ok {
			framework.Logf("Missing info/stats for container %q", name)
			return
@@ -160,7 +179,7 @@ func (r *ResourceCollector) GetLatest() (framework.ResourceUsagePerContainer, er
	for key, name := range systemContainers {
		contStats, ok := r.buffers[name]
		if !ok || len(contStats) == 0 {
			return nil, fmt.Errorf("Resource usage is not ready yet")
			return nil, fmt.Errorf("Resource usage of %s:%s is not ready yet", key, name)
		}
		stats[key] = contStats[len(contStats)-1]
	}
@@ -257,11 +276,10 @@ func createCadvisorPod(f *framework.Framework) {
	f.PodClient().CreateSync(&api.Pod{
		ObjectMeta: api.ObjectMeta{
			Name: cadvisorPodName,
			//Labels: map[string]string{"type": cadvisorPodType, "name": cadvisorPodName},
		},
		Spec: api.PodSpec{
			// Don't restart the Pod since it is expected to exit
			RestartPolicy: api.RestartPolicyNever,
			// It uses a host port for the tests to collect data.
			// Currently we can not use port mapping in test-e2e-node.
			SecurityContext: &api.PodSecurityContext{
				HostNetwork: true,
			},
@@ -301,7 +319,7 @@ func createCadvisorPod(f *framework.Framework) {
					},
					Args: []string{
						"--profiling",
						"--housekeeping_interval=1s",
						fmt.Sprintf("--housekeeping_interval=%ds", houseKeepingInterval),
						fmt.Sprintf("--port=%d", cadvisorPort),
					},
				},
@@ -336,7 +354,7 @@ func deleteBatchPod(f *framework.Framework, pods []*api.Pod) {
		go func(pod *api.Pod) {
			defer wg.Done()

			err := f.Client.Pods(ns).Delete(pod.ObjectMeta.Name, api.NewDeleteOptions(60))
			err := f.Client.Pods(ns).Delete(pod.ObjectMeta.Name, api.NewDeleteOptions(30))
			Expect(err).NotTo(HaveOccurred())

			Expect(framework.WaitForPodToDisappear(f.Client, ns, pod.ObjectMeta.Name, labels.Everything(),
@@ -348,9 +366,9 @@ func deleteBatchPod(f *framework.Framework, pods []*api.Pod) {
	return
}

func newTestPods(podsPerNode int, imageName, podType string) []*api.Pod {
func newTestPods(numPods int, imageName, podType string) []*api.Pod {
	var pods []*api.Pod
	for i := 0; i < podsPerNode; i++ {
	for i := 0; i < numPods; i++ {
		podName := "test-" + string(util.NewUUID())
		labels := map[string]string{
			"type": podType,
@@ -363,7 +381,8 @@ func newTestPods(podsPerNode int, imageName, podType string) []*api.Pod {
					Labels: labels,
				},
				Spec: api.PodSpec{
					RestartPolicy: api.RestartPolicyNever,
					// ToDo: restart policy is always
					// check whether pods restart at the end of tests
					Containers: []api.Container{
						{
							Image: imageName,
@@ -375,3 +394,119 @@ func newTestPods(podsPerNode int, imageName, podType string) []*api.Pod {
	}
	return pods
}

// code for getting container name of docker
const (
	kubeletProcessName    = "kubelet"
	dockerProcessName     = "docker"
	dockerPidFile         = "/var/run/docker.pid"
	containerdProcessName = "docker-containerd"
	containerdPidFile     = "/run/docker/libcontainerd/docker-containerd.pid"
)

func getContainerNameForProcess(name, pidFile string) (string, error) {
	pids, err := getPidsForProcess(name, pidFile)
	if err != nil {
		return "", fmt.Errorf("failed to detect process id for %q - %v", name, err)
	}
	if len(pids) == 0 {
		return "", nil
	}
	cont, err := getContainer(pids[0])
	if err != nil {
		return "", err
	}
	return cont, nil
}

func getPidFromPidFile(pidFile string) (int, error) {
	file, err := os.Open(pidFile)
	if err != nil {
		return 0, fmt.Errorf("error opening pid file %s: %v", pidFile, err)
	}
	defer file.Close()

	data, err := ioutil.ReadAll(file)
	if err != nil {
		return 0, fmt.Errorf("error reading pid file %s: %v", pidFile, err)
	}

	pid, err := strconv.Atoi(string(data))
	if err != nil {
		return 0, fmt.Errorf("error parsing %s as a number: %v", string(data), err)
	}

	return pid, nil
}

func getPidsForProcess(name, pidFile string) ([]int, error) {
	if len(pidFile) > 0 {
		if pid, err := getPidFromPidFile(pidFile); err == nil {
			return []int{pid}, nil
		} else {
			// log the error and fall back to pidof
			runtime.HandleError(err)
		}
	}

	out, err := exec.Command("pidof", name).Output()
	if err != nil {
		return []int{}, fmt.Errorf("failed to find pid of %q: %v", name, err)
	}

	// The output of pidof is a list of pids.
	pids := []int{}
	for _, pidStr := range strings.Split(strings.TrimSpace(string(out)), " ") {
		pid, err := strconv.Atoi(pidStr)
		if err != nil {
			continue
		}
		pids = append(pids, pid)
	}
	return pids, nil
}

// getContainer returns the cgroup associated with the specified pid.
// It enforces a unified hierarchy for memory and cpu cgroups.
// On systemd environments, it uses the name=systemd cgroup for the specified pid.
func getContainer(pid int) (string, error) {
	cgs, err := cgroups.ParseCgroupFile(fmt.Sprintf("/proc/%d/cgroup", pid))
	if err != nil {
		return "", err
	}

	cpu, found := cgs["cpu"]
	if !found {
		return "", cgroups.NewNotFoundError("cpu")
	}
	memory, found := cgs["memory"]
	if !found {
		return "", cgroups.NewNotFoundError("memory")
	}

	// since we use this container for accounting, we need to ensure its a unified hierarchy.
	if cpu != memory {
		return "", fmt.Errorf("cpu and memory cgroup hierarchy not unified.  cpu: %s, memory: %s", cpu, memory)
	}

	// on systemd, every pid is in a unified cgroup hierarchy (name=systemd as seen in systemd-cgls)
	// cpu and memory accounting is off by default, users may choose to enable it per unit or globally.
	// users could enable CPU and memory accounting globally via /etc/systemd/system.conf (DefaultCPUAccounting=true DefaultMemoryAccounting=true).
	// users could also enable CPU and memory accounting per unit via CPUAccounting=true and MemoryAccounting=true
	// we only warn if accounting is not enabled for CPU or memory so as to not break local development flows where kubelet is launched in a terminal.
	// for example, the cgroup for the user session will be something like /user.slice/user-X.slice/session-X.scope, but the cpu and memory
	// cgroup will be the closest ancestor where accounting is performed (most likely /) on systems that launch docker containers.
	// as a result, on those systems, you will not get cpu or memory accounting statistics for kubelet.
	// in addition, you would not get memory or cpu accounting for the runtime unless accounting was enabled on its unit (or globally).
	if systemd, found := cgs["name=systemd"]; found {
		if systemd != cpu {
			log.Printf("CPUAccounting not enabled for pid: %d", pid)
		}
		if systemd != memory {
			log.Printf("MemoryAccounting not enabled for pid: %d", pid)
		}
		return systemd, nil
	}

	return cpu, nil
}

@@ -1,9 +1,12 @@
/*
Copyright 2016 The Kubernetes Authors.
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -18,36 +21,33 @@ import (
	"strings"
	"time"

	"k8s.io/kubernetes/pkg/api"
	client "k8s.io/kubernetes/pkg/client/unversioned"
	"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
	"k8s.io/kubernetes/pkg/labels"
	"k8s.io/kubernetes/test/e2e/framework"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
	const (
		// Interval to poll /stats/container on a node
		containerStatsPollingPeriod = 10 * time.Second
		// The monitoring time for one test.
		monitoringTime = 6 * time.Minute
		monitoringTime = 10 * time.Minute
		// The periodic reporting period.
		reportingPeriod = 3 * time.Minute
		reportingPeriod = 5 * time.Minute

		sleepAfterCreatePods = 10 * time.Second
		sleepAfterDeletePods = 120 * time.Second
	)

	var (
		ns string
		rm *ResourceCollector
		rc *ResourceCollector
		om *framework.RuntimeOperationMonitor
	)

	f := framework.NewDefaultFramework("kubelet-perf")
	f := framework.NewDefaultFramework("resource-usage")

	BeforeEach(func() {
		ns = f.Namespace.Name
@@ -59,45 +59,22 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
		framework.Logf("runtime operation error metrics:\n%s", framework.FormatRuntimeOperationErrorRate(result))
	})

	// This test measures and verifies the steady resource usage of node is within limit
	// It collects data from a standalone Cadvisor with housekeeping interval 1s.
	// It verifies CPU percentiles and the lastest memory usage.
	Context("regular resource usage tracking", func() {
		rTests := []resourceTest{
			{
				podsPerNode: 0,
				podsPerNode: 10,
				cpuLimits: framework.ContainersCPUSummary{
					stats.SystemContainerKubelet: {0.50: 0.06, 0.95: 0.08},
					stats.SystemContainerRuntime: {0.50: 0.05, 0.95: 0.06},
					stats.SystemContainerKubelet: {0.50: 0.25, 0.95: 0.30},
					stats.SystemContainerRuntime: {0.50: 0.30, 0.95: 0.40},
				},
				// We set the memory limits generously because the distribution
				// of the addon pods affect the memory usage on each node.
				memLimits: framework.ResourceUsagePerContainer{
					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 70 * 1024 * 1024},
					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 85 * 1024 * 1024},
				},
			},
			{
				podsPerNode: 35,
				cpuLimits: framework.ContainersCPUSummary{
					stats.SystemContainerKubelet: {0.50: 0.12, 0.95: 0.14},
					stats.SystemContainerRuntime: {0.50: 0.05, 0.95: 0.07},
				},
				// We set the memory limits generously because the distribution
				// of the addon pods affect the memory usage on each node.
				memLimits: framework.ResourceUsagePerContainer{
					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 70 * 1024 * 1024},
					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 150 * 1024 * 1024},
				},
			},
			{
				podsPerNode: 100,
				cpuLimits: framework.ContainersCPUSummary{
					stats.SystemContainerKubelet: {0.50: 0.17, 0.95: 0.22},
					stats.SystemContainerRuntime: {0.50: 0.06, 0.95: 0.09},
				},
				// We set the memory limits generously because the distribution
				// of the addon pods affect the memory usage on each node.
				memLimits: framework.ResourceUsagePerContainer{
					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 80 * 1024 * 1024},
					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
				},
			},
		}
@@ -111,9 +88,13 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
			It(name, func() {
				expectedCPU, expectedMemory := itArg.cpuLimits, itArg.memLimits

				// The test collects resource usage from a standalone Cadvisor pod.
				// The Cadvsior of Kubelet has a housekeeping interval of 10s, which is too long to
				// show the resource usage spikes. But changing its interval increases the overhead
				// of kubelet. Hence we use a Cadvisor pod.
				createCadvisorPod(f)
				rm = NewResourceCollector(containerStatsPollingPeriod)
				rm.Start()
				rc = NewResourceCollector(containerStatsPollingPeriod)
				rc.Start()

				By("Creating a batch of Pods")
				pods := newTestPods(podsPerNode, ImageRegistry[pauseImage], "test_pod")
@@ -125,8 +106,8 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
				time.Sleep(sleepAfterCreatePods)

				// Log once and flush the stats.
				rm.LogLatest()
				rm.Reset()
				rc.LogLatest()
				rc.Reset()

				By("Start monitoring resource usage")
				// Periodically dump the cpu summary until the deadline is met.
@@ -143,13 +124,15 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
					} else {
						time.Sleep(reportingPeriod)
					}
					logPodsOnNodes(f.Client)
					logPodsOnNode(f.Client)
				}

				By("Reporting overall resource usage")
				logPodsOnNodes(f.Client)
				rc.Stop()

				usagePerContainer, err := rm.GetLatest()
				By("Reporting overall resource usage")
				logPodsOnNode(f.Client)

				usagePerContainer, err := rc.GetLatest()
				Expect(err).NotTo(HaveOccurred())

				// TODO(random-liu): Remove the original log when we migrate to new perfdash
@@ -163,7 +146,7 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
				framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode))
				verifyMemoryLimits(f.Client, expectedMemory, usagePerNode)

				cpuSummary := rm.GetCPUSummary()
				cpuSummary := rc.GetCPUSummary()
				framework.Logf("%s", formatCPUSummary(cpuSummary))

				// Log perf result
@@ -171,21 +154,6 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
				cpuSummaryPerNode[nodeName] = cpuSummary
				framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode))
				verifyCPULimits(expectedCPU, cpuSummaryPerNode)

				// delete pods
				By("Deleting a batch of pods")
				deleteBatchPod(f, pods)

				rm.Stop()

				// tear down cadvisor
				Expect(f.Client.Pods(ns).Delete(cadvisorPodName, api.NewDeleteOptions(30))).
					NotTo(HaveOccurred())
				Expect(framework.WaitForPodToDisappear(f.Client, ns, cadvisorPodName, labels.Everything(),
					3*time.Second, 10*time.Minute)).
					NotTo(HaveOccurred())

				time.Sleep(sleepAfterDeletePods)
			})
		}
	})
@@ -267,7 +235,7 @@ func verifyCPULimits(expected framework.ContainersCPUSummary, actual framework.N
	}
}

func logPodsOnNodes(c *client.Client) {
func logPodsOnNode(c *client.Client) {
	nodeName := framework.TestContext.NodeName
	podList, err := framework.GetKubeletRunningPods(c, nodeName)
	if err != nil {