@@ -149,6 +149,7 @@ GCE_SLOW_TESTS=(
     "resource\susage\stracking"                       # 1 hour,       file: kubelet_perf.go,         slow by design
     "monotonically\sincreasing\srestart\scount"       # 1.5 to 5 min, file: pods.go,                 slow by design
     "KubeProxy\sshould\stest\skube-proxy"             # 9 min 30 sec, file: kubeproxy.go,            issue: #14204
+    "cap\sback-off\sat\sMaxContainerBackOff"          # 20 mins,      file: manager.go,              PR:    #12648
     )

 # Tests which are not able to be run in parallel.
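Review note: the new entry belongs in GCE_SLOW_TESTS because just reaching the 300s cap costs on the order of ten minutes of pure back-off before the test's own sleeps and polling. A rough sketch of that arithmetic, assuming the crash-loop delay starts near 10s and doubles each restart (the 10s seed is an assumption for illustration, not a value from this diff):

package main

import "fmt"

// Back-of-the-envelope for the "20 mins" annotation above: sum the
// doubling delays until the 300s cap, then add the first capped delay.
func main() {
	total, delay := 0, 10
	for delay < 300 {
		total += delay
		delay *= 2
	}
	total += 300 // first delay at the cap
	fmt.Printf("~%ds of cumulative back-off before a capped delay is seen\n", total)
}

That prints ~610s; add the 2*MaxContainerBackOff sleep the capped test performs plus container run time and polling, and the wall-clock cost lands near the advertised 20 minutes.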
@@ -88,8 +88,8 @@ const (
 	// Location of container logs.
 	containerLogsDir = "/var/log/containers"

-	// max backoff period
-	maxContainerBackOff = 300 * time.Second
+	// max backoff period, exported for the e2e test
+	MaxContainerBackOff = 300 * time.Second

 	// Capacity of the channel for storing pods to kill. A small number should
 	// suffice because a goroutine is dedicated to check the channel and does
@@ -308,7 +308,7 @@ func NewMainKubelet(
 	}

 	procFs := procfs.NewProcFs()
-	imageBackOff := util.NewBackOff(resyncInterval, maxContainerBackOff)
+	imageBackOff := util.NewBackOff(resyncInterval, MaxContainerBackOff)
 	// Initialize the runtime.
 	switch containerRuntime {
 	case "docker":
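Both this call site and the one in the next hunk pass the renamed constant as the ceiling on a container's restart delay. A minimal sketch of the capped doubling the e2e tests below rely on, assuming a 10s seed; this is illustrative only, not the actual pkg/util Backoff implementation:

package main

import (
	"fmt"
	"time"
)

// nextDelay doubles the current delay but never exceeds max, mirroring
// the behavior the tests assert against MaxContainerBackOff. The 10s
// seed is an assumption for the sketch.
func nextDelay(current, max time.Duration) time.Duration {
	if current == 0 {
		return 10 * time.Second
	}
	if next := current * 2; next < max {
		return next
	}
	return max
}

func main() {
	d := time.Duration(0)
	for i := 0; i < 8; i++ {
		d = nextDelay(d, 300*time.Second)
		fmt.Println(d) // 10s 20s 40s 80s 160s then pinned at 5m0s
	}
}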
@@ -425,7 +425,7 @@ func NewMainKubelet(
 		}
 	}

-	klet.backOff = util.NewBackOff(resyncInterval, maxContainerBackOff)
+	klet.backOff = util.NewBackOff(resyncInterval, MaxContainerBackOff)
 	klet.podKillingCh = make(chan *kubecontainer.Pod, podKillingChannelCapacity)

 	klet.sourcesSeen = sets.NewString()

test/e2e/pods.go
@@ -20,6 +20,7 @@ import (
 	"bytes"
 	"fmt"
 	"io"
+	"math"
 	"strconv"
 	"strings"
 	"time"
@@ -31,6 +32,7 @@ import (
 	"k8s.io/kubernetes/pkg/api/resource"
 	client "k8s.io/kubernetes/pkg/client/unversioned"
 	"k8s.io/kubernetes/pkg/fields"
+	"k8s.io/kubernetes/pkg/kubelet"
 	"k8s.io/kubernetes/pkg/labels"
 	"k8s.io/kubernetes/pkg/util"
 	"k8s.io/kubernetes/pkg/util/wait"
@@ -44,6 +46,12 @@ const (
 	defaultObservationTimeout = time.Minute * 2
 )

+var (
+	buildBackOffDuration = time.Minute
+	syncLoopFrequency    = 10 * time.Second
+	maxBackOffTolerance  = time.Duration(1.3 * float64(kubelet.MaxContainerBackOff))
+)
+
 func runLivenessTest(c *client.Client, ns string, podDescr *api.Pod, expectNumRestarts int, timeout time.Duration) {
 	By(fmt.Sprintf("Creating pod %s in namespace %s", podDescr.Name, ns))
 	_, err := c.Pods(ns).Create(podDescr)
@@ -134,6 +142,70 @@ func testHostIP(c *client.Client, ns string, pod *api.Pod) {
 	}
 }

+func runPodFromStruct(framework *Framework, pod *api.Pod) {
+	By("submitting the pod to kubernetes")
+
+	podClient := framework.Client.Pods(framework.Namespace.Name)
+	pod, err := podClient.Create(pod)
+	if err != nil {
+		Failf("Failed to create pod: %v", err)
+	}
+
+	expectNoError(framework.WaitForPodRunning(pod.Name))
+
+	By("verifying the pod is in kubernetes")
+	pod, err = podClient.Get(pod.Name)
+	if err != nil {
+		Failf("failed to get pod: %v", err)
+	}
+}
+
+func startPodAndGetBackOffs(framework *Framework, pod *api.Pod, podName string, containerName string, sleepAmount time.Duration) (time.Duration, time.Duration) {
+	runPodFromStruct(framework, pod)
+	time.Sleep(sleepAmount)
+
+	By("getting restart delay-0")
+	_, err := getRestartDelay(framework.Client, pod, framework.Namespace.Name, podName, containerName)
+	if err != nil {
+		Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
+	}
+
+	By("getting restart delay-1")
+	delay1, err := getRestartDelay(framework.Client, pod, framework.Namespace.Name, podName, containerName)
+	if err != nil {
+		Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
+	}
+
+	By("getting restart delay-2")
+	delay2, err := getRestartDelay(framework.Client, pod, framework.Namespace.Name, podName, containerName)
+	if err != nil {
+		Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
+	}
+	return delay1, delay2
+}
+
+func getRestartDelay(c *client.Client, pod *api.Pod, ns string, name string, containerName string) (time.Duration, error) {
+	beginTime := time.Now()
+	for time.Since(beginTime) < (2 * maxBackOffTolerance) { // may just miss the 1st MaxContainerBackOff delay
+		time.Sleep(time.Second)
+		pod, err := c.Pods(ns).Get(name)
+		expectNoError(err, fmt.Sprintf("getting pod %s", name))
+		status, ok := api.GetContainerStatus(pod.Status.ContainerStatuses, containerName)
+		if !ok {
+			Logf("getRestartDelay: status missing")
+			continue
+		}
+
+		if status.State.Waiting == nil && status.State.Running != nil && status.LastTerminationState.Terminated != nil && status.State.Running.StartedAt.Time.After(beginTime) {
+			startedAt := status.State.Running.StartedAt.Time
+			finishedAt := status.LastTerminationState.Terminated.FinishedAt.Time
+			Logf("getRestartDelay: finishedAt=%s restartedAt=%s (%s)", finishedAt, startedAt, startedAt.Sub(finishedAt))
+			return startedAt.Sub(finishedAt), nil
+		}
+	}
+	return 0, fmt.Errorf("timeout getting pod restart delay")
+}
+
 var _ = Describe("Pods", func() {
 	framework := NewFramework("pods")

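A note on what getRestartDelay actually measures: it polls the pod status once a second and, on seeing a fresh Running state that began after polling started, reports Running.StartedAt minus LastTerminationState.Terminated.FinishedAt, i.e. the wall-clock gap the kubelet held the container in back-off between two runs. A toy illustration with made-up timestamps:

package main

import (
	"fmt"
	"time"
)

func main() {
	// Hypothetical status timestamps, mirroring the fields read above.
	finishedAt := time.Date(2015, 10, 20, 12, 0, 0, 0, time.UTC) // LastTerminationState.Terminated.FinishedAt
	startedAt := finishedAt.Add(40 * time.Second)                // State.Running.StartedAt
	fmt.Println("observed back-off:", startedAt.Sub(finishedAt)) // 40s
}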
@@ -755,6 +827,190 @@ var _ = Describe("Pods", func() {
 		}
 	})

+	It("should have their container restart back-off timer increase exponentially", func() {
+		podName := "pod-back-off-exponentially"
+		containerName := "back-off"
+		podClient := framework.Client.Pods(framework.Namespace.Name)
+		pod := &api.Pod{
+			ObjectMeta: api.ObjectMeta{
+				Name:   podName,
+				Labels: map[string]string{"test": "back-off-image"},
+			},
+			Spec: api.PodSpec{
+				Containers: []api.Container{
+					{
+						Name:    containerName,
+						Image:   "gcr.io/google_containers/busybox",
+						Command: []string{"/bin/sh", "-c", "sleep 5", "/crash/missing"},
+					},
+				},
+			},
+		}
+
+		defer func() {
+			By("deleting the pod")
+			podClient.Delete(pod.Name, api.NewDeleteOptions(0))
+		}()
+
+		delay1, delay2 := startPodAndGetBackOffs(framework, pod, podName, containerName, buildBackOffDuration)
+		delay1 += 1 // guard against dividing by zero in the ratio below
+		ratio := float64(delay2) / float64(delay1)
+		if math.Floor(ratio) != 2 && math.Ceil(ratio) != 2 {
+			Failf("back-off gap is not increasing exponentially pod=%s/%s delay1=%s delay2=%s", podName, containerName, delay1, delay2)
+		}
+	})
+
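The floor/ceil check deserves a comment: under pure doubling delay2/delay1 would be exactly 2, but sync-loop jitter shifts it, so the test accepts any ratio whose floor or ceiling equals 2, roughly the open interval (1, 3). A small sketch of the same check with hypothetical measurements:

package main

import (
	"fmt"
	"math"
)

func main() {
	delay1, delay2 := 39.7, 81.2 // seconds; hypothetical observed delays
	ratio := delay2 / delay1     // ~2.05 with jitter
	pass := math.Floor(ratio) == 2 || math.Ceil(ratio) == 2
	fmt.Printf("ratio=%.2f pass=%v\n", ratio, pass)
}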
	It("should have their auto-restart back-off timer reset on image update", func() {
 | 
			
		||||
		podName := "pod-back-off-image"
 | 
			
		||||
		containerName := "back-off"
 | 
			
		||||
		podClient := framework.Client.Pods(framework.Namespace.Name)
 | 
			
		||||
		pod := &api.Pod{
 | 
			
		||||
			ObjectMeta: api.ObjectMeta{
 | 
			
		||||
				Name:   podName,
 | 
			
		||||
				Labels: map[string]string{"test": "back-off-image"},
 | 
			
		||||
			},
 | 
			
		||||
			Spec: api.PodSpec{
 | 
			
		||||
				Containers: []api.Container{
 | 
			
		||||
					{
 | 
			
		||||
						Name:    containerName,
 | 
			
		||||
						Image:   "gcr.io/google_containers/busybox",
 | 
			
		||||
						Command: []string{"/bin/sh", "-c", "sleep 5", "/crash/missing"},
 | 
			
		||||
					},
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		defer func() {
 | 
			
		||||
			By("deleting the pod")
 | 
			
		||||
			podClient.Delete(pod.Name, api.NewDeleteOptions(0))
 | 
			
		||||
		}()
 | 
			
		||||
 | 
			
		||||
		delay1, delay2 := startPodAndGetBackOffs(framework, pod, podName, containerName, buildBackOffDuration)
 | 
			
		||||
 | 
			
		||||
		By("updating the image")
 | 
			
		||||
		pod, err := podClient.Get(pod.Name)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			Failf("failed to get pod: %v", err)
 | 
			
		||||
		}
 | 
			
		||||
		pod.Spec.Containers[0].Image = "nginx"
 | 
			
		||||
		pod, err = podClient.Update(pod)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			Failf("error updating pod=%s/%s %v", podName, containerName, err)
 | 
			
		||||
		}
 | 
			
		||||
		time.Sleep(syncLoopFrequency)
 | 
			
		||||
		expectNoError(framework.WaitForPodRunning(pod.Name))
 | 
			
		||||
 | 
			
		||||
		By("get restart delay after image update")
 | 
			
		||||
		delayAfterUpdate, err := getRestartDelay(framework.Client, pod, framework.Namespace.Name, podName, containerName)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if delayAfterUpdate > delay2 || delayAfterUpdate > delay1 {
 | 
			
		||||
			Failf("updating image did not reset the back-off value in pod=%s/%s d3=%s d2=%s d1=%s", podName, containerName, delayAfterUpdate, delay1, delay2)
 | 
			
		||||
		}
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
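Review note on the direction of the reset assertion: delays only grow between consecutive restarts, so by the time the image is updated delay2 >= delay1, and a genuine reset must undercut both earlier measurements, which is exactly what the pair of comparisons above enforces.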
	It("should not back-off restarting a container on LivenessProbe failure", func() {
 | 
			
		||||
		podClient := framework.Client.Pods(framework.Namespace.Name)
 | 
			
		||||
		podName := "pod-back-off-liveness"
 | 
			
		||||
		containerName := "back-off-liveness"
 | 
			
		||||
		pod := &api.Pod{
 | 
			
		||||
			ObjectMeta: api.ObjectMeta{
 | 
			
		||||
				Name:   podName,
 | 
			
		||||
				Labels: map[string]string{"test": "liveness"},
 | 
			
		||||
			},
 | 
			
		||||
			Spec: api.PodSpec{
 | 
			
		||||
				Containers: []api.Container{
 | 
			
		||||
					{
 | 
			
		||||
						Name:    containerName,
 | 
			
		||||
						Image:   "gcr.io/google_containers/busybox",
 | 
			
		||||
						Command: []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 5; rm -rf /tmp/health; sleep 600"},
 | 
			
		||||
						LivenessProbe: &api.Probe{
 | 
			
		||||
							Handler: api.Handler{
 | 
			
		||||
								Exec: &api.ExecAction{
 | 
			
		||||
									Command: []string{"cat", "/tmp/health"},
 | 
			
		||||
								},
 | 
			
		||||
							},
 | 
			
		||||
							InitialDelaySeconds: 5,
 | 
			
		||||
						},
 | 
			
		||||
					},
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		defer func() {
 | 
			
		||||
			By("deleting the pod")
 | 
			
		||||
			podClient.Delete(pod.Name, api.NewDeleteOptions(0))
 | 
			
		||||
		}()
 | 
			
		||||
 | 
			
		||||
		delay1, delay2 := startPodAndGetBackOffs(framework, pod, podName, containerName, buildBackOffDuration)
 | 
			
		||||
 | 
			
		||||
		if math.Abs(float64(delay2-delay1)) > float64(syncLoopFrequency) {
 | 
			
		||||
			Failf("back-off increasing on LivenessProbe failure delay1=%s delay2=%s", delay1, delay2)
 | 
			
		||||
		}
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	It("should cap back-off at MaxContainerBackOff", func() {
 | 
			
		||||
		podClient := framework.Client.Pods(framework.Namespace.Name)
 | 
			
		||||
		podName := "back-off-cap"
 | 
			
		||||
		containerName := "back-off-cap"
 | 
			
		||||
		pod := &api.Pod{
 | 
			
		||||
			ObjectMeta: api.ObjectMeta{
 | 
			
		||||
				Name:   podName,
 | 
			
		||||
				Labels: map[string]string{"test": "liveness"},
 | 
			
		||||
			},
 | 
			
		||||
			Spec: api.PodSpec{
 | 
			
		||||
				Containers: []api.Container{
 | 
			
		||||
					{
 | 
			
		||||
						Name:    containerName,
 | 
			
		||||
						Image:   "gcr.io/google_containers/busybox",
 | 
			
		||||
						Command: []string{"/bin/sh", "-c", "sleep 5", "/crash/missing"},
 | 
			
		||||
					},
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		defer func() {
 | 
			
		||||
			By("deleting the pod")
 | 
			
		||||
			podClient.Delete(pod.Name, api.NewDeleteOptions(0))
 | 
			
		||||
		}()
 | 
			
		||||
 | 
			
		||||
		runPodFromStruct(framework, pod)
 | 
			
		||||
		time.Sleep(2 * kubelet.MaxContainerBackOff) // it takes slightly more than 2*x to get to a back-off of x
 | 
			
		||||
 | 
			
		||||
		// wait for a delay == capped delay of MaxContainerBackOff
 | 
			
		||||
		By("geting restart delay when capped")
 | 
			
		||||
		var (
 | 
			
		||||
			delay1 time.Duration
 | 
			
		||||
			err    error
 | 
			
		||||
		)
 | 
			
		||||
		for i := 0; i < 3; i++ {
 | 
			
		||||
			delay1, err = getRestartDelay(framework.Client, pod, framework.Namespace.Name, podName, containerName)
 | 
			
		||||
			if err != nil {
 | 
			
		||||
				Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			if delay1 < kubelet.MaxContainerBackOff {
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (delay1 < kubelet.MaxContainerBackOff) || (delay1 > maxBackOffTolerance) {
 | 
			
		||||
			Failf("expected %s back-off got=%s in delay1", kubelet.MaxContainerBackOff, delay1)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		By("getting restart delay after a capped delay")
 | 
			
		||||
		delay2, err := getRestartDelay(framework.Client, pod, framework.Namespace.Name, podName, containerName)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if delay2 < kubelet.MaxContainerBackOff || delay2 > maxBackOffTolerance { // syncloop cumulative drift
 | 
			
		||||
			Failf("expected %s back-off got=%s on delay2", kubelet.MaxContainerBackOff, delay2)
 | 
			
		||||
		}
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	// The following tests for remote command execution and port forwarding are
 | 
			
		||||
	// commented out because the GCE environment does not currently have nsenter
 | 
			
		||||
	// in the kubelet's PATH, nor does it have socat installed. Once we figure
 | 
			
		||||
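Review note on the tolerance used above: maxBackOffTolerance, from the var block added near the top of the file, is 1.3 x 300s = 390s, so both assertions in the capped test accept any observed delay in [300s, 390s], giving the sync loop roughly 90s of cumulative drift before the test fails.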
 