Merge pull request #35572 from bprashanth/ip_gc

Automatic merge from submit-queue

GC pod ips

Finally managed to write a *failing* test. 
Supersedes https://github.com/kubernetes/kubernetes/pull/34373

```release-note
GC pod ips
```
This commit is contained in:
Kubernetes Submit Queue
2016-10-28 14:44:28 -07:00
committed by GitHub
5 changed files with 240 additions and 5 deletions

View File

@@ -66,6 +66,7 @@ go_test(
"memory_eviction_test.go",
"mirror_pod_test.go",
"resource_usage_test.go",
"restart_test.go",
"runtime_conformance_test.go",
"summary_test.go",
],
@@ -95,6 +96,7 @@ go_test(
"//test/e2e/common:go_default_library",
"//test/e2e/framework:go_default_library",
"//test/e2e_node/services:go_default_library",
"//test/utils:go_default_library",
"//vendor:github.com/davecgh/go-spew/spew",
"//vendor:github.com/golang/glog",
"//vendor:github.com/onsi/ginkgo",

View File

@@ -0,0 +1,117 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2e_node
import (
"k8s.io/kubernetes/test/e2e/framework"
"time"
"fmt"
. "github.com/onsi/ginkgo"
"k8s.io/kubernetes/pkg/api"
testutils "k8s.io/kubernetes/test/utils"
"os/exec"
)
// waitForPods waits for timeout duration, for pod_count.
// If the timeout is hit, it returns the list of currently running pods.
func waitForPods(f *framework.Framework, pod_count int, timeout time.Duration) (runningPods []*api.Pod) {
for start := time.Now(); time.Since(start) < timeout; time.Sleep(10 * time.Second) {
podList, err := f.PodClient().List(api.ListOptions{})
if err != nil {
framework.Logf("Failed to list pods on node: %v", err)
continue
}
runningPods = []*api.Pod{}
for _, pod := range podList.Items {
if r, err := testutils.PodRunningReady(&pod); err != nil || !r {
continue
}
runningPods = append(runningPods, &pod)
}
framework.Logf("Running pod count %d", len(runningPods))
if len(runningPods) >= pod_count {
break
}
}
return runningPods
}
var _ = framework.KubeDescribe("Restart [Serial] [Slow] [Disruptive]", func() {
const (
// Saturate the node. It's not necessary that all these pods enter
// Running/Ready, because we don't know the number of cores in the
// test node or default limits applied (if any). It's is essential
// that no containers end up in terminated. 100 was chosen because
// it's the max pods per node.
podCount = 100
podCreationInterval = 100 * time.Millisecond
recoverTimeout = 5 * time.Minute
startTimeout = 3 * time.Minute
// restartCount is chosen so even with minPods we exhaust the default
// allocation of a /24.
minPods = 50
restartCount = 6
)
f := framework.NewDefaultFramework("restart-test")
Context("Docker Daemon", func() {
Context("Network", func() {
It("should recover from ip leak", func() {
pods := newTestPods(podCount, framework.GetPauseImageNameForHostArch(), "restart-docker-test")
By(fmt.Sprintf("Trying to create %d pods on node", len(pods)))
createBatchPodWithRateControl(f, pods, podCreationInterval)
defer deletePodsSync(f, pods)
// Give the node some time to stabilize, assume pods that enter RunningReady within
// startTimeout fit on the node and the node is now saturated.
runningPods := waitForPods(f, podCount, startTimeout)
if len(runningPods) < minPods {
framework.Failf("Failed to start %d pods, cannot test that restarting docker doesn't leak IPs", minPods)
}
for i := 0; i < restartCount; i += 1 {
By(fmt.Sprintf("Restarting Docker Daemon iteration %d", i))
// TODO: Find a uniform way to deal with systemctl/initctl/service operations. #34494
if stdout, err := exec.Command("sudo", "systemctl", "restart", "docker").CombinedOutput(); err != nil {
framework.Logf("Failed to trigger docker restart with systemd/systemctl: %v, stdout: %q", err, string(stdout))
if stdout, err = exec.Command("sudo", "service", "docker", "restart").CombinedOutput(); err != nil {
framework.Failf("Failed to trigger docker restart with upstart/service: %v, stdout: %q", err, string(stdout))
}
}
time.Sleep(20 * time.Second)
}
By("Checking currently Running/Ready pods")
postRestartRunningPods := waitForPods(f, len(runningPods), recoverTimeout)
if len(postRestartRunningPods) == 0 {
framework.Failf("Failed to start *any* pods after docker restart, this might indicate an IP leak")
}
By("Confirm no containers have terminated")
for _, pod := range postRestartRunningPods {
if c := testutils.TerminatedContainers(pod); len(c) != 0 {
framework.Failf("Pod %q has failed containers %+v after docker restart, this might indicate an IP leak", pod.Name, c)
}
}
By(fmt.Sprintf("Docker restart test passed with %d pods", len(postRestartRunningPods)))
})
})
})
})

View File

@@ -343,6 +343,7 @@ ResourceQuota should create a ResourceQuota and capture the life of a service.,t
ResourceQuota should create a ResourceQuota and ensure its status is promptly calculated.,krousey,1
ResourceQuota should verify ResourceQuota with best effort scope.,mml,1
ResourceQuota should verify ResourceQuota with terminating scopes.,ncdc,1
Restart Docker Daemon Network should recover from ip leak,bprashanth,0
Restart should restart all nodes and ensure all nodes and pods recover,andyzheng0831,1
RethinkDB should create and stop rethinkdb servers,mwielgus,1
SSH should SSH to all nodes and run commands,quinton-hoole,0
1 name owner auto-assigned
343 ResourceQuota should create a ResourceQuota and ensure its status is promptly calculated. krousey 1
344 ResourceQuota should verify ResourceQuota with best effort scope. mml 1
345 ResourceQuota should verify ResourceQuota with terminating scopes. ncdc 1
346 Restart Docker Daemon Network should recover from ip leak bprashanth 0
347 Restart should restart all nodes and ensure all nodes and pods recover andyzheng0831 1
348 RethinkDB should create and stop rethinkdb servers mwielgus 1
349 SSH should SSH to all nodes and run commands quinton-hoole 0

View File

@@ -82,6 +82,23 @@ func FailedContainers(pod *api.Pod) map[string]ContainerFailures {
return states
}
// TerminatedContainers inspects all containers in a pod and returns a map
// of "container name: termination reason", for all currently terminated
// containers.
func TerminatedContainers(pod *api.Pod) map[string]string {
states := make(map[string]string)
statuses := pod.Status.ContainerStatuses
if len(statuses) == 0 {
return states
}
for _, status := range statuses {
if status.State.Terminated != nil {
states[status.Name] = status.State.Terminated.Reason
}
}
return states
}
// PodNotReady checks whether pod p's has a ready condition of status false.
func PodNotReady(p *api.Pod) (bool, error) {
// Check the ready condition is false.