Added a node e2e test for pod evictions due to disk pressure.
This commit is contained in:
		
				
					committed by
					
						
						bindata-mockuser
					
				
			
			
				
	
			
			
			
						parent
						
							a3e339b6d6
						
					
				
				
					commit
					adf6b6ddf0
				
			@@ -71,6 +71,8 @@ type TestContextType struct {
 | 
				
			|||||||
	NodeName string
 | 
						NodeName string
 | 
				
			||||||
	// Whether to enable the QoS Cgroup Hierarchy or not
 | 
						// Whether to enable the QoS Cgroup Hierarchy or not
 | 
				
			||||||
	CgroupsPerQOS bool
 | 
						CgroupsPerQOS bool
 | 
				
			||||||
 | 
						// The hard eviction thresholds
 | 
				
			||||||
 | 
						EvictionHard string
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type CloudConfig struct {
 | 
					type CloudConfig struct {
 | 
				
			||||||
@@ -150,4 +152,5 @@ func RegisterClusterFlags() {
 | 
				
			|||||||
func RegisterNodeFlags() {
 | 
					func RegisterNodeFlags() {
 | 
				
			||||||
	flag.StringVar(&TestContext.NodeName, "node-name", "", "Name of the node to run tests on (node e2e suite only).")
 | 
						flag.StringVar(&TestContext.NodeName, "node-name", "", "Name of the node to run tests on (node e2e suite only).")
 | 
				
			||||||
	flag.BoolVar(&TestContext.CgroupsPerQOS, "cgroups-per-qos", false, "Enable creation of QoS cgroup hierarchy, if true top level QoS and pod cgroups are created.")
 | 
						flag.BoolVar(&TestContext.CgroupsPerQOS, "cgroups-per-qos", false, "Enable creation of QoS cgroup hierarchy, if true top level QoS and pod cgroups are created.")
 | 
				
			||||||
 | 
						flag.StringVar(&TestContext.EvictionHard, "eviction-hard", "", "The hard eviction thresholds. If set, pods get evicted when the specified resources drop below the thresholds.")
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										190
									
								
								test/e2e_node/disk_eviction_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										190
									
								
								test/e2e_node/disk_eviction_test.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,190 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					Copyright 2016 The Kubernetes Authors.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package e2e_node
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import (
 | 
				
			||||||
 | 
						"fmt"
 | 
				
			||||||
 | 
						"os/exec"
 | 
				
			||||||
 | 
						"strings"
 | 
				
			||||||
 | 
						"time"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						"k8s.io/kubernetes/pkg/api"
 | 
				
			||||||
 | 
						"k8s.io/kubernetes/pkg/kubelet/dockertools"
 | 
				
			||||||
 | 
						"k8s.io/kubernetes/pkg/util/uuid"
 | 
				
			||||||
 | 
						"k8s.io/kubernetes/test/e2e/framework"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						. "github.com/onsi/ginkgo"
 | 
				
			||||||
 | 
						. "github.com/onsi/gomega"
 | 
				
			||||||
 | 
						client "k8s.io/kubernetes/pkg/client/unversioned"
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const (
 | 
				
			||||||
 | 
						// podCheckInterval is the interval between pod status checks.
 | 
				
			||||||
 | 
						podCheckInterval = time.Second * 2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dummyFile = "dummy."
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO: Leverage dynamic Kubelet settings when it's implemented to only modify the kubelet eviction option in this test.
 | 
				
			||||||
 | 
					// To manually trigger the test on a node with disk space just over 15Gi :
 | 
				
			||||||
 | 
					//   make test-e2e-node FOCUS="hard eviction test" TEST_ARGS="--eviction-hard=nodefs.available<15Gi"
 | 
				
			||||||
 | 
					var _ = framework.KubeDescribe("Kubelet Eviction Manager [FLAKY] [Serial] [Disruptive]", func() {
 | 
				
			||||||
 | 
						f := framework.NewDefaultFramework("kubelet-eviction-manager")
 | 
				
			||||||
 | 
						var podClient *framework.PodClient
 | 
				
			||||||
 | 
						var c *client.Client
 | 
				
			||||||
 | 
						var n *api.Node
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						BeforeEach(func() {
 | 
				
			||||||
 | 
							podClient = f.PodClient()
 | 
				
			||||||
 | 
							c = f.Client
 | 
				
			||||||
 | 
							nodeList := framework.GetReadySchedulableNodesOrDie(c)
 | 
				
			||||||
 | 
							n = &nodeList.Items[0]
 | 
				
			||||||
 | 
						})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Describe("hard eviction test", func() {
 | 
				
			||||||
 | 
							Context("pod using the most disk space gets evicted when the node disk usage is above the eviction hard threshold", func() {
 | 
				
			||||||
 | 
								var busyPodName, idlePodName string
 | 
				
			||||||
 | 
								var containersToCleanUp map[string]bool
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								AfterEach(func() {
 | 
				
			||||||
 | 
									podClient.Delete(busyPodName, &api.DeleteOptions{})
 | 
				
			||||||
 | 
									podClient.Delete(idlePodName, &api.DeleteOptions{})
 | 
				
			||||||
 | 
									for container := range containersToCleanUp {
 | 
				
			||||||
 | 
										// TODO: to be container implementation agnostic
 | 
				
			||||||
 | 
										cmd := exec.Command("docker", "rm", "-f", strings.Trim(container, dockertools.DockerPrefix))
 | 
				
			||||||
 | 
										cmd.Run()
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								BeforeEach(func() {
 | 
				
			||||||
 | 
									if !evictionOptionIsSet() {
 | 
				
			||||||
 | 
										return
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									busyPodName = "to-evict" + string(uuid.NewUUID())
 | 
				
			||||||
 | 
									idlePodName = "idle" + string(uuid.NewUUID())
 | 
				
			||||||
 | 
									containersToCleanUp = make(map[string]bool)
 | 
				
			||||||
 | 
									podClient.Create(&api.Pod{
 | 
				
			||||||
 | 
										ObjectMeta: api.ObjectMeta{
 | 
				
			||||||
 | 
											Name: idlePodName,
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
										Spec: api.PodSpec{
 | 
				
			||||||
 | 
											RestartPolicy: api.RestartPolicyNever,
 | 
				
			||||||
 | 
											Containers: []api.Container{
 | 
				
			||||||
 | 
												{
 | 
				
			||||||
 | 
													Image: ImageRegistry[pauseImage],
 | 
				
			||||||
 | 
													Name:  idlePodName,
 | 
				
			||||||
 | 
												},
 | 
				
			||||||
 | 
											},
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
									})
 | 
				
			||||||
 | 
									podClient.Create(&api.Pod{
 | 
				
			||||||
 | 
										ObjectMeta: api.ObjectMeta{
 | 
				
			||||||
 | 
											Name: busyPodName,
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
										Spec: api.PodSpec{
 | 
				
			||||||
 | 
											RestartPolicy: api.RestartPolicyNever,
 | 
				
			||||||
 | 
											Containers: []api.Container{
 | 
				
			||||||
 | 
												{
 | 
				
			||||||
 | 
													Image: ImageRegistry[busyBoxImage],
 | 
				
			||||||
 | 
													Name:  busyPodName,
 | 
				
			||||||
 | 
													// Filling the disk
 | 
				
			||||||
 | 
													Command: []string{"sh", "-c",
 | 
				
			||||||
 | 
														fmt.Sprintf("for NUM in `seq 1 1 1000`; do dd if=/dev/urandom of=%s.$NUM bs=4000000 count=10; sleep 3; done",
 | 
				
			||||||
 | 
															dummyFile)},
 | 
				
			||||||
 | 
												},
 | 
				
			||||||
 | 
											},
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
									})
 | 
				
			||||||
 | 
								})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								It("should evict the pod using the most disk space", func() {
 | 
				
			||||||
 | 
									if !evictionOptionIsSet() {
 | 
				
			||||||
 | 
										framework.Logf("test skipped because eviction option is not set")
 | 
				
			||||||
 | 
										return
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									evictionOccurred := false
 | 
				
			||||||
 | 
									Eventually(func() error {
 | 
				
			||||||
 | 
										if !evictionOccurred {
 | 
				
			||||||
 | 
											podData, err := podClient.Get(busyPodName)
 | 
				
			||||||
 | 
											if err != nil {
 | 
				
			||||||
 | 
												return err
 | 
				
			||||||
 | 
											}
 | 
				
			||||||
 | 
											recordContainerId(containersToCleanUp, podData.Status.ContainerStatuses)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
											err = verifyPodEviction(podData)
 | 
				
			||||||
 | 
											if err != nil {
 | 
				
			||||||
 | 
												return err
 | 
				
			||||||
 | 
											}
 | 
				
			||||||
 | 
											if !nodeHasDiskPressure(f.Client) {
 | 
				
			||||||
 | 
												return fmt.Errorf("expected disk pressure condition is not set")
 | 
				
			||||||
 | 
											}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
											podData, err = podClient.Get(idlePodName)
 | 
				
			||||||
 | 
											if err != nil {
 | 
				
			||||||
 | 
												return err
 | 
				
			||||||
 | 
											}
 | 
				
			||||||
 | 
											recordContainerId(containersToCleanUp, podData.Status.ContainerStatuses)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
											if podData.Status.Phase != api.PodRunning {
 | 
				
			||||||
 | 
												return fmt.Errorf("expected phase to be running. got %+v", podData.Status.Phase)
 | 
				
			||||||
 | 
											}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
											evictionOccurred = true
 | 
				
			||||||
 | 
										}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
										// After eviction happens the pod is evicted so eventually the node disk pressure should be gone.
 | 
				
			||||||
 | 
										if nodeHasDiskPressure(f.Client) {
 | 
				
			||||||
 | 
											return fmt.Errorf("expected disk pressure condition relief has not happened")
 | 
				
			||||||
 | 
										}
 | 
				
			||||||
 | 
										return nil
 | 
				
			||||||
 | 
									}, time.Minute*5, podCheckInterval).Should(BeNil())
 | 
				
			||||||
 | 
								})
 | 
				
			||||||
 | 
							})
 | 
				
			||||||
 | 
						})
 | 
				
			||||||
 | 
					})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func verifyPodEviction(podData *api.Pod) error {
 | 
				
			||||||
 | 
						if podData.Status.Phase != api.PodFailed {
 | 
				
			||||||
 | 
							return fmt.Errorf("expected phase to be failed. got %+v", podData.Status.Phase)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if podData.Status.Reason != "Evicted" {
 | 
				
			||||||
 | 
							return fmt.Errorf("expected failed reason to be evicted. got %+v", podData.Status.Reason)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return nil
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func nodeHasDiskPressure(c *client.Client) bool {
 | 
				
			||||||
 | 
						nodeList := framework.GetReadySchedulableNodesOrDie(c)
 | 
				
			||||||
 | 
						for _, condition := range nodeList.Items[0].Status.Conditions {
 | 
				
			||||||
 | 
							if condition.Type == api.NodeDiskPressure {
 | 
				
			||||||
 | 
								return condition.Status == api.ConditionTrue
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return false
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func recordContainerId(containersToCleanUp map[string]bool, containerStatuses []api.ContainerStatus) {
 | 
				
			||||||
 | 
						for _, status := range containerStatuses {
 | 
				
			||||||
 | 
							containersToCleanUp[status.ContainerID] = true
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func evictionOptionIsSet() bool {
 | 
				
			||||||
 | 
						return len(framework.TestContext.EvictionHard) > 0
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -118,7 +118,7 @@ var _ = SynchronizedBeforeSuite(func() []byte {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	shared := &SharedContext{}
 | 
						shared := &SharedContext{}
 | 
				
			||||||
	if *startServices {
 | 
						if *startServices {
 | 
				
			||||||
		e2es = newE2eService(framework.TestContext.NodeName, framework.TestContext.CgroupsPerQOS, shared)
 | 
							e2es = newE2eService(framework.TestContext.NodeName, framework.TestContext.CgroupsPerQOS, framework.TestContext.EvictionHard, shared)
 | 
				
			||||||
		if err := e2es.start(); err != nil {
 | 
							if err := e2es.start(); err != nil {
 | 
				
			||||||
			Fail(fmt.Sprintf("Unable to start node services.\n%v", err))
 | 
								Fail(fmt.Sprintf("Unable to start node services.\n%v", err))
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -47,6 +47,7 @@ type e2eService struct {
 | 
				
			|||||||
	nodeName      string
 | 
						nodeName      string
 | 
				
			||||||
	logFiles      map[string]logFileData
 | 
						logFiles      map[string]logFileData
 | 
				
			||||||
	cgroupsPerQOS bool
 | 
						cgroupsPerQOS bool
 | 
				
			||||||
 | 
						evictionHard  string
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type logFileData struct {
 | 
					type logFileData struct {
 | 
				
			||||||
@@ -61,7 +62,7 @@ const (
 | 
				
			|||||||
	defaultEtcdPath = "/tmp/etcd"
 | 
						defaultEtcdPath = "/tmp/etcd"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func newE2eService(nodeName string, cgroupsPerQOS bool, context *SharedContext) *e2eService {
 | 
					func newE2eService(nodeName string, cgroupsPerQOS bool, evictionHard string, context *SharedContext) *e2eService {
 | 
				
			||||||
	// Special log files that need to be collected for additional debugging.
 | 
						// Special log files that need to be collected for additional debugging.
 | 
				
			||||||
	var logFiles = map[string]logFileData{
 | 
						var logFiles = map[string]logFileData{
 | 
				
			||||||
		"kern.log":   {[]string{"/var/log/kern.log"}, []string{"-k"}},
 | 
							"kern.log":   {[]string{"/var/log/kern.log"}, []string{"-k"}},
 | 
				
			||||||
@@ -73,6 +74,7 @@ func newE2eService(nodeName string, cgroupsPerQOS bool, context *SharedContext)
 | 
				
			|||||||
		nodeName:      nodeName,
 | 
							nodeName:      nodeName,
 | 
				
			||||||
		logFiles:      logFiles,
 | 
							logFiles:      logFiles,
 | 
				
			||||||
		cgroupsPerQOS: cgroupsPerQOS,
 | 
							cgroupsPerQOS: cgroupsPerQOS,
 | 
				
			||||||
 | 
							evictionHard:  evictionHard,
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -256,6 +258,8 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
 | 
				
			|||||||
		"--file-check-frequency", "10s", // Check file frequently so tests won't wait too long
 | 
							"--file-check-frequency", "10s", // Check file frequently so tests won't wait too long
 | 
				
			||||||
		"--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
 | 
							"--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
 | 
				
			||||||
		"--pod-cidr=10.180.0.0/24", // Assign a fixed CIDR to the node because there is no node controller.
 | 
							"--pod-cidr=10.180.0.0/24", // Assign a fixed CIDR to the node because there is no node controller.
 | 
				
			||||||
 | 
							"--eviction-hard", es.evictionHard,
 | 
				
			||||||
 | 
							"--eviction-pressure-transition-period", "30s",
 | 
				
			||||||
	)
 | 
						)
 | 
				
			||||||
	if es.cgroupsPerQOS {
 | 
						if es.cgroupsPerQOS {
 | 
				
			||||||
		cmdArgs = append(cmdArgs,
 | 
							cmdArgs = append(cmdArgs,
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user