remove flaky label from eviction tests
This commit is contained in:
		@@ -98,7 +98,6 @@ go_test(
 | 
			
		||||
        "kubelet_test.go",
 | 
			
		||||
        "lifecycle_hook_test.go",
 | 
			
		||||
        "log_path_test.go",
 | 
			
		||||
        "memory_eviction_test.go",
 | 
			
		||||
        "mirror_pod_test.go",
 | 
			
		||||
        "pods_container_manager_test.go",
 | 
			
		||||
        "runtime_conformance_test.go",
 | 
			
		||||
 
 | 
			
		||||
@@ -19,6 +19,7 @@ package e2e_node
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"path/filepath"
 | 
			
		||||
	"strconv"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	"k8s.io/api/core/v1"
 | 
			
		||||
@@ -55,7 +56,7 @@ const (
 | 
			
		||||
 | 
			
		||||
// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
 | 
			
		||||
// Node disk pressure is induced by consuming all inodes on the node.
 | 
			
		||||
var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
 | 
			
		||||
var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", func() {
 | 
			
		||||
	f := framework.NewDefaultFramework("inode-eviction-test")
 | 
			
		||||
	expectedNodeCondition := v1.NodeDiskPressure
 | 
			
		||||
	pressureTimeout := 15 * time.Minute
 | 
			
		||||
@@ -90,7 +91,7 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive] [Flak
 | 
			
		||||
 | 
			
		||||
// MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods.
 | 
			
		||||
// Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
 | 
			
		||||
var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
 | 
			
		||||
var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive]", func() {
 | 
			
		||||
	f := framework.NewDefaultFramework("memory-allocatable-eviction-test")
 | 
			
		||||
	expectedNodeCondition := v1.NodeMemoryPressure
 | 
			
		||||
	pressureTimeout := 10 * time.Minute
 | 
			
		||||
@@ -122,7 +123,7 @@ var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disru
 | 
			
		||||
 | 
			
		||||
// LocalStorageEviction tests that the node responds to node disk pressure by evicting only responsible pods
 | 
			
		||||
// Disk pressure is induced by running pods which consume disk space.
 | 
			
		||||
var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
 | 
			
		||||
var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive]", func() {
 | 
			
		||||
	f := framework.NewDefaultFramework("localstorage-eviction-test")
 | 
			
		||||
	pressureTimeout := 10 * time.Minute
 | 
			
		||||
	expectedNodeCondition := v1.NodeDiskPressure
 | 
			
		||||
@@ -150,7 +151,7 @@ var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive
 | 
			
		||||
// LocalStorageEviction tests that the node responds to node disk pressure by evicting only responsible pods
 | 
			
		||||
// Disk pressure is induced by running pods which consume disk space, which exceed the soft eviction threshold.
 | 
			
		||||
// Note: This test's purpose is to test Soft Evictions.  Local storage was chosen since it is the least costly to run.
 | 
			
		||||
var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
 | 
			
		||||
var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disruptive]", func() {
 | 
			
		||||
	f := framework.NewDefaultFramework("localstorage-eviction-test")
 | 
			
		||||
	pressureTimeout := 10 * time.Minute
 | 
			
		||||
	expectedNodeCondition := v1.NodeDiskPressure
 | 
			
		||||
@@ -184,7 +185,7 @@ var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disrup
 | 
			
		||||
})
 | 
			
		||||
 | 
			
		||||
// LocalStorageCapacityIsolationEviction tests that container and volume local storage limits are enforced through evictions
 | 
			
		||||
var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Serial] [Disruptive] [Flaky] [Feature:LocalStorageCapacityIsolation]", func() {
 | 
			
		||||
var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Serial] [Disruptive] [Feature:LocalStorageCapacityIsolation]", func() {
 | 
			
		||||
	f := framework.NewDefaultFramework("localstorage-eviction-test")
 | 
			
		||||
	evictionTestTimeout := 10 * time.Minute
 | 
			
		||||
	Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
 | 
			
		||||
@@ -236,7 +237,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
 | 
			
		||||
// PriorityMemoryEvictionOrdering tests that the node responds to node memory pressure by evicting pods.
 | 
			
		||||
// This test tests that the guaranteed pod is never evicted, and that the lower-priority pod is evicted before
 | 
			
		||||
// the higher priority pod.
 | 
			
		||||
var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [Disruptive] [Flaky]", func() {
 | 
			
		||||
var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [Disruptive]", func() {
 | 
			
		||||
	f := framework.NewDefaultFramework("priority-memory-eviction-ordering-test")
 | 
			
		||||
	expectedNodeCondition := v1.NodeMemoryPressure
 | 
			
		||||
	pressureTimeout := 10 * time.Minute
 | 
			
		||||
@@ -282,7 +283,7 @@ var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [
 | 
			
		||||
// PriorityLocalStorageEvictionOrdering tests that the node responds to node disk pressure by evicting pods.
 | 
			
		||||
// This test tests that the guaranteed pod is never evicted, and that the lower-priority pod is evicted before
 | 
			
		||||
// the higher priority pod.
 | 
			
		||||
var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Serial] [Disruptive] [Flaky]", func() {
 | 
			
		||||
var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Serial] [Disruptive]", func() {
 | 
			
		||||
	f := framework.NewDefaultFramework("priority-disk-eviction-ordering-test")
 | 
			
		||||
	expectedNodeCondition := v1.NodeDiskPressure
 | 
			
		||||
	pressureTimeout := 10 * time.Minute
 | 
			
		||||
@@ -668,3 +669,50 @@ func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirem
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func getMemhogPod(podName string, ctnName string, res v1.ResourceRequirements) *v1.Pod {
 | 
			
		||||
	env := []v1.EnvVar{
 | 
			
		||||
		{
 | 
			
		||||
			Name: "MEMORY_LIMIT",
 | 
			
		||||
			ValueFrom: &v1.EnvVarSource{
 | 
			
		||||
				ResourceFieldRef: &v1.ResourceFieldSelector{
 | 
			
		||||
					Resource: "limits.memory",
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// If there is a limit specified, pass 80% of it for -mem-total, otherwise use the downward API
 | 
			
		||||
	// to pass limits.memory, which will be the total memory available.
 | 
			
		||||
	// This helps prevent a guaranteed pod from triggering an OOM kill due to it's low memory limit,
 | 
			
		||||
	// which will cause the test to fail inappropriately.
 | 
			
		||||
	var memLimit string
 | 
			
		||||
	if limit, ok := res.Limits[v1.ResourceMemory]; ok {
 | 
			
		||||
		memLimit = strconv.Itoa(int(
 | 
			
		||||
			float64(limit.Value()) * 0.8))
 | 
			
		||||
	} else {
 | 
			
		||||
		memLimit = "$(MEMORY_LIMIT)"
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return &v1.Pod{
 | 
			
		||||
		ObjectMeta: metav1.ObjectMeta{
 | 
			
		||||
			Name: podName,
 | 
			
		||||
		},
 | 
			
		||||
		Spec: v1.PodSpec{
 | 
			
		||||
			RestartPolicy: v1.RestartPolicyNever,
 | 
			
		||||
			Containers: []v1.Container{
 | 
			
		||||
				{
 | 
			
		||||
					Name:            ctnName,
 | 
			
		||||
					Image:           "k8s.gcr.io/stress:v1",
 | 
			
		||||
					ImagePullPolicy: "Always",
 | 
			
		||||
					Env:             env,
 | 
			
		||||
					// 60 min timeout * 60s / tick per 10s = 360 ticks before timeout => ~11.11Mi/tick
 | 
			
		||||
					// to fill ~4Gi of memory, so initial ballpark 12Mi/tick.
 | 
			
		||||
					// We might see flakes due to timeout if the total memory on the nodes increases.
 | 
			
		||||
					Args:      []string{"-mem-alloc-size", "12Mi", "-mem-alloc-sleep", "10s", "-mem-total", memLimit},
 | 
			
		||||
					Resources: res,
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,287 +0,0 @@
 | 
			
		||||
/*
 | 
			
		||||
Copyright 2016 The Kubernetes Authors.
 | 
			
		||||
 | 
			
		||||
Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
you may not use this file except in compliance with the License.
 | 
			
		||||
You may obtain a copy of the License at
 | 
			
		||||
 | 
			
		||||
    http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
 | 
			
		||||
Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
See the License for the specific language governing permissions and
 | 
			
		||||
limitations under the License.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package e2e_node
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"strconv"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	"github.com/golang/glog"
 | 
			
		||||
	"k8s.io/api/core/v1"
 | 
			
		||||
	"k8s.io/apimachinery/pkg/api/resource"
 | 
			
		||||
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 | 
			
		||||
	nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
 | 
			
		||||
	"k8s.io/kubernetes/test/e2e/framework"
 | 
			
		||||
 | 
			
		||||
	. "github.com/onsi/ginkgo"
 | 
			
		||||
	. "github.com/onsi/gomega"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// Eviction Policy is described here:
 | 
			
		||||
// https://github.com/kubernetes/kubernetes/blob/master/docs/proposals/kubelet-eviction.md
 | 
			
		||||
 | 
			
		||||
var _ = framework.KubeDescribe("MemoryEviction [Slow] [Serial] [Disruptive]", func() {
 | 
			
		||||
	var (
 | 
			
		||||
		evictionHard = map[string]string{"memory.available": "40%"}
 | 
			
		||||
	)
 | 
			
		||||
 | 
			
		||||
	f := framework.NewDefaultFramework("eviction-test")
 | 
			
		||||
 | 
			
		||||
	// This is a dummy context to wrap the outer AfterEach, which will run after the inner AfterEach.
 | 
			
		||||
	// We want to list all of the node and pod events, including any that occur while waiting for
 | 
			
		||||
	// memory pressure reduction, even if we time out while waiting.
 | 
			
		||||
	Context("", func() {
 | 
			
		||||
 | 
			
		||||
		AfterEach(func() {
 | 
			
		||||
			// Print events
 | 
			
		||||
			logNodeEvents(f)
 | 
			
		||||
			logPodEvents(f)
 | 
			
		||||
		})
 | 
			
		||||
		Context("", func() {
 | 
			
		||||
			tempSetCurrentKubeletConfig(f, func(c *kubeletconfig.KubeletConfiguration) {
 | 
			
		||||
				c.EvictionHard = evictionHard
 | 
			
		||||
			})
 | 
			
		||||
 | 
			
		||||
			Context("when there is memory pressure", func() {
 | 
			
		||||
				AfterEach(func() {
 | 
			
		||||
					// Wait for the memory pressure condition to disappear from the node status before continuing.
 | 
			
		||||
					By("waiting for the memory pressure condition on the node to disappear before ending the test.")
 | 
			
		||||
					Eventually(func() error {
 | 
			
		||||
						nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
 | 
			
		||||
						if err != nil {
 | 
			
		||||
							return fmt.Errorf("tried to get node list but got error: %v", err)
 | 
			
		||||
						}
 | 
			
		||||
						// Assuming that there is only one node, because this is a node e2e test.
 | 
			
		||||
						if len(nodeList.Items) != 1 {
 | 
			
		||||
							return fmt.Errorf("expected 1 node, but see %d. List: %v", len(nodeList.Items), nodeList.Items)
 | 
			
		||||
						}
 | 
			
		||||
						node := nodeList.Items[0]
 | 
			
		||||
						_, pressure := nodeutil.GetNodeCondition(&node.Status, v1.NodeMemoryPressure)
 | 
			
		||||
						if pressure != nil && pressure.Status == v1.ConditionTrue {
 | 
			
		||||
							return fmt.Errorf("node is still reporting memory pressure condition: %s", pressure)
 | 
			
		||||
						}
 | 
			
		||||
						return nil
 | 
			
		||||
					}, 5*time.Minute, 15*time.Second).Should(BeNil())
 | 
			
		||||
 | 
			
		||||
					// Check available memory after condition disappears, just in case:
 | 
			
		||||
					// Wait for available memory to decrease to a reasonable level before ending the test.
 | 
			
		||||
					// This helps prevent interference with tests that start immediately after this one.
 | 
			
		||||
					By("waiting for available memory to decrease to a reasonable level before ending the test.")
 | 
			
		||||
					Eventually(func() error {
 | 
			
		||||
						summary, err := getNodeSummary()
 | 
			
		||||
						if err != nil {
 | 
			
		||||
							return err
 | 
			
		||||
						}
 | 
			
		||||
						if summary.Node.Memory.AvailableBytes == nil {
 | 
			
		||||
							return fmt.Errorf("summary.Node.Memory.AvailableBytes was nil, cannot get memory stats.")
 | 
			
		||||
						}
 | 
			
		||||
						if summary.Node.Memory.WorkingSetBytes == nil {
 | 
			
		||||
							return fmt.Errorf("summary.Node.Memory.WorkingSetBytes was nil, cannot get memory stats.")
 | 
			
		||||
						}
 | 
			
		||||
						avail := *summary.Node.Memory.AvailableBytes
 | 
			
		||||
						wset := *summary.Node.Memory.WorkingSetBytes
 | 
			
		||||
 | 
			
		||||
						// memory limit = avail + wset
 | 
			
		||||
						limit := avail + wset
 | 
			
		||||
						halflimit := limit / 2
 | 
			
		||||
 | 
			
		||||
						// Wait for at least half of memory limit to be available
 | 
			
		||||
						if avail >= halflimit {
 | 
			
		||||
							return nil
 | 
			
		||||
						}
 | 
			
		||||
						return fmt.Errorf("current available memory is: %d bytes. Expected at least %d bytes available.", avail, halflimit)
 | 
			
		||||
					}, 5*time.Minute, 15*time.Second).Should(BeNil())
 | 
			
		||||
 | 
			
		||||
					// TODO(mtaufen): 5 minute wait to stop flaky test bleeding while we figure out what is actually going on.
 | 
			
		||||
					//                If related to pressure transition period in eviction manager, probably only need to wait
 | 
			
		||||
					//                just over 30s becasue that is the transition period set for node e2e tests. But since we
 | 
			
		||||
					//                know 5 min works and we don't know if transition period is the problem, wait 5 min for now.
 | 
			
		||||
					time.Sleep(5 * time.Minute)
 | 
			
		||||
 | 
			
		||||
					// Finally, try starting a new pod and wait for it to be scheduled and running.
 | 
			
		||||
					// This is the final check to try to prevent interference with subsequent tests.
 | 
			
		||||
					podName := "admit-best-effort-pod"
 | 
			
		||||
					f.PodClient().CreateSync(&v1.Pod{
 | 
			
		||||
						ObjectMeta: metav1.ObjectMeta{
 | 
			
		||||
							Name: podName,
 | 
			
		||||
						},
 | 
			
		||||
						Spec: v1.PodSpec{
 | 
			
		||||
							RestartPolicy: v1.RestartPolicyNever,
 | 
			
		||||
							Containers: []v1.Container{
 | 
			
		||||
								{
 | 
			
		||||
									Image: framework.GetPauseImageNameForHostArch(),
 | 
			
		||||
									Name:  podName,
 | 
			
		||||
								},
 | 
			
		||||
							},
 | 
			
		||||
						},
 | 
			
		||||
					})
 | 
			
		||||
				})
 | 
			
		||||
 | 
			
		||||
				It("should evict pods in the correct order (besteffort first, then burstable, then guaranteed)", func() {
 | 
			
		||||
					By("creating a guaranteed pod, a burstable pod, and a besteffort pod.")
 | 
			
		||||
 | 
			
		||||
					// A pod is guaranteed only when requests and limits are specified for all the containers and they are equal.
 | 
			
		||||
					guaranteed := getMemhogPod("guaranteed-pod", "guaranteed", v1.ResourceRequirements{
 | 
			
		||||
						Requests: v1.ResourceList{
 | 
			
		||||
							v1.ResourceCPU:    resource.MustParse("100m"),
 | 
			
		||||
							v1.ResourceMemory: resource.MustParse("100Mi"),
 | 
			
		||||
						},
 | 
			
		||||
						Limits: v1.ResourceList{
 | 
			
		||||
							v1.ResourceCPU:    resource.MustParse("100m"),
 | 
			
		||||
							v1.ResourceMemory: resource.MustParse("100Mi"),
 | 
			
		||||
						}})
 | 
			
		||||
					guaranteed = f.PodClient().CreateSync(guaranteed)
 | 
			
		||||
					glog.Infof("pod created with name: %s", guaranteed.Name)
 | 
			
		||||
 | 
			
		||||
					// A pod is burstable if limits and requests do not match across all containers.
 | 
			
		||||
					burstable := getMemhogPod("burstable-pod", "burstable", v1.ResourceRequirements{
 | 
			
		||||
						Requests: v1.ResourceList{
 | 
			
		||||
							v1.ResourceCPU:    resource.MustParse("100m"),
 | 
			
		||||
							v1.ResourceMemory: resource.MustParse("100Mi"),
 | 
			
		||||
						}})
 | 
			
		||||
					burstable = f.PodClient().CreateSync(burstable)
 | 
			
		||||
					glog.Infof("pod created with name: %s", burstable.Name)
 | 
			
		||||
 | 
			
		||||
					// A pod is besteffort if none of its containers have specified any requests or limits	.
 | 
			
		||||
					besteffort := getMemhogPod("besteffort-pod", "besteffort", v1.ResourceRequirements{})
 | 
			
		||||
					besteffort = f.PodClient().CreateSync(besteffort)
 | 
			
		||||
					glog.Infof("pod created with name: %s", besteffort.Name)
 | 
			
		||||
 | 
			
		||||
					// We poll until timeout or all pods are killed.
 | 
			
		||||
					// Inside the func, we check that all pods are in a valid phase with
 | 
			
		||||
					// respect to the eviction order of best effort, then burstable, then guaranteed.
 | 
			
		||||
					By("polling the Status.Phase of each pod and checking for violations of the eviction order.")
 | 
			
		||||
					Eventually(func() error {
 | 
			
		||||
 | 
			
		||||
						gteed, gtErr := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(guaranteed.Name, metav1.GetOptions{})
 | 
			
		||||
						framework.ExpectNoError(gtErr, fmt.Sprintf("getting pod %s", guaranteed.Name))
 | 
			
		||||
						gteedPh := gteed.Status.Phase
 | 
			
		||||
 | 
			
		||||
						burst, buErr := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(burstable.Name, metav1.GetOptions{})
 | 
			
		||||
						framework.ExpectNoError(buErr, fmt.Sprintf("getting pod %s", burstable.Name))
 | 
			
		||||
						burstPh := burst.Status.Phase
 | 
			
		||||
 | 
			
		||||
						best, beErr := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(besteffort.Name, metav1.GetOptions{})
 | 
			
		||||
						framework.ExpectNoError(beErr, fmt.Sprintf("getting pod %s", besteffort.Name))
 | 
			
		||||
						bestPh := best.Status.Phase
 | 
			
		||||
 | 
			
		||||
						glog.Infof("pod phase: guaranteed: %v, burstable: %v, besteffort: %v", gteedPh, burstPh, bestPh)
 | 
			
		||||
 | 
			
		||||
						// NOTE/TODO(mtaufen): This should help us debug why burstable appears to fail before besteffort in some
 | 
			
		||||
						//                     scenarios. We have seen some evidence that the eviction manager has in fact done the
 | 
			
		||||
						//                     right thing and evicted the besteffort first, and attempted to change the besteffort phase
 | 
			
		||||
						//                     to "Failed" when it evicts it, but that for some reason the test isn't seeing the updated
 | 
			
		||||
						//                     phase. I'm trying to confirm or deny this.
 | 
			
		||||
						//                     The eviction manager starts trying to evict things when the node comes under memory
 | 
			
		||||
						//                     pressure, and the eviction manager reports this information in the pressure condition. If we
 | 
			
		||||
						//                     see the eviction manager reporting a pressure condition for a while without the besteffort failing,
 | 
			
		||||
						//                     and we see that the manager did in fact evict the besteffort (this should be in the Kubelet log), we
 | 
			
		||||
						//                     will have more reason to believe the phase is out of date.
 | 
			
		||||
						nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
 | 
			
		||||
						if err != nil {
 | 
			
		||||
							glog.Errorf("tried to get node list but got error: %v", err)
 | 
			
		||||
						}
 | 
			
		||||
						if len(nodeList.Items) != 1 {
 | 
			
		||||
							glog.Errorf("expected 1 node, but see %d. List: %v", len(nodeList.Items), nodeList.Items)
 | 
			
		||||
						}
 | 
			
		||||
						node := nodeList.Items[0]
 | 
			
		||||
						_, pressure := nodeutil.GetNodeCondition(&node.Status, v1.NodeMemoryPressure)
 | 
			
		||||
						glog.Infof("node pressure condition: %s", pressure)
 | 
			
		||||
 | 
			
		||||
						// NOTE/TODO(mtaufen): Also log (at least temporarily) the actual memory consumption on the node.
 | 
			
		||||
						//                     I used this to plot memory usage from a successful test run and it looks the
 | 
			
		||||
						//                     way I would expect. I want to see what the plot from a flake looks like.
 | 
			
		||||
						summary, err := getNodeSummary()
 | 
			
		||||
						if err != nil {
 | 
			
		||||
							return err
 | 
			
		||||
						}
 | 
			
		||||
						if summary.Node.Memory.WorkingSetBytes != nil {
 | 
			
		||||
							wset := *summary.Node.Memory.WorkingSetBytes
 | 
			
		||||
							glog.Infof("Node's working set is (bytes): %v", wset)
 | 
			
		||||
 | 
			
		||||
						}
 | 
			
		||||
 | 
			
		||||
						if bestPh == v1.PodRunning {
 | 
			
		||||
							Expect(burstPh).NotTo(Equal(v1.PodFailed), "burstable pod failed before best effort pod")
 | 
			
		||||
							Expect(gteedPh).NotTo(Equal(v1.PodFailed), "guaranteed pod failed before best effort pod")
 | 
			
		||||
						} else if burstPh == v1.PodRunning {
 | 
			
		||||
							Expect(gteedPh).NotTo(Equal(v1.PodFailed), "guaranteed pod failed before burstable pod")
 | 
			
		||||
						}
 | 
			
		||||
 | 
			
		||||
						// When both besteffort and burstable have been evicted, the test has completed.
 | 
			
		||||
						if bestPh == v1.PodFailed && burstPh == v1.PodFailed {
 | 
			
		||||
							return nil
 | 
			
		||||
						}
 | 
			
		||||
						return fmt.Errorf("besteffort and burstable have not yet both been evicted.")
 | 
			
		||||
 | 
			
		||||
					}, 60*time.Minute, 5*time.Second).Should(BeNil())
 | 
			
		||||
 | 
			
		||||
				})
 | 
			
		||||
			})
 | 
			
		||||
		})
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
})
 | 
			
		||||
 | 
			
		||||
func getMemhogPod(podName string, ctnName string, res v1.ResourceRequirements) *v1.Pod {
 | 
			
		||||
	env := []v1.EnvVar{
 | 
			
		||||
		{
 | 
			
		||||
			Name: "MEMORY_LIMIT",
 | 
			
		||||
			ValueFrom: &v1.EnvVarSource{
 | 
			
		||||
				ResourceFieldRef: &v1.ResourceFieldSelector{
 | 
			
		||||
					Resource: "limits.memory",
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// If there is a limit specified, pass 80% of it for -mem-total, otherwise use the downward API
 | 
			
		||||
	// to pass limits.memory, which will be the total memory available.
 | 
			
		||||
	// This helps prevent a guaranteed pod from triggering an OOM kill due to it's low memory limit,
 | 
			
		||||
	// which will cause the test to fail inappropriately.
 | 
			
		||||
	var memLimit string
 | 
			
		||||
	if limit, ok := res.Limits[v1.ResourceMemory]; ok {
 | 
			
		||||
		memLimit = strconv.Itoa(int(
 | 
			
		||||
			float64(limit.Value()) * 0.8))
 | 
			
		||||
	} else {
 | 
			
		||||
		memLimit = "$(MEMORY_LIMIT)"
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return &v1.Pod{
 | 
			
		||||
		ObjectMeta: metav1.ObjectMeta{
 | 
			
		||||
			Name: podName,
 | 
			
		||||
		},
 | 
			
		||||
		Spec: v1.PodSpec{
 | 
			
		||||
			RestartPolicy: v1.RestartPolicyNever,
 | 
			
		||||
			Containers: []v1.Container{
 | 
			
		||||
				{
 | 
			
		||||
					Name:            ctnName,
 | 
			
		||||
					Image:           "gcr.io/google-containers/stress:v1",
 | 
			
		||||
					ImagePullPolicy: "Always",
 | 
			
		||||
					Env:             env,
 | 
			
		||||
					// 60 min timeout * 60s / tick per 10s = 360 ticks before timeout => ~11.11Mi/tick
 | 
			
		||||
					// to fill ~4Gi of memory, so initial ballpark 12Mi/tick.
 | 
			
		||||
					// We might see flakes due to timeout if the total memory on the nodes increases.
 | 
			
		||||
					Args:      []string{"-mem-alloc-size", "12Mi", "-mem-alloc-sleep", "10s", "-mem-total", memLimit},
 | 
			
		||||
					Resources: res,
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user