Merge pull request #57467 from dashpole/move_eviction_tests
Automatic merge from submit-queue (batch tested with PRs 57467, 58996). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Remove flaky label from Eviction tests

**What this PR does / why we need it**: All eviction tests in the flaky suite are no longer flaky. Remove the flaky label to move them from the flaky suite to the serial suite. I removed the QoS-based memory eviction test since it does not reflect the current eviction strategy.

**Release note**:
```release-note
NONE
```

/assign @mtaufen @Random-Liu
/sig node
/priority important-soon
/kind cleanup
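For context, suite membership in the node e2e jobs is driven by regular expressions matched against these Ginkgo description strings, so dropping the `[Flaky]` tag is what moves a test from the flaky suite to the serial suite. Below is a minimal, illustrative sketch of that matching, using the post-PR `InodeEviction` description; the exact focus/skip regexes used by the CI jobs are an assumption, not taken from this PR.

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Description string after this PR removes the [Flaky] tag.
	name := "InodeEviction [Slow] [Serial] [Disruptive]"

	// Illustrative selectors: a flaky-suite job focuses on [Flaky], while a
	// serial-suite job focuses on [Serial] and skips anything tagged [Flaky].
	flakyFocus := regexp.MustCompile(`\[Flaky\]`)
	serialFocus := regexp.MustCompile(`\[Serial\]`)

	fmt.Println("flaky suite runs it: ", flakyFocus.MatchString(name))                                   // false
	fmt.Println("serial suite runs it:", serialFocus.MatchString(name) && !flakyFocus.MatchString(name)) // true
}
```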
@@ -96,7 +96,6 @@ go_test(
        "kubelet_test.go",
        "lifecycle_hook_test.go",
        "log_path_test.go",
-        "memory_eviction_test.go",
        "mirror_pod_test.go",
        "pods_container_manager_test.go",
        "runtime_conformance_test.go",

@@ -19,6 +19,7 @@ package e2e_node
import (
	"fmt"
	"path/filepath"
+	"strconv"
	"time"

	"k8s.io/api/core/v1"
@@ -55,7 +56,7 @@ const (

// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Node disk pressure is induced by consuming all inodes on the node.
-var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
+var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", func() {
	f := framework.NewDefaultFramework("inode-eviction-test")
	expectedNodeCondition := v1.NodeDiskPressure
	pressureTimeout := 15 * time.Minute
@@ -90,7 +91,7 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive] [Flak

// MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods.
// Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
-var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
+var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive]", func() {
	f := framework.NewDefaultFramework("memory-allocatable-eviction-test")
	expectedNodeCondition := v1.NodeMemoryPressure
	pressureTimeout := 10 * time.Minute
@@ -122,7 +123,7 @@ var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disru

// LocalStorageEviction tests that the node responds to node disk pressure by evicting only responsible pods
// Disk pressure is induced by running pods which consume disk space.
-var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
+var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive]", func() {
	f := framework.NewDefaultFramework("localstorage-eviction-test")
	pressureTimeout := 10 * time.Minute
	expectedNodeCondition := v1.NodeDiskPressure
@@ -150,7 +151,7 @@ var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive
// LocalStorageEviction tests that the node responds to node disk pressure by evicting only responsible pods
// Disk pressure is induced by running pods which consume disk space, which exceed the soft eviction threshold.
// Note: This test's purpose is to test Soft Evictions.  Local storage was chosen since it is the least costly to run.
-var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
+var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disruptive]", func() {
	f := framework.NewDefaultFramework("localstorage-eviction-test")
	pressureTimeout := 10 * time.Minute
	expectedNodeCondition := v1.NodeDiskPressure
@@ -184,7 +185,7 @@ var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disrup
})

// LocalStorageCapacityIsolationEviction tests that container and volume local storage limits are enforced through evictions
-var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Serial] [Disruptive] [Flaky] [Feature:LocalStorageCapacityIsolation]", func() {
+var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Serial] [Disruptive] [Feature:LocalStorageCapacityIsolation]", func() {
	f := framework.NewDefaultFramework("localstorage-eviction-test")
	evictionTestTimeout := 10 * time.Minute
	Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
@@ -236,7 +237,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
// PriorityMemoryEvictionOrdering tests that the node responds to node memory pressure by evicting pods.
// This test tests that the guaranteed pod is never evicted, and that the lower-priority pod is evicted before
// the higher priority pod.
-var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [Disruptive] [Flaky]", func() {
+var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [Disruptive]", func() {
	f := framework.NewDefaultFramework("priority-memory-eviction-ordering-test")
	expectedNodeCondition := v1.NodeMemoryPressure
	pressureTimeout := 10 * time.Minute
@@ -282,7 +283,7 @@ var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [
// PriorityLocalStorageEvictionOrdering tests that the node responds to node disk pressure by evicting pods.
// This test tests that the guaranteed pod is never evicted, and that the lower-priority pod is evicted before
// the higher priority pod.
-var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Serial] [Disruptive] [Flaky]", func() {
+var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Serial] [Disruptive]", func() {
	f := framework.NewDefaultFramework("priority-disk-eviction-ordering-test")
	expectedNodeCondition := v1.NodeDiskPressure
	pressureTimeout := 10 * time.Minute
@@ -668,3 +669,50 @@ func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirem
		},
	}
}
+
+func getMemhogPod(podName string, ctnName string, res v1.ResourceRequirements) *v1.Pod {
+	env := []v1.EnvVar{
+		{
+			Name: "MEMORY_LIMIT",
+			ValueFrom: &v1.EnvVarSource{
+				ResourceFieldRef: &v1.ResourceFieldSelector{
+					Resource: "limits.memory",
+				},
+			},
+		},
+	}
+
+	// If there is a limit specified, pass 80% of it for -mem-total, otherwise use the downward API
+	// to pass limits.memory, which will be the total memory available.
+	// This helps prevent a guaranteed pod from triggering an OOM kill due to it's low memory limit,
+	// which will cause the test to fail inappropriately.
+	var memLimit string
+	if limit, ok := res.Limits[v1.ResourceMemory]; ok {
+		memLimit = strconv.Itoa(int(
+			float64(limit.Value()) * 0.8))
+	} else {
+		memLimit = "$(MEMORY_LIMIT)"
+	}
+
+	return &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: podName,
+		},
+		Spec: v1.PodSpec{
+			RestartPolicy: v1.RestartPolicyNever,
+			Containers: []v1.Container{
+				{
+					Name:            ctnName,
+					Image:           "k8s.gcr.io/stress:v1",
+					ImagePullPolicy: "Always",
+					Env:             env,
+					// 60 min timeout * 60s / tick per 10s = 360 ticks before timeout => ~11.11Mi/tick
+					// to fill ~4Gi of memory, so initial ballpark 12Mi/tick.
+					// We might see flakes due to timeout if the total memory on the nodes increases.
+					Args:      []string{"-mem-alloc-size", "12Mi", "-mem-alloc-sleep", "10s", "-mem-total", memLimit},
+					Resources: res,
+				},
+			},
+		},
+	}
+}
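As a quick illustration of the 80%-of-limit computation in `getMemhogPod` above: a container with a 100Mi memory limit is asked to allocate 83886080 bytes (80Mi) in total. The sketch below mirrors the `resource`/`strconv` calls from the diff; the 100Mi figure is only an example matching the guaranteed pod used by these tests.

```go
package main

import (
	"fmt"
	"strconv"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Example container memory limit.
	limit := resource.MustParse("100Mi") // 104857600 bytes

	// Same computation as getMemhogPod: pass 80% of the limit as -mem-total.
	memTotal := strconv.Itoa(int(float64(limit.Value()) * 0.8))

	fmt.Println(memTotal) // "83886080", i.e. 80Mi
}
```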
@@ -1,287 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e_node

import (
	"fmt"
	"strconv"
	"time"

	"github.com/golang/glog"
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
	"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
	"k8s.io/kubernetes/test/e2e/framework"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

// Eviction Policy is described here:
// https://github.com/kubernetes/kubernetes/blob/master/docs/proposals/kubelet-eviction.md

var _ = framework.KubeDescribe("MemoryEviction [Slow] [Serial] [Disruptive]", func() {
	var (
		evictionHard = map[string]string{"memory.available": "40%"}
	)

	f := framework.NewDefaultFramework("eviction-test")

	// This is a dummy context to wrap the outer AfterEach, which will run after the inner AfterEach.
	// We want to list all of the node and pod events, including any that occur while waiting for
	// memory pressure reduction, even if we time out while waiting.
	Context("", func() {

		AfterEach(func() {
			// Print events
			logNodeEvents(f)
			logPodEvents(f)
		})
		Context("", func() {
			tempSetCurrentKubeletConfig(f, func(c *kubeletconfig.KubeletConfiguration) {
				c.EvictionHard = evictionHard
			})

			Context("when there is memory pressure", func() {
				AfterEach(func() {
					// Wait for the memory pressure condition to disappear from the node status before continuing.
					By("waiting for the memory pressure condition on the node to disappear before ending the test.")
					Eventually(func() error {
						nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
						if err != nil {
							return fmt.Errorf("tried to get node list but got error: %v", err)
						}
						// Assuming that there is only one node, because this is a node e2e test.
						if len(nodeList.Items) != 1 {
							return fmt.Errorf("expected 1 node, but see %d. List: %v", len(nodeList.Items), nodeList.Items)
						}
						node := nodeList.Items[0]
						_, pressure := nodeutil.GetNodeCondition(&node.Status, v1.NodeMemoryPressure)
						if pressure != nil && pressure.Status == v1.ConditionTrue {
							return fmt.Errorf("node is still reporting memory pressure condition: %s", pressure)
						}
						return nil
					}, 5*time.Minute, 15*time.Second).Should(BeNil())

					// Check available memory after condition disappears, just in case:
					// Wait for available memory to decrease to a reasonable level before ending the test.
					// This helps prevent interference with tests that start immediately after this one.
					By("waiting for available memory to decrease to a reasonable level before ending the test.")
					Eventually(func() error {
						summary, err := getNodeSummary()
						if err != nil {
							return err
						}
						if summary.Node.Memory.AvailableBytes == nil {
							return fmt.Errorf("summary.Node.Memory.AvailableBytes was nil, cannot get memory stats.")
						}
						if summary.Node.Memory.WorkingSetBytes == nil {
							return fmt.Errorf("summary.Node.Memory.WorkingSetBytes was nil, cannot get memory stats.")
						}
						avail := *summary.Node.Memory.AvailableBytes
						wset := *summary.Node.Memory.WorkingSetBytes

						// memory limit = avail + wset
						limit := avail + wset
						halflimit := limit / 2

						// Wait for at least half of memory limit to be available
						if avail >= halflimit {
							return nil
						}
						return fmt.Errorf("current available memory is: %d bytes. Expected at least %d bytes available.", avail, halflimit)
					}, 5*time.Minute, 15*time.Second).Should(BeNil())

					// TODO(mtaufen): 5 minute wait to stop flaky test bleeding while we figure out what is actually going on.
					//                If related to pressure transition period in eviction manager, probably only need to wait
					//                just over 30s becasue that is the transition period set for node e2e tests. But since we
					//                know 5 min works and we don't know if transition period is the problem, wait 5 min for now.
					time.Sleep(5 * time.Minute)

					// Finally, try starting a new pod and wait for it to be scheduled and running.
					// This is the final check to try to prevent interference with subsequent tests.
					podName := "admit-best-effort-pod"
					f.PodClient().CreateSync(&v1.Pod{
						ObjectMeta: metav1.ObjectMeta{
							Name: podName,
						},
						Spec: v1.PodSpec{
							RestartPolicy: v1.RestartPolicyNever,
							Containers: []v1.Container{
								{
									Image: framework.GetPauseImageNameForHostArch(),
									Name:  podName,
								},
							},
						},
					})
				})

				It("should evict pods in the correct order (besteffort first, then burstable, then guaranteed)", func() {
					By("creating a guaranteed pod, a burstable pod, and a besteffort pod.")

					// A pod is guaranteed only when requests and limits are specified for all the containers and they are equal.
					guaranteed := getMemhogPod("guaranteed-pod", "guaranteed", v1.ResourceRequirements{
						Requests: v1.ResourceList{
							v1.ResourceCPU:    resource.MustParse("100m"),
							v1.ResourceMemory: resource.MustParse("100Mi"),
						},
						Limits: v1.ResourceList{
							v1.ResourceCPU:    resource.MustParse("100m"),
							v1.ResourceMemory: resource.MustParse("100Mi"),
						}})
					guaranteed = f.PodClient().CreateSync(guaranteed)
					glog.Infof("pod created with name: %s", guaranteed.Name)

					// A pod is burstable if limits and requests do not match across all containers.
					burstable := getMemhogPod("burstable-pod", "burstable", v1.ResourceRequirements{
						Requests: v1.ResourceList{
							v1.ResourceCPU:    resource.MustParse("100m"),
							v1.ResourceMemory: resource.MustParse("100Mi"),
						}})
					burstable = f.PodClient().CreateSync(burstable)
					glog.Infof("pod created with name: %s", burstable.Name)

					// A pod is besteffort if none of its containers have specified any requests or limits	.
					besteffort := getMemhogPod("besteffort-pod", "besteffort", v1.ResourceRequirements{})
					besteffort = f.PodClient().CreateSync(besteffort)
					glog.Infof("pod created with name: %s", besteffort.Name)

					// We poll until timeout or all pods are killed.
					// Inside the func, we check that all pods are in a valid phase with
					// respect to the eviction order of best effort, then burstable, then guaranteed.
					By("polling the Status.Phase of each pod and checking for violations of the eviction order.")
					Eventually(func() error {

						gteed, gtErr := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(guaranteed.Name, metav1.GetOptions{})
						framework.ExpectNoError(gtErr, fmt.Sprintf("getting pod %s", guaranteed.Name))
						gteedPh := gteed.Status.Phase

						burst, buErr := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(burstable.Name, metav1.GetOptions{})
						framework.ExpectNoError(buErr, fmt.Sprintf("getting pod %s", burstable.Name))
						burstPh := burst.Status.Phase

						best, beErr := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(besteffort.Name, metav1.GetOptions{})
						framework.ExpectNoError(beErr, fmt.Sprintf("getting pod %s", besteffort.Name))
						bestPh := best.Status.Phase

						glog.Infof("pod phase: guaranteed: %v, burstable: %v, besteffort: %v", gteedPh, burstPh, bestPh)

						// NOTE/TODO(mtaufen): This should help us debug why burstable appears to fail before besteffort in some
						//                     scenarios. We have seen some evidence that the eviction manager has in fact done the
						//                     right thing and evicted the besteffort first, and attempted to change the besteffort phase
						//                     to "Failed" when it evicts it, but that for some reason the test isn't seeing the updated
						//                     phase. I'm trying to confirm or deny this.
						//                     The eviction manager starts trying to evict things when the node comes under memory
						//                     pressure, and the eviction manager reports this information in the pressure condition. If we
						//                     see the eviction manager reporting a pressure condition for a while without the besteffort failing,
						//                     and we see that the manager did in fact evict the besteffort (this should be in the Kubelet log), we
						//                     will have more reason to believe the phase is out of date.
						nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
						if err != nil {
							glog.Errorf("tried to get node list but got error: %v", err)
						}
						if len(nodeList.Items) != 1 {
							glog.Errorf("expected 1 node, but see %d. List: %v", len(nodeList.Items), nodeList.Items)
						}
						node := nodeList.Items[0]
						_, pressure := nodeutil.GetNodeCondition(&node.Status, v1.NodeMemoryPressure)
						glog.Infof("node pressure condition: %s", pressure)

						// NOTE/TODO(mtaufen): Also log (at least temporarily) the actual memory consumption on the node.
						//                     I used this to plot memory usage from a successful test run and it looks the
						//                     way I would expect. I want to see what the plot from a flake looks like.
						summary, err := getNodeSummary()
						if err != nil {
							return err
						}
						if summary.Node.Memory.WorkingSetBytes != nil {
							wset := *summary.Node.Memory.WorkingSetBytes
							glog.Infof("Node's working set is (bytes): %v", wset)

						}

						if bestPh == v1.PodRunning {
							Expect(burstPh).NotTo(Equal(v1.PodFailed), "burstable pod failed before best effort pod")
							Expect(gteedPh).NotTo(Equal(v1.PodFailed), "guaranteed pod failed before best effort pod")
						} else if burstPh == v1.PodRunning {
							Expect(gteedPh).NotTo(Equal(v1.PodFailed), "guaranteed pod failed before burstable pod")
						}

						// When both besteffort and burstable have been evicted, the test has completed.
						if bestPh == v1.PodFailed && burstPh == v1.PodFailed {
							return nil
						}
						return fmt.Errorf("besteffort and burstable have not yet both been evicted.")

					}, 60*time.Minute, 5*time.Second).Should(BeNil())

				})
			})
		})
	})

})

func getMemhogPod(podName string, ctnName string, res v1.ResourceRequirements) *v1.Pod {
	env := []v1.EnvVar{
		{
			Name: "MEMORY_LIMIT",
			ValueFrom: &v1.EnvVarSource{
				ResourceFieldRef: &v1.ResourceFieldSelector{
					Resource: "limits.memory",
				},
			},
		},
	}

	// If there is a limit specified, pass 80% of it for -mem-total, otherwise use the downward API
	// to pass limits.memory, which will be the total memory available.
	// This helps prevent a guaranteed pod from triggering an OOM kill due to it's low memory limit,
	// which will cause the test to fail inappropriately.
	var memLimit string
	if limit, ok := res.Limits[v1.ResourceMemory]; ok {
		memLimit = strconv.Itoa(int(
			float64(limit.Value()) * 0.8))
	} else {
		memLimit = "$(MEMORY_LIMIT)"
	}

	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: podName,
		},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{
				{
					Name:            ctnName,
					Image:           "gcr.io/google-containers/stress:v1",
					ImagePullPolicy: "Always",
					Env:             env,
					// 60 min timeout * 60s / tick per 10s = 360 ticks before timeout => ~11.11Mi/tick
					// to fill ~4Gi of memory, so initial ballpark 12Mi/tick.
					// We might see flakes due to timeout if the total memory on the nodes increases.
					Args:      []string{"-mem-alloc-size", "12Mi", "-mem-alloc-sleep", "10s", "-mem-total", memLimit},
					Resources: res,
				},
			},
		},
	}
}