Fix a scheduler flaky e2e test
@@ -39,7 +39,6 @@ import (
 )
 
 const maxNumberOfPods int64 = 10
-const minPodCPURequest int64 = 500
 
 var localStorageVersion = utilversion.MustParseSemantic("v1.8.0-beta.0")
 
@@ -227,14 +226,36 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 		verifyResult(cs, podsNeededForSaturation, 1, ns)
 	})
 
-	// This test verifies we don't allow scheduling of pods in a way that sum of limits of pods is greater than machines capacity.
-	// It assumes that cluster add-on pods stay stable and cannot be run in parallel with any other test that touches Nodes or Pods.
+	// This test verifies we don't allow scheduling of pods in a way that sum of
+	// limits of pods is greater than machines capacity.
+	// It assumes that cluster add-on pods stay stable and cannot be run in parallel
+	// with any other test that touches Nodes or Pods.
 	// It is so because we need to have precise control on what's running in the cluster.
+	// Test scenario:
+	// 1. Find the amount of CPU resources on each node.
+	// 2. Create one pod with affinity to each node that uses 70% of the node CPU.
+	// 3. Wait for the pods to be scheduled.
+	// 4. Create another pod with no affinity to any node that needs 50% of the largest node CPU.
+	// 5. Make sure this additional pod is not scheduled.
 	It("validates resource limits of pods that are allowed to run [Conformance]", func() {
+		framework.WaitForStableCluster(cs, masterNodes)
 		nodeMaxAllocatable := int64(0)
 
 		nodeToAllocatableMap := make(map[string]int64)
 		for _, node := range nodeList.Items {
+			nodeReady := false
+			for _, condition := range node.Status.Conditions {
+				if condition.Type == v1.NodeReady && condition.Status == v1.ConditionTrue {
+					nodeReady = true
+					break
+				}
+			}
+			if !nodeReady {
+				continue
+			}
+			// Apply node label to each node
+			framework.AddOrUpdateLabelOnNode(cs, node.Name, "node", node.Name)
+			framework.ExpectNodeHasLabel(cs, node.Name, "node", node.Name)
 			// Find allocatable amount of CPU.
 			allocatable, found := node.Status.Allocatable[v1.ResourceCPU]
 			Expect(found).To(Equal(true))
 			nodeToAllocatableMap[node.Name] = allocatable.MilliValue()
@@ -242,7 +263,12 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 				nodeMaxAllocatable = allocatable.MilliValue()
 			}
 		}
-		framework.WaitForStableCluster(cs, masterNodes)
+		// Clean up added labels after this test.
+		defer func() {
+			for nodeName := range nodeToAllocatableMap {
+				framework.RemoveLabelOffNode(cs, nodeName, "node")
+			}
+		}()
 
 		pods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{})
 		framework.ExpectNoError(err)
@@ -254,51 +280,60 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 			}
 		}
 
-		var podsNeededForSaturation int
-
-		milliCpuPerPod := nodeMaxAllocatable / maxNumberOfPods
-		if milliCpuPerPod < minPodCPURequest {
-			milliCpuPerPod = minPodCPURequest
-		}
-		framework.Logf("Using pod capacity: %vm", milliCpuPerPod)
-		for name, leftAllocatable := range nodeToAllocatableMap {
-			framework.Logf("Node: %v has cpu allocatable: %vm", name, leftAllocatable)
-			podsNeededForSaturation += (int)(leftAllocatable / milliCpuPerPod)
-		}
-
-		By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster CPU and trying to start another one", podsNeededForSaturation))
-
-		// As the pods are distributed randomly among nodes,
-		// it can easily happen that all nodes are saturated
-		// and there is no need to create additional pods.
-		// StartPods requires at least one pod to replicate.
-		if podsNeededForSaturation > 0 {
-			framework.ExpectNoError(testutils.StartPods(cs, podsNeededForSaturation, ns, "overcommit",
-				*initPausePod(f, pausePodConfig{
-					Name:   "",
-					Labels: map[string]string{"name": ""},
-					Resources: &v1.ResourceRequirements{
-						Limits: v1.ResourceList{
-							v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
-						},
-						Requests: v1.ResourceList{
-							v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
-						},
-					},
-				}), true, framework.Logf))
+		By("Starting Pods to consume most of the cluster CPU.")
+		// Create one pod per node that requires 70% of the node remaining CPU.
+		fillerPods := []*v1.Pod{}
+		for nodeName, cpu := range nodeToAllocatableMap {
+			requestedCPU := cpu * 7 / 10
+			fillerPods = append(fillerPods, createPausePod(f, pausePodConfig{
+				Name: "filler-pod-" + nodeName,
+				Resources: &v1.ResourceRequirements{
+					Limits: v1.ResourceList{
+						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
+					},
+					Requests: v1.ResourceList{
+						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
+					},
+				},
+				Affinity: &v1.Affinity{
+					NodeAffinity: &v1.NodeAffinity{
+						RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+							NodeSelectorTerms: []v1.NodeSelectorTerm{
+								{
+									MatchExpressions: []v1.NodeSelectorRequirement{
+										{
+											Key:      "node",
+											Operator: v1.NodeSelectorOpIn,
+											Values:   []string{nodeName},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			}))
 		}
+		// Wait for filler pods to schedule.
+		for _, pod := range fillerPods {
+			framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
+		}
+		By("Creating another pod that requires unavailable amount of CPU.")
+		// Create another pod that requires 50% of the largest node CPU resources.
+		// This pod should remain pending as at least 70% of CPU of other nodes in
+		// the cluster are already consumed.
 		podName := "additional-pod"
 		conf := pausePodConfig{
 			Name:   podName,
 			Labels: map[string]string{"name": "additional"},
 			Resources: &v1.ResourceRequirements{
 				Limits: v1.ResourceList{
-					v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
+					v1.ResourceCPU: *resource.NewMilliQuantity(nodeMaxAllocatable*5/10, "DecimalSI"),
 				},
 			},
 		}
 		WaitForSchedulerAfterAction(f, createPausePodAction(f, conf), podName, false)
-		verifyResult(cs, podsNeededForSaturation, 1, ns)
+		verifyResult(cs, len(fillerPods), 1, ns)
 	})
 
 	// Test Nodes does not have any label, hence it should be impossible to schedule Pod with
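
Why the rewritten test cannot flake: after the filler pods are scheduled, every node keeps at most roughly 30% of its allocatable CPU, while the additional pod asks for 50% of the largest node's allocatable CPU, so no node can fit it no matter how the fillers land. The following is a minimal standalone sketch of that arithmetic; it is not part of the commit, and the node names and milliCPU values are made-up examples.

package main

import "fmt"

func main() {
	// Hypothetical remaining allocatable CPU (milliCPU) per node, mirroring
	// the nodeToAllocatableMap the test builds after subtracting the CPU
	// already requested by running pods.
	nodeToAllocatableMap := map[string]int64{
		"node-a": 2000,
		"node-b": 4000, // the largest node
	}

	// Track the largest per-node allocatable value, like nodeMaxAllocatable.
	nodeMaxAllocatable := int64(0)
	for _, cpu := range nodeToAllocatableMap {
		if cpu > nodeMaxAllocatable {
			nodeMaxAllocatable = cpu
		}
	}

	// Each filler pod is pinned to one node and requests 70% of that node's
	// remaining CPU, so no node keeps more than about 30% headroom.
	for name, cpu := range nodeToAllocatableMap {
		filler := cpu * 7 / 10
		fmt.Printf("%s: filler requests %dm, headroom left %dm\n", name, filler, cpu-filler)
	}

	// The additional pod requests 50% of the largest node's CPU, which exceeds
	// the ~30% headroom left on any node, so it has to stay Pending.
	additional := nodeMaxAllocatable * 5 / 10
	fmt.Printf("additional pod requests %dm\n", additional)
}

The removed version started podsNeededForSaturation identical pods with no node affinity and relied on the scheduler distributing them so that every node ended up saturated; the bound above holds for any placement of the filler pods, which is what removes the flakiness.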