Merge pull request #57476 from misterikkit/podAffinityNode
Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Restrict pod affinity search when TopologyKey=kubernetes.io/hostname

**What this PR does / why we need it**:
When a PodAffinityTerm uses TopologyKey=kubernetes.io/hostname, we can avoid searching the entire cluster for a match by only listing pods on the given node.

This is a set of 3 PRs targeting affinity predicate performance. (#57476, #57477, #57478)

The key takeaway is an approximately 2x speedup in the large affinity benchmark. The unexpected increase in BenchmarkScheduling/1000Nodes/1000Pods seems to be an outlier, and did not recur on subsequent runs. The benchmarks have a moderate amount of variance to them, and I did not run them enough times to measure mean and standard deviation.

| test | b.N | master | #57476 | #57477 | #57478 | combined |
| ---- | --- | ------ | ---------- | ---------- | ---------- | -------- |
| BenchmarkScheduling/100Nodes/0Pods | 100 | 39629010 ns/op | 36898566 ns/op (-6.89%) | 38461530 ns/op (-2.95%) | 36214136 ns/op (-8.62%) | 43090781 ns/op (+8.74%) |
| BenchmarkScheduling/100Nodes/1000Pods | 100 | 85489577 ns/op | 69538016 ns/op (-18.66%) | 70104254 ns/op (-18.00%) | 75015585 ns/op (-12.25%) | 80986960 ns/op (-5.27%) |
| BenchmarkScheduling/1000Nodes/0Pods | 100 | 219356660 ns/op | 200149051 ns/op (-8.76%) | 192867469 ns/op (-12.08%) | 196896770 ns/op (-10.24%) | 212563662 ns/op (-3.10%) |
| BenchmarkScheduling/1000Nodes/1000Pods | 100 | 380368238 ns/op | 381786369 ns/op (+0.37%) | 387224973 ns/op (+1.80%) | 417974358 ns/op (+9.89%) | 411140230 ns/op (+8.09%) |
| BenchmarkSchedulingAntiAffinity/500Nodes/250Pods | 250 | 124399176 ns/op | 97568988 ns/op (-21.57%) | 112027363 ns/op (-9.95%) | 129134326 ns/op (+3.81%) | 98607941 ns/op (-20.73%) |
| BenchmarkSchedulingAntiAffinity/500Nodes/5000Pods | 250 | 491677096 ns/op | 441562422 ns/op (-10.19%) | 278127757 ns/op (-43.43%) | 447355609 ns/op (-9.01%) | 226310721 ns/op (-53.97%) |

Combined performance contains all three patches. Percentages are relative to master.

Methodology: I ran the tests on each branch with this command.

```
make test-integration WHAT="./test/integration/scheduler_perf" KUBE_TEST_ARGS="-run=xxxx -bench=."
```

The benchmarks have a fair amount of variance to them, and I did not run them enough times to measure mean and standard deviation.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes #

The three PRs in this set should collectively fix #54189.

**Special notes for your reviewer**:

**Release note**:

```release-note
Improve scheduler performance of MatchInterPodAffinity predicate.
```
This commit is contained in:
		| @@ -1116,7 +1116,7 @@ func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta algorithm | ||||
| // First return value indicates whether a matching pod exists on a node that matches the topology key, | ||||
| // while the second return value indicates whether a matching pod exists anywhere. | ||||
| // TODO: Do we really need any pod matching, or all pods matching? I think the latter. | ||||
| func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, allPods []*v1.Pod, node *v1.Node, term *v1.PodAffinityTerm) (bool, bool, error) { | ||||
| func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, pods []*v1.Pod, nodeInfo *schedulercache.NodeInfo, term *v1.PodAffinityTerm) (bool, bool, error) { | ||||
| 	if len(term.TopologyKey) == 0 { | ||||
| 		return false, false, fmt.Errorf("empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity") | ||||
| 	} | ||||
| @@ -1126,7 +1126,12 @@ func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, allPods [ | ||||
| 	if err != nil { | ||||
| 		return false, false, err | ||||
| 	} | ||||
| 	for _, existingPod := range allPods { | ||||
| 	// Special case: When the topological domain is node, we can limit our | ||||
| 	// search to pods on that node without searching the entire cluster. | ||||
| 	if term.TopologyKey == kubeletapis.LabelHostname { | ||||
| 		pods = nodeInfo.Pods() | ||||
| 	} | ||||
| 	for _, existingPod := range pods { | ||||
| 		match := priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) | ||||
| 		if match { | ||||
| 			matchingPodExists = true | ||||
| @@ -1134,7 +1139,7 @@ func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, allPods [ | ||||
| 			if err != nil { | ||||
| 				return false, matchingPodExists, err | ||||
| 			} | ||||
| 			if priorityutil.NodesHaveSameTopologyKey(node, existingPodNode, term.TopologyKey) { | ||||
| 			if priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), existingPodNode, term.TopologyKey) { | ||||
| 				return true, matchingPodExists, nil | ||||
| 			} | ||||
| 		} | ||||
| @@ -1334,7 +1339,7 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, node | ||||
|  | ||||
| 	// Check all affinity terms. | ||||
| 	for _, term := range GetPodAffinityTerms(affinity.PodAffinity) { | ||||
| 		termMatches, matchingPodExists, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, node, &term) | ||||
| 		termMatches, matchingPodExists, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term) | ||||
| 		if err != nil { | ||||
| 			errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v", podName(pod), node.Name, term, err) | ||||
| 			glog.Error(errMessage) | ||||
| @@ -1367,7 +1372,7 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, node | ||||
|  | ||||
| 	// Check all anti-affinity terms. | ||||
| 	for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) { | ||||
| 		termMatches, _, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, node, &term) | ||||
| 		termMatches, _, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term) | ||||
| 		if err != nil || termMatches { | ||||
| 			glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm %v, err: %v", | ||||
| 				podName(pod), node.Name, term, err) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Submit Queue
					Kubernetes Submit Queue