Merge pull request #128431 from NoicFank/automated-cherry-pick-of-#128307-upstream-release-1.31

Automated cherry pick of #128307: bugfix(scheduler): preemption picks wrong victim node with higher priority pod on it
Authored by Kubernetes Prow Robot, committed via GitHub on 2024-11-12 09:13:07 +00:00.
3 changed files with 105 additions and 0 deletions

File: pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go

@@ -191,6 +191,8 @@ func (pl *DefaultPreemption) SelectVictimsOnNode(
	}
	var victims []*v1.Pod
	numViolatingVictim := 0
	// Sort potentialVictims by pod priority from high to low, which ensures to
	// reprieve higher priority pods first.
	sort.Slice(potentialVictims, func(i, j int) bool { return util.MoreImportantPod(potentialVictims[i].Pod, potentialVictims[j].Pod) })
	// Try to reprieve as many pods as possible. We first try to reprieve the PDB
	// violating victims and then other non-violating ones. In both cases, we start
@@ -225,6 +227,11 @@ func (pl *DefaultPreemption) SelectVictimsOnNode(
			return nil, 0, framework.AsStatus(err)
		}
	}
	// Sort victims after reprieving pods to keep the pods in the victims sorted in order of priority from high to low.
	if len(violatingVictims) != 0 && len(nonViolatingVictims) != 0 {
		sort.Slice(victims, func(i, j int) bool { return util.MoreImportantPod(victims[i], victims[j]) })
	}
	return victims, numViolatingVictim, framework.NewStatus(framework.Success)
}
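Why the extra sort matters: SelectVictimsOnNode collects victims in two passes, first the PDB-violating pods and then the non-violating ones, so the concatenated victims slice can start with a low-priority (but PDB-violating) pod even though a higher-priority pod is also being evicted. Downstream, node selection assumes victims are ordered from high to low priority when it compares the "highest victim priority" across candidate nodes, which is how the wrong node could win. A minimal standalone sketch of the effect (hypothetical pod names and plain integer priorities in place of util.MoreImportantPod; not the upstream code):

```go
// Sketch: victim ordering before and after the re-sort added by this fix.
package main

import (
	"fmt"
	"sort"
)

type victim struct {
	name     string
	priority int32
}

func main() {
	// Hypothetical victims on one node, gathered the way the two reprieve
	// passes append them: PDB-violating pods first, non-violating pods second.
	violating := []victim{{name: "p2", priority: 100}}     // low priority, violates its PDB
	nonViolating := []victim{{name: "p1", priority: 1000}} // high priority, PDB allows disruption

	victims := append(append([]victim{}, violating...), nonViolating...)
	fmt.Println("before re-sort, victims[0]:", victims[0].name) // p2, which understates the node's real preemption cost

	// The fix: keep victims ordered from high to low priority after reprieving.
	sort.Slice(victims, func(i, j int) bool { return victims[i].priority > victims[j].priority })
	fmt.Println("after re-sort, victims[0]:", victims[0].name) // p1
}
```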

File: pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go

@@ -21,6 +21,7 @@ import (
	"encoding/json"
	"errors"
	"fmt"
	"math"
	"math/rand"
	"sort"
	"strings"
@@ -142,6 +143,12 @@ func (pl *TestPlugin) Filter(ctx context.Context, state *framework.CycleState, p
	return nil
}

const (
	LabelKeyIsViolatingPDB    = "test.kubernetes.io/is-violating-pdb"
	LabelValueViolatingPDB    = "violating"
	LabelValueNonViolatingPDB = "non-violating"
)

func TestPostFilter(t *testing.T) {
	onePodRes := map[v1.ResourceName]string{v1.ResourcePods: "1"}
	nodeRes := map[v1.ResourceName]string{v1.ResourceCPU: "200m", v1.ResourceMemory: "400"}
@@ -149,6 +156,7 @@ func TestPostFilter(t *testing.T) {
		name                  string
		pod                   *v1.Pod
		pods                  []*v1.Pod
		pdbs                  []*policy.PodDisruptionBudget
		nodes                 []*v1.Node
		filteredNodesStatuses framework.NodeToStatusMap
		extender              framework.Extender
@@ -218,6 +226,29 @@ func TestPostFilter(t *testing.T) {
			wantResult: framework.NewPostFilterResultWithNominatedNode("node2"),
			wantStatus: framework.NewStatus(framework.Success),
		},
		{
			name: "pod can be made schedulable on minHighestPriority node",
			pod:  st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(veryHighPriority).Obj(),
			pods: []*v1.Pod{
				st.MakePod().Name("p1").UID("p1").Label(LabelKeyIsViolatingPDB, LabelValueNonViolatingPDB).Namespace(v1.NamespaceDefault).Priority(highPriority).Node("node1").Obj(),
				st.MakePod().Name("p2").UID("p2").Label(LabelKeyIsViolatingPDB, LabelValueViolatingPDB).Namespace(v1.NamespaceDefault).Priority(lowPriority).Node("node1").Obj(),
				st.MakePod().Name("p3").UID("p3").Label(LabelKeyIsViolatingPDB, LabelValueViolatingPDB).Namespace(v1.NamespaceDefault).Priority(midPriority).Node("node2").Obj(),
			},
			pdbs: []*policy.PodDisruptionBudget{
				st.MakePDB().Name("violating-pdb").Namespace(v1.NamespaceDefault).MatchLabel(LabelKeyIsViolatingPDB, LabelValueViolatingPDB).MinAvailable("100%").Obj(),
				st.MakePDB().Name("non-violating-pdb").Namespace(v1.NamespaceDefault).MatchLabel(LabelKeyIsViolatingPDB, LabelValueNonViolatingPDB).MinAvailable("0").DisruptionsAllowed(math.MaxInt32).Obj(),
			},
			nodes: []*v1.Node{
				st.MakeNode().Name("node1").Capacity(onePodRes).Obj(),
				st.MakeNode().Name("node2").Capacity(onePodRes).Obj(),
			},
			filteredNodesStatuses: framework.NodeToStatusMap{
				"node1": framework.NewStatus(framework.Unschedulable),
				"node2": framework.NewStatus(framework.Unschedulable),
			},
			wantResult: framework.NewPostFilterResultWithNominatedNode("node2"),
			wantStatus: framework.NewStatus(framework.Success),
		},
		{
			name: "preemption result filtered out by extenders",
			pod:  st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Obj(),
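Reading the new "minHighestPriority" case above: both candidate nodes end up with one PDB-violating victim, so the choice falls to comparing each node's highest-priority victim. With the victims kept sorted, node1's list is headed by the highPriority pod p1 while node2's only victim is the midPriority pod p3, so node2 is correctly nominated; before the fix, node1's list started with the lowPriority pod p2 and node1 could win instead. A rough sketch of that comparison with made-up priority values (not the scheduler's actual tie-breaking code):

```go
// Sketch: prefer the node whose highest-priority victim has the lowest priority.
// The priority values below are assumed stand-ins for the test's
// lowPriority/midPriority/highPriority constants.
package main

import "fmt"

const (
	lowPriority  int32 = 10
	midPriority  int32 = 100
	highPriority int32 = 1000
)

func main() {
	// Highest-priority victim per candidate node once victims stay sorted
	// from high to low priority (node1: p1, node2: p3).
	highestVictim := map[string]int32{
		"node1": highPriority,
		"node2": midPriority,
	}

	best, bestPrio := "", int32(0)
	for node, prio := range highestVictim {
		if best == "" || prio < bestPrio {
			best, bestPrio = node, prio
		}
	}
	fmt.Println("nominated node:", best) // node2
}
```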
@@ -347,6 +378,13 @@ func TestPostFilter(t *testing.T) {
			for i := range tt.pods {
				podInformer.GetStore().Add(tt.pods[i])
			}
			pdbInformer := informerFactory.Policy().V1().PodDisruptionBudgets().Informer()
			for i := range tt.pdbs {
				if err := pdbInformer.GetStore().Add(tt.pdbs[i]); err != nil {
					t.Fatal(err)
				}
			}
			// Register NodeResourceFit as the Filter & PreFilter plugin.
			registeredPlugins := []tf.RegisterPluginFunc{
				tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),