Merge pull request #128431 from NoicFank/automated-cherry-pick-of-#128307-upstream-release-1.31
Automated cherry pick of #128307: bugfix(scheduler): preemption picks wrong victim node with higher priority pod on it
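Background for the change below (summarized here, not part of the diff): SelectVictimsOnNode reprieves PDB-violating and non-violating pods in two separate passes and appends the pods it cannot reprieve to a single victims slice, so the merged slice is no longer guaranteed to be ordered by priority. The later node-selection step, as I understand it, treats the first entry of each candidate's victims as that node's highest-priority victim, so an out-of-order slice can make a node look cheaper than it is and preemption can nominate a node that actually carries a higher-priority victim. A minimal, self-contained sketch of that comparison (illustrative names only, not the scheduler's code):

package main

import (
	"fmt"
	"sort"
)

// victim is a toy stand-in for *v1.Pod; only the priority matters here.
type victim struct {
	name     string
	priority int32
}

// highestVictimPriority mimics the assumption made when candidate nodes are
// compared: the first element is expected to be the most important victim.
func highestVictimPriority(victims []victim) int32 {
	if len(victims) == 0 {
		return -1
	}
	return victims[0].priority
}

func main() {
	// node1's victims were appended in two passes (PDB-violating first, then
	// non-violating), so the slice is no longer sorted by priority.
	node1 := []victim{{"violating-low", 10}, {"non-violating-high", 1000}}
	node2 := []victim{{"mid", 500}}

	// Without re-sorting, node1 appears to need only a priority-10 victim and
	// wins the comparison, even though preempting there would actually evict
	// a priority-1000 pod.
	fmt.Println(highestVictimPriority(node1), highestVictimPriority(node2)) // 10 500

	// The fix keeps the merged victims ordered from most to least important,
	// so index 0 really is the highest-priority victim on each node.
	sort.Slice(node1, func(i, j int) bool { return node1[i].priority > node1[j].priority })
	fmt.Println(highestVictimPriority(node1), highestVictimPriority(node2)) // 1000 500
}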
@@ -191,6 +191,8 @@ func (pl *DefaultPreemption) SelectVictimsOnNode(
	}
	var victims []*v1.Pod
	numViolatingVictim := 0
	// Sort potentialVictims by pod priority from high to low, which ensures to
	// reprieve higher priority pods first.
	sort.Slice(potentialVictims, func(i, j int) bool { return util.MoreImportantPod(potentialVictims[i].Pod, potentialVictims[j].Pod) })
	// Try to reprieve as many pods as possible. We first try to reprieve the PDB
	// violating victims and then other non-violating ones. In both cases, we start
@@ -225,6 +227,11 @@ func (pl *DefaultPreemption) SelectVictimsOnNode(
			return nil, 0, framework.AsStatus(err)
		}
	}

	// Sort victims after reprieving pods to keep the pods in the victims sorted in order of priority from high to low.
	if len(violatingVictims) != 0 && len(nonViolatingVictims) != 0 {
		sort.Slice(victims, func(i, j int) bool { return util.MoreImportantPod(victims[i], victims[j]) })
	}
	return victims, numViolatingVictim, framework.NewStatus(framework.Success)
}
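The hunks above contain the actual fix; the hunks below add a regression test for it in TestPostFilter. One detail worth spelling out (my reading of the guard, not stated in the diff): each reprieve pass walks its group in the priority order established by the earlier sort, so if only one of violatingVictims or nonViolatingVictims contributed pods, the merged slice is already sorted and the extra sort.Slice is skipped; only when both groups appended victims can the order interleave. A toy model of that control flow, assuming it mirrors the structure of SelectVictimsOnNode rather than its exact code:

package main

import (
	"fmt"
	"sort"
)

type pod struct {
	name     string
	priority int32
}

func main() {
	// Each group is individually sorted from high to low priority, as the
	// earlier sort of potentialVictims guarantees.
	violating := []pod{{"v-high", 900}, {"v-low", 10}}
	nonViolating := []pod{{"nv-mid", 500}}

	// Toy reprieve: assume nothing can be reprieved, so each pass appends its
	// whole group, PDB-violating pods first, then non-violating ones.
	var victims []pod
	victims = append(victims, violating...)
	victims = append(victims, nonViolating...)
	fmt.Println(victims) // priorities 900, 10, 500 - no longer sorted

	// The guard from the fix: only when both groups contributed can the order
	// break, so only then is the merged slice re-sorted.
	if len(violating) != 0 && len(nonViolating) != 0 {
		sort.Slice(victims, func(i, j int) bool { return victims[i].priority > victims[j].priority })
	}
	fmt.Println(victims) // priorities 900, 500, 10
}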
@@ -21,6 +21,7 @@ import (
	"encoding/json"
	"errors"
	"fmt"
	"math"
	"math/rand"
	"sort"
	"strings"
@@ -142,6 +143,12 @@ func (pl *TestPlugin) Filter(ctx context.Context, state *framework.CycleState, p
	return nil
}

const (
	LabelKeyIsViolatingPDB    = "test.kubernetes.io/is-violating-pdb"
	LabelValueViolatingPDB    = "violating"
	LabelValueNonViolatingPDB = "non-violating"
)

func TestPostFilter(t *testing.T) {
	onePodRes := map[v1.ResourceName]string{v1.ResourcePods: "1"}
	nodeRes := map[v1.ResourceName]string{v1.ResourceCPU: "200m", v1.ResourceMemory: "400"}
@@ -149,6 +156,7 @@ func TestPostFilter(t *testing.T) {
		name                  string
		pod                   *v1.Pod
		pods                  []*v1.Pod
		pdbs                  []*policy.PodDisruptionBudget
		nodes                 []*v1.Node
		filteredNodesStatuses framework.NodeToStatusMap
		extender              framework.Extender
@@ -218,6 +226,29 @@ func TestPostFilter(t *testing.T) {
			wantResult: framework.NewPostFilterResultWithNominatedNode("node2"),
			wantStatus: framework.NewStatus(framework.Success),
		},
		{
			name: "pod can be made schedulable on minHighestPriority node",
			pod:  st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(veryHighPriority).Obj(),
			pods: []*v1.Pod{
				st.MakePod().Name("p1").UID("p1").Label(LabelKeyIsViolatingPDB, LabelValueNonViolatingPDB).Namespace(v1.NamespaceDefault).Priority(highPriority).Node("node1").Obj(),
				st.MakePod().Name("p2").UID("p2").Label(LabelKeyIsViolatingPDB, LabelValueViolatingPDB).Namespace(v1.NamespaceDefault).Priority(lowPriority).Node("node1").Obj(),
				st.MakePod().Name("p3").UID("p3").Label(LabelKeyIsViolatingPDB, LabelValueViolatingPDB).Namespace(v1.NamespaceDefault).Priority(midPriority).Node("node2").Obj(),
			},
			pdbs: []*policy.PodDisruptionBudget{
				st.MakePDB().Name("violating-pdb").Namespace(v1.NamespaceDefault).MatchLabel(LabelKeyIsViolatingPDB, LabelValueViolatingPDB).MinAvailable("100%").Obj(),
				st.MakePDB().Name("non-violating-pdb").Namespace(v1.NamespaceDefault).MatchLabel(LabelKeyIsViolatingPDB, LabelValueNonViolatingPDB).MinAvailable("0").DisruptionsAllowed(math.MaxInt32).Obj(),
			},
			nodes: []*v1.Node{
				st.MakeNode().Name("node1").Capacity(onePodRes).Obj(),
				st.MakeNode().Name("node2").Capacity(onePodRes).Obj(),
			},
			filteredNodesStatuses: framework.NodeToStatusMap{
				"node1": framework.NewStatus(framework.Unschedulable),
				"node2": framework.NewStatus(framework.Unschedulable),
			},
			wantResult: framework.NewPostFilterResultWithNominatedNode("node2"),
			wantStatus: framework.NewStatus(framework.Success),
		},
		{
			name: "preemption result filtered out by extenders",
			pod:  st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Obj(),
@@ -347,6 +378,13 @@ func TestPostFilter(t *testing.T) {
			for i := range tt.pods {
				podInformer.GetStore().Add(tt.pods[i])
			}
			pdbInformer := informerFactory.Policy().V1().PodDisruptionBudgets().Informer()
			for i := range tt.pdbs {
				if err := pdbInformer.GetStore().Add(tt.pdbs[i]); err != nil {
					t.Fatal(err)
				}
			}

			// Register NodeResourceFit as the Filter & PreFilter plugin.
			registeredPlugins := []tf.RegisterPluginFunc{
				tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
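Not part of the diff: the two PDB fixtures in the new test case are what make the scenario interesting. As I read the builder calls (an approximation of what st.MakePDB produces, not verified output), "violating-pdb" selects the pods labeled LabelValueViolatingPDB with MinAvailable 100%, so evicting any of them counts as a PDB violation, while "non-violating-pdb" reports effectively unlimited allowed disruptions, so the highPriority pod p1 is never treated as PDB-violating. Roughly, the objects look like this:

package main

import (
	"math"

	policyv1 "k8s.io/api/policy/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
)

// examplePDBs sketches the approximate shape of the two PDB fixtures used in
// the test case above; field values mirror the visible builder calls.
func examplePDBs() []*policyv1.PodDisruptionBudget {
	minAll := intstr.FromString("100%") // every selected pod must stay available
	minNone := intstr.FromInt32(0)      // no availability requirement

	violating := &policyv1.PodDisruptionBudget{
		ObjectMeta: metav1.ObjectMeta{Name: "violating-pdb", Namespace: "default"},
		Spec: policyv1.PodDisruptionBudgetSpec{
			Selector: &metav1.LabelSelector{MatchLabels: map[string]string{
				"test.kubernetes.io/is-violating-pdb": "violating",
			}},
			MinAvailable: &minAll,
		},
		// Status.DisruptionsAllowed stays at its zero value, so evicting any
		// selected pod is treated as a PDB violation.
	}

	nonViolating := &policyv1.PodDisruptionBudget{
		ObjectMeta: metav1.ObjectMeta{Name: "non-violating-pdb", Namespace: "default"},
		Spec: policyv1.PodDisruptionBudgetSpec{
			Selector: &metav1.LabelSelector{MatchLabels: map[string]string{
				"test.kubernetes.io/is-violating-pdb": "non-violating",
			}},
			MinAvailable: &minNone,
		},
		Status: policyv1.PodDisruptionBudgetStatus{
			// Effectively unlimited disruptions, so these pods never count as
			// PDB-violating victims.
			DisruptionsAllowed: math.MaxInt32,
		},
	}
	return []*policyv1.PodDisruptionBudget{violating, nonViolating}
}

func main() { _ = examplePDBs() }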