add(scheduler/framework): implement smaller Pod update events

Author: Kensei Nakada
Date:   2024-01-07 04:59:21 +00:00
Parent: 8f265b6305
Commit: 0cd1ee4259

11 changed files with 267 additions and 67 deletions

pkg/scheduler/eventhandlers.go

@@ -270,6 +270,8 @@ func (sched *Scheduler) updatePodInCache(oldObj, newObj interface{}) {
 		logger.Error(err, "Scheduler cache UpdatePod failed", "pod", klog.KObj(oldPod))
 	}
+	events := queue.PodSchedulingPropertiesChange(newPod, oldPod)
+	for _, evt := range events {
 	// SchedulingQueue.AssignedPodUpdated has a problem:
 	// It internally pre-filters Pods to move to activeQ,
 	// while taking only in-tree plugins into consideration.
@@ -282,7 +284,8 @@ func (sched *Scheduler) updatePodInCache(oldObj, newObj interface{}) {
 	if utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints) {
 		sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(logger, queue.AssignedPodUpdate, oldPod, newPod, nil)
 	} else {
-		sched.SchedulingQueue.AssignedPodUpdated(logger, oldPod, newPod)
+		sched.SchedulingQueue.AssignedPodUpdated(logger, oldPod, newPod, evt)
+	}
 	}
 }
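Read together, the two hunks above change updatePodInCache so that one assigned-Pod update is fanned out into fine-grained events, each dispatched separately. How the patched body reads once assembled (an abbreviated reading aid, not compilable on its own; the cache-update code and the long comment are elided):

	events := queue.PodSchedulingPropertiesChange(newPod, oldPod)
	for _, evt := range events {
		if utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints) {
			// The QueueingHints path still broadcasts the generic event;
			// plugins' hint functions decide what to requeue.
			sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(logger, queue.AssignedPodUpdate, oldPod, newPod, nil)
		} else {
			// The legacy path now receives each fine-grained event, so the
			// queue can pick a requeueing strategy per event.
			sched.SchedulingQueue.AssignedPodUpdated(logger, oldPod, newPod, evt)
		}
	}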

pkg/scheduler/framework/plugins/interpodaffinity/plugin.go

@@ -62,7 +62,7 @@ func (pl *InterPodAffinity) EventsToRegister(_ context.Context) ([]framework.Clu
 	// All ActionType includes the following events:
 	// - Delete. An unschedulable Pod may fail due to violating an existing Pod's anti-affinity constraints,
 	//   deleting an existing Pod may make it schedulable.
-	// - Update. Updating on an existing Pod's labels (e.g., removal) may make
+	// - UpdatePodLabel. Updating on an existing Pod's labels (e.g., removal) may make
 	//   an unschedulable Pod schedulable.
 	// - Add. An unschedulable Pod may fail due to violating pod-affinity constraints,
 	//   adding an assigned Pod may make it schedulable.
@@ -75,7 +75,7 @@ func (pl *InterPodAffinity) EventsToRegister(_ context.Context) ([]framework.Clu
 	// As a workaround, we add UpdateNodeTaint event to catch the case.
 	// We can remove UpdateNodeTaint when we remove the preCheck feature.
 	// See: https://github.com/kubernetes/kubernetes/issues/110175
-	{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.All}, QueueingHintFn: pl.isSchedulableAfterPodChange},
+	{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Add | framework.UpdatePodLabel | framework.Delete}, QueueingHintFn: pl.isSchedulableAfterPodChange},
 	{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeLabel | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
 }, nil
 }

pkg/scheduler/framework/plugins/noderesources/fit.go

@@ -250,9 +250,9 @@ func getPreFilterState(cycleState *framework.CycleState) (*preFilterState, error
 func (f *Fit) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
 	podActionType := framework.Delete
 	if f.enableInPlacePodVerticalScaling {
-		// If InPlacePodVerticalScaling (KEP 1287) is enabled, then PodUpdate event should be registered
+		// If InPlacePodVerticalScaling (KEP 1287) is enabled, then PodRequestUpdate event should be registered
 		// for this plugin since a Pod update may free up resources that make other Pods schedulable.
-		podActionType |= framework.Update
+		podActionType |= framework.UpdatePodRequest
 	}
 	return []framework.ClusterEventWithHint{
 		{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: podActionType}, QueueingHintFn: f.isSchedulableAfterPodChange},

pkg/scheduler/framework/plugins/noderesources/fit_test.go

@@ -1095,7 +1095,7 @@ func TestEventsToRegister(t *testing.T) {
 			"Register events with InPlacePodVerticalScaling feature enabled",
 			true,
 			[]framework.ClusterEventWithHint{
-				{Event: framework.ClusterEvent{Resource: "Pod", ActionType: framework.Update | framework.Delete}},
+				{Event: framework.ClusterEvent{Resource: "Pod", ActionType: framework.UpdatePodRequest | framework.Delete}},
 				{Event: framework.ClusterEvent{Resource: "Node", ActionType: framework.Add | framework.Update}},
 			},
 		},

pkg/scheduler/framework/plugins/podtopologyspread/plugin.go

@@ -139,11 +139,11 @@ func (pl *PodTopologySpread) EventsToRegister(_ context.Context) ([]framework.Cl
 	// All ActionType includes the following events:
 	// - Add. An unschedulable Pod may fail due to violating topology spread constraints,
 	//   adding an assigned Pod may make it schedulable.
-	// - Update. Updating on an existing Pod's labels (e.g., removal) may make
+	// - UpdatePodLabel. Updating on an existing Pod's labels (e.g., removal) may make
 	//   an unschedulable Pod schedulable.
 	// - Delete. An unschedulable Pod may fail due to violating an existing Pod's topology spread constraints,
 	//   deleting an existing Pod may make it schedulable.
-	{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.All}, QueueingHintFn: pl.isSchedulableAfterPodChange},
+	{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Add | framework.UpdatePodLabel | framework.Delete}, QueueingHintFn: pl.isSchedulableAfterPodChange},
 	// Node add|delete|update maybe lead an topology key changed,
 	// and make these pod in scheduling schedulable or unschedulable.
 	//

pkg/scheduler/framework/types.go

@@ -46,25 +46,39 @@ type ActionType int64
 // Constants for ActionTypes.
 const (
-	Add    ActionType = 1 << iota // 1
-	Delete                        // 10
+	Add ActionType = 1 << iota
+	Delete
 	// UpdateNodeXYZ is only applicable for Node events.
-	UpdateNodeAllocatable // 100
-	UpdateNodeLabel       // 1000
-	UpdateNodeTaint       // 10000
-	UpdateNodeCondition   // 100000
-	UpdateNodeAnnotation  // 1000000
+	// If you use UpdateNodeXYZ,
+	// your plugin's QueueingHint is only executed for the specific sub-Update event.
+	// It's better to narrow down the scope of the event by specifying them
+	// for better performance in requeueing.
+	UpdateNodeAllocatable
+	UpdateNodeLabel
+	UpdateNodeTaint
+	UpdateNodeCondition
+	UpdateNodeAnnotation
 
-	All ActionType = 1<<iota - 1 // 1111111
+	// UpdatePodXYZ is only applicable for Pod events.
+	UpdatePodLabel
+	// UpdatePodRequest is a update for pod's resource request calculated by resource.PodRequests() function.
+	UpdatePodRequest
+
+	All ActionType = 1<<iota - 1
 
 	// Use the general Update type if you don't either know or care the specific sub-Update type to use.
-	Update = UpdateNodeAllocatable | UpdateNodeLabel | UpdateNodeTaint | UpdateNodeCondition | UpdateNodeAnnotation
+	Update = UpdateNodeAllocatable | UpdateNodeLabel | UpdateNodeTaint | UpdateNodeCondition | UpdateNodeAnnotation | UpdatePodLabel | UpdatePodRequest
 )
 
 // GVK is short for group/version/kind, which can uniquely represent a particular API resource.
 type GVK string
 
 // Constants for GVKs.
+//
+// Note:
+// - UpdatePodXYZ or UpdateNodeXYZ: triggered by updating particular parts of a Pod or a Node, e.g. updatePodLabel.
+//   Use specific events rather than general ones (updatePodLabel vs update) can make the requeueing process more efficient
+//   and consume less memories as less events will be cached at scheduler.
 const (
 	// There are a couple of notes about how the scheduler notifies the events of Pods:
 	// - Add: add events could be triggered by either a newly created Pod or an existing Pod that is scheduled to a Node.
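To make the bit arithmetic concrete, here is a self-contained toy copy of the pattern (illustrative only, not the real framework package): each constant occupies one bit, All is the mask with every defined bit set, and the general Update is now the union of the Node and Pod sub-update bits.

	package main

	import "fmt"

	type ActionType int64

	const (
		Add ActionType = 1 << iota // bit 0
		Delete                     // bit 1
		UpdateNodeAllocatable
		UpdateNodeLabel
		UpdateNodeTaint
		UpdateNodeCondition
		UpdateNodeAnnotation
		UpdatePodLabel
		UpdatePodRequest // bit 8

		All ActionType = 1<<iota - 1 // all nine bits set

		Update = UpdateNodeAllocatable | UpdateNodeLabel | UpdateNodeTaint |
			UpdateNodeCondition | UpdateNodeAnnotation | UpdatePodLabel | UpdatePodRequest
	)

	func main() {
		fmt.Printf("UpdatePodRequest = %09b\n", UpdatePodRequest) // 100000000
		fmt.Printf("Update           = %09b\n", Update)           // 111111100
		fmt.Printf("All              = %09b\n", All)              // 111111111
		// A plugin registered only for UpdatePodLabel overlaps a label event
		// but not a request event:
		fmt.Println(UpdatePodLabel&UpdatePodLabel != 0)   // true
		fmt.Println(UpdatePodLabel&UpdatePodRequest != 0) // false
	}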
@@ -166,6 +180,7 @@ func (s QueueingHint) String() string {
 type ClusterEvent struct {
 	Resource   GVK
 	ActionType ActionType
+	// Label describes this cluster event, only used in logging and metrics.
 	Label      string
 }
@@ -175,14 +190,25 @@ func (ce ClusterEvent) IsWildCard() bool {
 }
 
 // Match returns true if ClusterEvent is matched with the coming event.
+// "match" means the coming event is the same or more specific than the ce.
+// i.e., when ce.ActionType is Update, it return true if a coming event is UpdateNodeLabel
+// because UpdateNodeLabel is more specific than Update.
+// On the other hand, when ce.ActionType is UpdateNodeLabel, it doesn't return true if a coming event is Update.
+// This is based on the fact that the scheduler interprets the coming cluster event as specific event if possible;
+// meaning, if a coming event is Node/Update,
+// it means that Node's update is not something that can be interpreted as any of Node's specific Update events.
+//
 // If the ce.Resource is "*", there's no requirement for the coming event' Resource.
 // Contrarily, if the coming event's Resource is "*", the ce.Resource should only be "*".
+// (which should never happen in the current implementation of the scheduling queue.)
 //
 // Note: we have a special case here when the coming event is a wildcard event,
 // it will force all Pods to move to activeQ/backoffQ,
 // but we take it as an unmatched event unless the ce is also a wildcard one.
-func (ce ClusterEvent) Match(event ClusterEvent) bool {
-	return ce.IsWildCard() || (ce.Resource == WildCard || ce.Resource == event.Resource) && ce.ActionType&event.ActionType != 0
+func (ce ClusterEvent) Match(comingEvent ClusterEvent) bool {
+	return ce.IsWildCard() ||
+		(ce.Resource == WildCard || ce.Resource == comingEvent.Resource) &&
+			(ce.ActionType&comingEvent.ActionType != 0 && comingEvent.ActionType <= ce.ActionType)
 }
 
 func UnrollWildCardResource() []ClusterEventWithHint {
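The key to the new rule is that comingEvent.ActionType <= ce.ActionType encodes "no broader than" numerically: a single-bit event is never larger than a mask that contains it, while the multi-bit Update mask is larger than each of its component bits. A minimal sketch of the rule in isolation (toy constants; the Resource and wildcard checks are omitted):

	package main

	import "fmt"

	type ActionType int64

	const (
		UpdateNodeAllocatable ActionType = 1 << iota
		UpdateNodeLabel

		Update = UpdateNodeAllocatable | UpdateNodeLabel
	)

	// match keeps the old bit-overlap test and adds the specificity test.
	func match(ce, comingEvent ActionType) bool {
		return ce&comingEvent != 0 && comingEvent <= ce
	}

	func main() {
		fmt.Println(match(Update, UpdateNodeLabel))          // true: coming event is more specific
		fmt.Println(match(UpdateNodeLabel, Update))          // false: coming event is less specific
		fmt.Println(match(UpdateNodeLabel, UpdateNodeLabel)) // true: same event
	}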

pkg/scheduler/framework/types_test.go

@@ -1626,9 +1626,15 @@ func TestCloudEvent_Match(t *testing.T) {
 			wantResult:  true,
 		},
 		{
-			name:        "event with resource = 'Pod' matching with coming events carries same actionType",
-			event:       ClusterEvent{Resource: Pod, ActionType: UpdateNodeLabel | UpdateNodeTaint},
-			comingEvent: ClusterEvent{Resource: Pod, ActionType: UpdateNodeLabel},
+			name:        "no match if a coming event is less specific",
+			event:       ClusterEvent{Resource: Node, ActionType: UpdateNodeLabel},
+			comingEvent: ClusterEvent{Resource: Node, ActionType: Update},
+			wantResult:  false,
+		},
+		{
+			name:        "match if a coming event is more specific",
+			event:       ClusterEvent{Resource: Node, ActionType: Update},
+			comingEvent: ClusterEvent{Resource: Node, ActionType: UpdateNodeLabel},
 			wantResult:  true,
 		},
 		{

pkg/scheduler/internal/queue/events.go

@@ -19,6 +19,9 @@ package queue
 import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/equality"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
+	"k8s.io/kubernetes/pkg/api/v1/resource"
+	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/kubernetes/pkg/scheduler/framework"
 )
@@ -53,6 +56,10 @@ var (
 	UnscheduledPodDelete = framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Update, Label: "UnschedulablePodDelete"}
 	// AssignedPodDelete is the event when an assigned pod is deleted.
 	AssignedPodDelete = framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Delete, Label: "AssignedPodDelete"}
+	// PodRequestChange is the event when a pod's resource request is changed.
+	PodRequestChange = framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodRequest, Label: "PodRequestChange"}
+	// PodLabelChange is the event when a pod's label is changed.
+	PodLabelChange = framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodLabel, Label: "PodLabelChange"}
 	// NodeSpecUnschedulableChange is the event when unschedulable node spec is changed.
 	NodeSpecUnschedulableChange = framework.ClusterEvent{Resource: framework.Node, ActionType: framework.UpdateNodeTaint, Label: "NodeSpecUnschedulableChange"}
 	// NodeAllocatableChange is the event when node allocatable is changed.
@@ -95,6 +102,44 @@ var (
 	UnschedulableTimeout = framework.ClusterEvent{Resource: framework.WildCard, ActionType: framework.All, Label: "UnschedulableTimeout"}
 )
 
+// PodSchedulingPropertiesChange interprets the update of a pod and returns corresponding UpdatePodXYZ event(s).
+// Once we have other pod update events, we should update here as well.
+func PodSchedulingPropertiesChange(newPod *v1.Pod, oldPod *v1.Pod) (events []framework.ClusterEvent) {
+	podChangeExtracters := []podChangeExtractor{
+		extractPodLabelsChange,
+		extractPodResourceRequestChange,
+	}
+
+	for _, fn := range podChangeExtracters {
+		if event := fn(newPod, oldPod); event != nil {
+			events = append(events, *event)
+		}
+	}
+
+	if len(events) == 0 {
+		events = append(events, AssignedPodUpdate)
+	}
+
+	return
+}
+
+type podChangeExtractor func(newNode *v1.Pod, oldNode *v1.Pod) *framework.ClusterEvent
+
+func extractPodResourceRequestChange(newPod, oldPod *v1.Pod) *framework.ClusterEvent {
+	opt := resource.PodResourcesOptions{InPlacePodVerticalScalingEnabled: utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling)}
+	if !equality.Semantic.DeepEqual(resource.PodRequests(newPod, opt), resource.PodRequests(oldPod, opt)) {
+		return &PodRequestChange
+	}
+	return nil
+}
+
+func extractPodLabelsChange(newPod *v1.Pod, oldPod *v1.Pod) *framework.ClusterEvent {
+	if isLabelChanged(newPod.GetLabels(), oldPod.GetLabels()) {
+		return &PodLabelChange
+	}
+	return nil
+}
+
 // NodeSchedulingPropertiesChange interprets the update of a node and returns corresponding UpdateNodeXYZ event(s).
 func NodeSchedulingPropertiesChange(newNode *v1.Node, oldNode *v1.Node) (events []framework.ClusterEvent) {
 	nodeChangeExtracters := []nodeChangeExtractor{
@@ -124,12 +169,16 @@ func extractNodeAllocatableChange(newNode *v1.Node, oldNode *v1.Node) *framework
 }
 
 func extractNodeLabelsChange(newNode *v1.Node, oldNode *v1.Node) *framework.ClusterEvent {
-	if !equality.Semantic.DeepEqual(oldNode.GetLabels(), newNode.GetLabels()) {
+	if isLabelChanged(newNode.GetLabels(), oldNode.GetLabels()) {
 		return &NodeLabelChange
 	}
 	return nil
 }
 
+func isLabelChanged(newLabels map[string]string, oldLabels map[string]string) bool {
+	return !equality.Semantic.DeepEqual(newLabels, oldLabels)
+}
+
 func extractNodeTaintsChange(newNode *v1.Node, oldNode *v1.Node) *framework.ClusterEvent {
 	if !equality.Semantic.DeepEqual(newNode.Spec.Taints, oldNode.Spec.Taints) {
 		return &NodeTaintChange
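A usage sketch for the new fan-out helper, written as if it sat beside events_test.go (the queue package is internal to k8s.io/kubernetes, so it cannot be imported from outside the module). The expected labels in the comments follow the extractors above: a label-only diff yields PodLabelChange, and a diff no extractor tracks falls back to the generic AssignedPodUpdate.

	package queue

	import (
		"fmt"

		v1 "k8s.io/api/core/v1"
		metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	)

	func ExamplePodSchedulingPropertiesChange() {
		oldPod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"tier": "web"}}}

		// Only the labels differ: exactly one fine-grained event comes back.
		relabeled := oldPod.DeepCopy()
		relabeled.Labels["tier"] = "db"
		for _, e := range PodSchedulingPropertiesChange(relabeled, oldPod) {
			fmt.Println(e.Label) // PodLabelChange
		}

		// An annotation change is not tracked by any extractor, so the
		// generic AssignedPodUpdate event is returned instead.
		annotated := oldPod.DeepCopy()
		annotated.Annotations = map[string]string{"touched": "true"}
		for _, e := range PodSchedulingPropertiesChange(annotated, oldPod) {
			fmt.Println(e.Label) // AssignedPodUpdate
		}
	}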

pkg/scheduler/internal/queue/events_test.go

@@ -270,3 +270,108 @@ func TestNodeSchedulingPropertiesChange(t *testing.T) {
 		}
 	}
 }
+
+func Test_podSchedulingPropertiesChange(t *testing.T) {
+	podWithBigRequest := &v1.Pod{
+		Spec: v1.PodSpec{
+			Containers: []v1.Container{
+				{
+					Name: "app",
+					Resources: v1.ResourceRequirements{
+						Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("101m")},
+					},
+				},
+			},
+		},
+		Status: v1.PodStatus{
+			ContainerStatuses: []v1.ContainerStatus{
+				{
+					Name:               "app",
+					AllocatedResources: v1.ResourceList{v1.ResourceCPU: resource.MustParse("101m")},
+				},
+			},
+		},
+	}
+	podWithBigRequestAndLabel := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels: map[string]string{"foo": "bar"},
+		},
+		Spec: v1.PodSpec{
+			Containers: []v1.Container{
+				{
+					Name: "app",
+					Resources: v1.ResourceRequirements{
+						Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("101m")},
+					},
+				},
+			},
+		},
+		Status: v1.PodStatus{
+			ContainerStatuses: []v1.ContainerStatus{
+				{
+					Name:               "app",
+					AllocatedResources: v1.ResourceList{v1.ResourceCPU: resource.MustParse("101m")},
+				},
+			},
+		},
+	}
+	podWithSmallRequest := &v1.Pod{
+		Spec: v1.PodSpec{
+			Containers: []v1.Container{
+				{
+					Name: "app",
+					Resources: v1.ResourceRequirements{
+						Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")},
+					},
+				},
+			},
+		},
+		Status: v1.PodStatus{
+			ContainerStatuses: []v1.ContainerStatus{
+				{
+					Name:               "app",
+					AllocatedResources: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")},
+				},
+			},
+		},
+	}
+	tests := []struct {
+		name   string
+		newPod *v1.Pod
+		oldPod *v1.Pod
+		want   []framework.ClusterEvent
+	}{
+		{
+			name:   "only label is updated",
+			newPod: st.MakePod().Label("foo", "bar").Obj(),
+			oldPod: st.MakePod().Label("foo", "bar2").Obj(),
+			want:   []framework.ClusterEvent{PodLabelChange},
+		},
+		{
+			name:   "only pod's resource request is updated",
+			oldPod: podWithSmallRequest,
+			newPod: podWithBigRequest,
+			want:   []framework.ClusterEvent{PodRequestChange},
+		},
+		{
+			name:   "both pod's resource request and label are updated",
+			oldPod: podWithSmallRequest,
+			newPod: podWithBigRequestAndLabel,
+			want:   []framework.ClusterEvent{PodLabelChange, PodRequestChange},
+		},
+		{
+			name:   "untracked properties of pod is updated",
+			newPod: st.MakePod().Annotation("foo", "bar").Obj(),
+			oldPod: st.MakePod().Annotation("foo", "bar2").Obj(),
+			want:   []framework.ClusterEvent{AssignedPodUpdate},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := PodSchedulingPropertiesChange(tt.newPod, tt.oldPod)
+			if diff := cmp.Diff(tt.want, got); diff != "" {
+				t.Errorf("unexpected event is returned from podSchedulingPropertiesChange (-want, +got):\n%s", diff)
+			}
+		})
+	}
+}

pkg/scheduler/internal/queue/scheduling_queue.go

@@ -120,7 +120,7 @@ type SchedulingQueue interface {
 	// See https://github.com/kubernetes/kubernetes/issues/110175
 	MoveAllToActiveOrBackoffQueue(logger klog.Logger, event framework.ClusterEvent, oldObj, newObj interface{}, preCheck PreEnqueueCheck)
 	AssignedPodAdded(logger klog.Logger, pod *v1.Pod)
-	AssignedPodUpdated(logger klog.Logger, oldPod, newPod *v1.Pod)
+	AssignedPodUpdated(logger klog.Logger, oldPod, newPod *v1.Pod, event framework.ClusterEvent)
 	PendingPods() ([]*v1.Pod, string)
 	PodsInActiveQ() []*v1.Pod
 	// Close closes the SchedulingQueue so that the goroutine which is
@@ -438,6 +438,7 @@ const (
 // isEventOfInterest returns true if the event is of interest by some plugins.
 func (p *PriorityQueue) isEventOfInterest(logger klog.Logger, event framework.ClusterEvent) bool {
 	if event.IsWildCard() {
+		// Wildcard event moves Pods that failed with any plugins.
 		return true
 	}
@@ -1086,7 +1087,10 @@ func (p *PriorityQueue) Update(logger klog.Logger, oldPod, newPod *v1.Pod) error
 		// whether the update may make the pods schedulable.
 		// Plugins have to implement a QueueingHint for Pod/Update event
 		// if the rejection from them could be resolved by updating unscheduled Pods itself.
-		hint := p.isPodWorthRequeuing(logger, pInfo, UnscheduledPodUpdate, oldPod, newPod)
+		events := PodSchedulingPropertiesChange(newPod, oldPod)
+		for _, evt := range events {
+			hint := p.isPodWorthRequeuing(logger, pInfo, evt, oldPod, newPod)
 			queue := p.requeuePodViaQueueingHint(logger, pInfo, hint, UnscheduledPodUpdate.Label)
 			if queue != unschedulablePods {
 				logger.V(5).Info("Pod moved to an internal scheduling queue because the Pod is updated", "pod", klog.KObj(newPod), "event", PodUpdate, "queue", queue)
@@ -1094,7 +1098,10 @@ func (p *PriorityQueue) Update(logger klog.Logger, oldPod, newPod *v1.Pod) error
 			}
 			if queue == activeQ {
 				p.cond.Broadcast()
+				break
 			}
+		}
 		return nil
 	}
 	if isPodUpdated(oldPod, newPod) {
@@ -1158,32 +1165,18 @@ func (p *PriorityQueue) AssignedPodAdded(logger klog.Logger, pod *v1.Pod) {
 	p.lock.Unlock()
 }
 
-// isPodResourcesResizedDown returns true if a pod CPU and/or memory resize request has been
-// admitted by kubelet, is 'InProgress', and results in a net sizing down of updated resources.
-// It returns false if either CPU or memory resource is net resized up, or if no resize is in progress.
-func isPodResourcesResizedDown(pod *v1.Pod) bool {
-	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
-		// TODO(vinaykul,wangchen615,InPlacePodVerticalScaling): Fix this to determine when a
-		// pod is truly resized down (might need oldPod if we cannot determine from Status alone)
-		if pod.Status.Resize == v1.PodResizeStatusInProgress {
-			return true
-		}
-	}
-	return false
-}
-
 // AssignedPodUpdated is called when a bound pod is updated. Change of labels
 // may make pending pods with matching affinity terms schedulable.
-func (p *PriorityQueue) AssignedPodUpdated(logger klog.Logger, oldPod, newPod *v1.Pod) {
+func (p *PriorityQueue) AssignedPodUpdated(logger klog.Logger, oldPod, newPod *v1.Pod, event framework.ClusterEvent) {
 	p.lock.Lock()
-	if isPodResourcesResizedDown(newPod) {
+	if event.Resource == framework.Pod && event.ActionType&framework.UpdatePodRequest != 0 {
 		// In this case, we don't want to pre-filter Pods by getUnschedulablePodsWithCrossTopologyTerm
 		// because Pod related events may make Pods that were rejected by NodeResourceFit schedulable.
 		p.moveAllToActiveOrBackoffQueue(logger, AssignedPodUpdate, oldPod, newPod, nil)
 	} else {
 		// Pre-filter Pods to move by getUnschedulablePodsWithCrossTopologyTerm
 		// because Pod related events only make Pods rejected by cross topology term schedulable.
-		p.movePodsToActiveOrBackoffQueue(logger, p.getUnschedulablePodsWithCrossTopologyTerm(logger, newPod), AssignedPodUpdate, oldPod, newPod)
+		p.movePodsToActiveOrBackoffQueue(logger, p.getUnschedulablePodsWithCrossTopologyTerm(logger, newPod), event, oldPod, newPod)
 	}
 	p.lock.Unlock()
 }
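The rewritten branch replaces the isPodResourcesResizedDown heuristic (which keyed off Status.Resize being InProgress) with the request-diff event computed upstream from the old and new Pod. The reasoning: a changed resource request (e.g., a resize down) can unblock a Pod that NodeResourceFit rejected on any node, which the cross-topology pre-filter would wrongly skip, whereas label-style updates can only help Pods rejected by cross-topology plugins such as inter-pod affinity or topology spread. A toy illustration of the dispatch rule (stand-in types, not the real PriorityQueue):

	package main

	import "fmt"

	type ActionType int64

	const (
		UpdatePodLabel ActionType = 1 << iota
		UpdatePodRequest
	)

	type ClusterEvent struct{ ActionType ActionType }

	// dispatch mirrors AssignedPodUpdated's new branch: request changes skip
	// the cross-topology pre-filter, everything else keeps it.
	func dispatch(event ClusterEvent) string {
		if event.ActionType&UpdatePodRequest != 0 {
			return "moveAllToActiveOrBackoffQueue (no pre-filter)"
		}
		return "movePodsToActiveOrBackoffQueue (cross-topology Pods only)"
	}

	func main() {
		fmt.Println(dispatch(ClusterEvent{ActionType: UpdatePodRequest}))
		fmt.Println(dispatch(ClusterEvent{ActionType: UpdatePodLabel}))
	}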

pkg/scheduler/eventhandlers_test.go

@@ -791,6 +791,7 @@ func Test_UnionedGVKs(t *testing.T) {
 		name    string
 		plugins schedulerapi.PluginSet
 		want    map[framework.GVK]framework.ActionType
+		enableInPlacePodVerticalScaling bool
 	}{
 		{
 			name: "filter without EnqueueExtensions plugin",
@@ -875,10 +876,10 @@ func Test_UnionedGVKs(t *testing.T) {
 			want:    map[framework.GVK]framework.ActionType{},
 		},
 		{
-			name:    "plugins with default profile",
+			name:    "plugins with default profile (InPlacePodVerticalScaling: disabled)",
 			plugins: schedulerapi.PluginSet{Enabled: defaults.PluginsV1.MultiPoint.Enabled},
 			want: map[framework.GVK]framework.ActionType{
-				framework.Pod:                   framework.All,
+				framework.Pod:                   framework.Add | framework.UpdatePodLabel | framework.Delete,
 				framework.Node:                  framework.All,
 				framework.CSINode:               framework.All - framework.Delete,
 				framework.CSIDriver:             framework.All - framework.Delete,
@@ -888,9 +889,26 @@ func Test_UnionedGVKs(t *testing.T) {
 				framework.StorageClass: framework.All - framework.Delete,
 			},
 		},
+		{
+			name:    "plugins with default profile (InPlacePodVerticalScaling: enabled)",
+			plugins: schedulerapi.PluginSet{Enabled: defaults.PluginsV1.MultiPoint.Enabled},
+			want: map[framework.GVK]framework.ActionType{
+				framework.Pod:                   framework.Add | framework.UpdatePodLabel | framework.UpdatePodRequest | framework.Delete,
+				framework.Node:                  framework.All,
+				framework.CSINode:               framework.All - framework.Delete,
+				framework.CSIDriver:             framework.All - framework.Delete,
+				framework.CSIStorageCapacity:    framework.All - framework.Delete,
+				framework.PersistentVolume:      framework.All - framework.Delete,
+				framework.PersistentVolumeClaim: framework.All - framework.Delete,
+				framework.StorageClass:          framework.All - framework.Delete,
+			},
+			enableInPlacePodVerticalScaling: true,
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
+			featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, tt.enableInPlacePodVerticalScaling)
 			_, ctx := ktesting.NewTestContext(t)
 			ctx, cancel := context.WithCancel(ctx)
 			defer cancel()