When the hint fn returns an error, the scheduling queue logs the error and treats the hint as QueueAfterBackoff.

Co-authored-by: Kensei Nakada <handbomusic@gmail.com>

Co-authored-by: Kante Yin <kerthcet@gmail.com>

Co-authored-by: XsWack <xushiwei5@huawei.com>
This commit is contained in:
carlory
2023-07-13 21:45:26 +08:00
parent 09200e9c92
commit 0105a002bc
13 changed files with 216 additions and 97 deletions

View File

@@ -433,7 +433,20 @@ func (p *PriorityQueue) isPodWorthRequeuing(logger klog.Logger, pInfo *framework
continue
}
switch h := hintfn.QueueingHintFn(logger, pod, oldObj, newObj); h {
h, err := hintfn.QueueingHintFn(logger, pod, oldObj, newObj)
if err != nil {
// If the QueueingHintFn returned an error, we should treat the event as QueueAfterBackoff so that we can prevent
// the Pod from being stuck in the unschedulable pod pool.
oldObjMeta, newObjMeta, asErr := util.As[klog.KMetadata](oldObj, newObj)
if asErr != nil {
logger.Error(err, "QueueingHintFn returns error", "event", event, "plugin", hintfn.PluginName, "pod", klog.KObj(pod))
} else {
logger.Error(err, "QueueingHintFn returns error", "event", event, "plugin", hintfn.PluginName, "pod", klog.KObj(pod), "oldObj", klog.KObj(oldObjMeta), "newObj", klog.KObj(newObjMeta))
}
h = framework.QueueAfterBackoff
}
switch h {
case framework.QueueSkip:
continue
case framework.QueueImmediately: