When a hint function returns an error, the scheduling queue logs the error and treats it as QueueAfterBackoff.
Co-authored-by: Kensei Nakada <handbomusic@gmail.com>
Co-authored-by: Kante Yin <kerthcet@gmail.com>
Co-authored-by: XsWack <xushiwei5@huawei.com>
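For orientation, the queue-side contract this establishes looks roughly like the sketch below. The wrapper function and its name are illustrative assumptions, not the scheduler's actual queue code; only the log-and-fall-back-to-backoff behavior is what the commit message describes, and the hint signature matches the one introduced in the diff below (same imports as the plugin code: klog, framework, v1).

	// runQueueingHint is a hypothetical helper illustrating how the scheduling
	// queue consumes the new (QueueingHint, error) return value.
	func runQueueingHint(logger klog.Logger, hintFn framework.QueueingHintFn, pod *v1.Pod, oldObj, newObj interface{}) framework.QueueingHint {
		hint, err := hintFn(logger, pod, oldObj, newObj)
		if err != nil {
			// An error from a hint function is not fatal: log it and fall back
			// to the conservative choice, requeueing the pod after backoff.
			logger.Error(err, "QueueingHintFn returned an error, treating it as QueueAfterBackoff", "pod", klog.KObj(pod))
			return framework.QueueAfterBackoff
		}
		return hint
	}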
@@ -306,17 +306,16 @@ func (pl *dynamicResources) PreEnqueue(ctx context.Context, pod *v1.Pod) (status
 // an informer. It checks whether that change made a previously unschedulable
 // pod schedulable. It errs on the side of letting a pod scheduling attempt
 // happen.
-func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) framework.QueueingHint {
+func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
 	if newObj == nil {
 		// Deletes don't make a pod schedulable.
-		return framework.QueueSkip
+		return framework.QueueSkip, nil
 	}
 
-	_, modifiedClaim, err := schedutil.As[*resourcev1alpha2.ResourceClaim](nil, newObj)
+	originalClaim, modifiedClaim, err := schedutil.As[*resourcev1alpha2.ResourceClaim](oldObj, newObj)
 	if err != nil {
 		// Shouldn't happen.
-		logger.Error(err, "unexpected new object in isSchedulableAfterClaimChange")
-		return framework.QueueAfterBackoff
+		return framework.QueueAfterBackoff, fmt.Errorf("unexpected object in isSchedulableAfterClaimChange: %w", err)
 	}
 
 	usesClaim := false
@@ -329,30 +328,24 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
 		// foreachPodResourceClaim only returns errors for "not
 		// schedulable".
 		logger.V(4).Info("pod is not schedulable", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim), "reason", err.Error())
-		return framework.QueueSkip
+		return framework.QueueSkip, nil
 	}
 
 	if !usesClaim {
 		// This was not the claim the pod was waiting for.
 		logger.V(6).Info("unrelated claim got modified", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
-		return framework.QueueSkip
+		return framework.QueueSkip, nil
 	}
 
-	if oldObj == nil {
+	if originalClaim == nil {
 		logger.V(4).Info("claim for pod got created", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
-		return framework.QueueImmediately
+		return framework.QueueImmediately, nil
 	}
 
 	// Modifications may or may not be relevant. If the entire
 	// status is as before, then something else must have changed
 	// and we don't care. What happens in practice is that the
 	// resource driver adds the finalizer.
-	originalClaim, ok := oldObj.(*resourcev1alpha2.ResourceClaim)
-	if !ok {
-		// Shouldn't happen.
-		logger.Error(nil, "unexpected old object in isSchedulableAfterClaimAddOrUpdate", "obj", oldObj)
-		return framework.QueueAfterBackoff
-	}
 	if apiequality.Semantic.DeepEqual(&originalClaim.Status, &modifiedClaim.Status) {
 		if loggerV := logger.V(7); loggerV.Enabled() {
 			// Log more information.
@@ -360,11 +353,11 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
 		} else {
 			logger.V(6).Info("claim for pod got modified where the pod doesn't care", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
 		}
-		return framework.QueueSkip
+		return framework.QueueSkip, nil
 	}
 
 	logger.V(4).Info("status of claim for pod got updated", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
-	return framework.QueueImmediately
+	return framework.QueueImmediately, nil
 }
 
 // isSchedulableAfterPodSchedulingContextChange is invoked for all
@@ -372,25 +365,24 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
 // change made a previously unschedulable pod schedulable (updated) or a new
 // attempt is needed to re-create the object (deleted). It errs on the side of
 // letting a pod scheduling attempt happen.
-func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) framework.QueueingHint {
+func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
 	// Deleted? That can happen because we ourselves delete the PodSchedulingContext while
 	// working on the pod. This can be ignored.
 	if oldObj != nil && newObj == nil {
 		logger.V(4).Info("PodSchedulingContext got deleted")
-		return framework.QueueSkip
+		return framework.QueueSkip, nil
 	}
 
 	oldPodScheduling, newPodScheduling, err := schedutil.As[*resourcev1alpha2.PodSchedulingContext](oldObj, newObj)
 	if err != nil {
 		// Shouldn't happen.
-		logger.Error(nil, "isSchedulableAfterPodSchedulingChange")
-		return framework.QueueAfterBackoff
+		return framework.QueueAfterBackoff, fmt.Errorf("unexpected object in isSchedulableAfterPodSchedulingContextChange: %w", err)
 	}
 	podScheduling := newPodScheduling // Never nil because deletes are handled above.
 
 	if podScheduling.Name != pod.Name || podScheduling.Namespace != pod.Namespace {
 		logger.V(7).Info("PodSchedulingContext for unrelated pod got modified", "pod", klog.KObj(pod), "podScheduling", klog.KObj(podScheduling))
-		return framework.QueueSkip
+		return framework.QueueSkip, nil
 	}
 
 	// If the drivers have provided information about all
@@ -410,7 +402,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
 		// foreachPodResourceClaim only returns errors for "not
 		// schedulable".
 		logger.V(4).Info("pod is not schedulable, keep waiting", "pod", klog.KObj(pod), "reason", err.Error())
-		return framework.QueueSkip
+		return framework.QueueSkip, nil
 	}
 
 	// Some driver responses missing?
@@ -424,14 +416,14 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
 		} else {
 			logger.V(5).Info("PodSchedulingContext with missing resource claim information, keep waiting", "pod", klog.KObj(pod))
 		}
-		return framework.QueueSkip
+		return framework.QueueSkip, nil
 	}
 
 	if oldPodScheduling == nil /* create */ ||
 		len(oldPodScheduling.Status.ResourceClaims) < len(podScheduling.Status.ResourceClaims) /* new information and not incomplete (checked above) */ {
 		// This definitely is new information for the scheduler. Try again immediately.
 		logger.V(4).Info("PodSchedulingContext for pod has all required information, schedule immediately", "pod", klog.KObj(pod))
-		return framework.QueueImmediately
+		return framework.QueueImmediately, nil
 	}
 
 	// The other situation where the scheduler needs to do
@@ -456,7 +448,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
 		for _, claimStatus := range podScheduling.Status.ResourceClaims {
 			if sliceContains(claimStatus.UnsuitableNodes, podScheduling.Spec.SelectedNode) {
 				logger.V(5).Info("PodSchedulingContext has unsuitable selected node, schedule immediately", "pod", klog.KObj(pod), "selectedNode", podScheduling.Spec.SelectedNode, "podResourceName", claimStatus.Name)
-				return framework.QueueImmediately
+				return framework.QueueImmediately, nil
 			}
 		}
 	}
@@ -466,7 +458,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
 		!apiequality.Semantic.DeepEqual(&oldPodScheduling.Spec, &podScheduling.Spec) &&
 		apiequality.Semantic.DeepEqual(&oldPodScheduling.Status, &podScheduling.Status) {
 		logger.V(5).Info("PodSchedulingContext has only the scheduler spec changes, ignore the update", "pod", klog.KObj(pod))
-		return framework.QueueSkip
+		return framework.QueueSkip, nil
 	}
 
 	// Once we get here, all changes which are known to require special responses
@@ -479,7 +471,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
 	} else {
 		logger.V(5).Info("PodSchedulingContext for pod with unknown changes, maybe schedule", "pod", klog.KObj(pod))
 	}
-	return framework.QueueAfterBackoff
+	return framework.QueueAfterBackoff, nil
 
 }
 
@@ -889,6 +889,7 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
 		claims         []*resourcev1alpha2.ResourceClaim
 		oldObj, newObj interface{}
 		expectedHint   framework.QueueingHint
+		expectedErr    bool
 	}{
 		"skip-deletes": {
 			pod: podWithClaimTemplate,
@@ -897,9 +898,9 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
 			expectedHint: framework.QueueSkip,
 		},
 		"backoff-wrong-new-object": {
-			pod:          podWithClaimTemplate,
-			newObj:       "not-a-claim",
-			expectedHint: framework.QueueAfterBackoff,
+			pod:         podWithClaimTemplate,
+			newObj:      "not-a-claim",
+			expectedErr: true,
 		},
 		"skip-wrong-claim": {
 			pod: podWithClaimTemplate,
@@ -927,10 +928,10 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
 			expectedHint: framework.QueueImmediately,
 		},
 		"backoff-wrong-old-object": {
-			pod:          podWithClaimName,
-			oldObj:       "not-a-claim",
-			newObj:       pendingImmediateClaim,
-			expectedHint: framework.QueueAfterBackoff,
+			pod:         podWithClaimName,
+			oldObj:      "not-a-claim",
+			newObj:      pendingImmediateClaim,
+			expectedErr: true,
 		},
 		"skip-adding-finalizer": {
 			pod: podWithClaimName,
@@ -969,7 +970,13 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
 					require.NoError(t, store.Update(claim))
 				}
 			}
-			actualHint := testCtx.p.isSchedulableAfterClaimChange(logger, tc.pod, tc.oldObj, tc.newObj)
+			actualHint, err := testCtx.p.isSchedulableAfterClaimChange(logger, tc.pod, tc.oldObj, tc.newObj)
+			if tc.expectedErr {
+				require.Error(t, err)
+				return
+			}
+
+			require.NoError(t, err)
 			require.Equal(t, tc.expectedHint, actualHint)
 		})
 	}
@@ -982,6 +989,7 @@ func Test_isSchedulableAfterPodSchedulingContextChange(t *testing.T) {
 		claims         []*resourcev1alpha2.ResourceClaim
 		oldObj, newObj interface{}
 		expectedHint   framework.QueueingHint
+		expectedErr    bool
 	}{
 		"skip-deleted": {
 			pod: podWithClaimTemplate,
@@ -996,18 +1004,18 @@ func Test_isSchedulableAfterPodSchedulingContextChange(t *testing.T) {
 			expectedHint: framework.QueueSkip,
 		},
 		"backoff-wrong-old-object": {
-			pod:          podWithClaimTemplate,
-			oldObj:       "not-a-scheduling-context",
-			newObj:       scheduling,
-			expectedHint: framework.QueueAfterBackoff,
+			pod:         podWithClaimTemplate,
+			oldObj:      "not-a-scheduling-context",
+			newObj:      scheduling,
+			expectedErr: true,
 		},
 		"backoff-missed-wrong-old-object": {
 			pod: podWithClaimTemplate,
 			oldObj: cache.DeletedFinalStateUnknown{
 				Obj: "not-a-scheduling-context",
 			},
-			newObj:       scheduling,
-			expectedHint: framework.QueueAfterBackoff,
+			newObj:      scheduling,
+			expectedErr: true,
 		},
 		"skip-unrelated-object": {
 			pod: podWithClaimTemplate,
@@ -1020,10 +1028,10 @@ func Test_isSchedulableAfterPodSchedulingContextChange(t *testing.T) {
 			expectedHint: framework.QueueSkip,
 		},
 		"backoff-wrong-new-object": {
-			pod:          podWithClaimTemplate,
-			oldObj:       scheduling,
-			newObj:       "not-a-scheduling-context",
-			expectedHint: framework.QueueAfterBackoff,
+			pod:         podWithClaimTemplate,
+			oldObj:      scheduling,
+			newObj:      "not-a-scheduling-context",
+			expectedErr: true,
 		},
 		"skip-missing-claim": {
 			pod: podWithClaimTemplate,
@@ -1091,7 +1099,13 @@ func Test_isSchedulableAfterPodSchedulingContextChange(t *testing.T) {
 			t.Parallel()
 			logger, _ := ktesting.NewTestContext(t)
 			testCtx := setup(t, nil, tc.claims, nil, tc.schedulings)
-			actualHint := testCtx.p.isSchedulableAfterPodSchedulingContextChange(logger, tc.pod, tc.oldObj, tc.newObj)
+			actualHint, err := testCtx.p.isSchedulableAfterPodSchedulingContextChange(logger, tc.pod, tc.oldObj, tc.newObj)
+			if tc.expectedErr {
+				require.Error(t, err)
+				return
+			}
+
+			require.NoError(t, err)
 			require.Equal(t, tc.expectedHint, actualHint)
 		})
 	}
@@ -57,11 +57,11 @@ func (pl *NodeUnschedulable) EventsToRegister() []framework.ClusterEventWithHint
 // isSchedulableAfterNodeChange is invoked for all node events reported by
 // an informer. It checks whether that change made a previously unschedulable
 // pod schedulable.
-func (pl *NodeUnschedulable) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) framework.QueueingHint {
+func (pl *NodeUnschedulable) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
 	originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
 	if err != nil {
 		logger.Error(err, "unexpected objects in isSchedulableAfterNodeChange", "oldObj", oldObj, "newObj", newObj)
-		return framework.QueueAfterBackoff
+		return framework.QueueAfterBackoff, err
 	}
 
 	originalNodeSchedulable, modifiedNodeSchedulable := false, !modifiedNode.Spec.Unschedulable
@@ -71,11 +71,11 @@ func (pl *NodeUnschedulable) isSchedulableAfterNodeChange(logger klog.Logger, po
 
 	if !originalNodeSchedulable && modifiedNodeSchedulable {
 		logger.V(4).Info("node was created or updated, pod may be schedulable now", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
-		return framework.QueueAfterBackoff
+		return framework.QueueAfterBackoff, nil
 	}
 
 	logger.V(4).Info("node was created or updated, but it doesn't make this pod schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
-	return framework.QueueSkip
+	return framework.QueueSkip, nil
 }
 
 // Name returns name of the plugin. It is used in logs, etc.
@@ -90,12 +90,14 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
 		pod            *v1.Pod
 		oldObj, newObj interface{}
 		expectedHint   framework.QueueingHint
+		expectedErr    bool
 	}{
 		{
 			name:         "backoff-wrong-new-object",
 			pod:          &v1.Pod{},
 			newObj:       "not-a-node",
 			expectedHint: framework.QueueAfterBackoff,
+			expectedErr:  true,
 		},
 		{
 			name: "backoff-wrong-old-object",
@@ -107,6 +109,7 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
 			},
 			oldObj:       "not-a-node",
 			expectedHint: framework.QueueAfterBackoff,
+			expectedErr:  true,
 		},
 		{
 			name: "skip-queue-on-unschedulable-node-added",
@@ -170,7 +173,11 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
 		t.Run(testCase.name, func(t *testing.T) {
 			logger, _ := ktesting.NewTestContext(t)
 			pl := &NodeUnschedulable{}
-			if got := pl.isSchedulableAfterNodeChange(logger, testCase.pod, testCase.oldObj, testCase.newObj); got != testCase.expectedHint {
+			got, err := pl.isSchedulableAfterNodeChange(logger, testCase.pod, testCase.oldObj, testCase.newObj)
+			if err != nil && !testCase.expectedErr {
+				t.Errorf("unexpected error: %v", err)
+			}
+			if got != testCase.expectedHint {
 				t.Errorf("isSchedulableAfterNodeChange() = %v, want %v", got, testCase.expectedHint)
 			}
 		})
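The errors propagated above originate in the generic conversion helper used throughout the diff. As background, a minimal sketch of what such a helper might look like follows; this is a hypothetical reimplementation for illustration, not the real schedutil.As/util.As, which for instance also has to unwrap cache.DeletedFinalStateUnknown, as the "backoff-missed-wrong-old-object" test case suggests. Only fmt is assumed as an import.

	// As converts oldObj and newObj to T and returns an error when an object is
	// non-nil but has an unexpected type. Nil inputs stay typed nil values,
	// which is why callers can check originalClaim == nil to detect a create.
	func As[T any](oldObj, newObj interface{}) (T, T, error) {
		var oldT, newT T
		if oldObj != nil {
			o, ok := oldObj.(T)
			if !ok {
				return oldT, newT, fmt.Errorf("expected %T, but got %T", oldT, oldObj)
			}
			oldT = o
		}
		if newObj != nil {
			n, ok := newObj.(T)
			if !ok {
				return oldT, newT, fmt.Errorf("expected %T, but got %T", newT, newObj)
			}
			newT = n
		}
		return oldT, newT, nil
	}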