dra: patch ReservedFor during PreBind

This moves adding a pod to a claim's ReservedFor list out of the main scheduling cycle
and into PreBind. There it happens in the per-pod binding goroutines, i.e.
concurrently for different pods instead of blocking the scheduler. For claims
that were allocated specifically for one pod (the most common case), this
usually makes no difference because the claim is already reserved.
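
The flow can be sketched roughly like this (hypothetical type and function
names, not the plugin's actual code; the import block also covers the
reserveForPod helper shown after the next paragraph):

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
)

// plugin is a stand-in for the scheduler plugin type.
type plugin struct {
	clientset kubernetes.Interface
}

// preBindClaims does the per-pod PreBind work: claims that are already
// reserved for the pod are skipped, all others get patched.
func (pl *plugin) preBindClaims(ctx context.Context, pod *v1.Pod, claims []*resourcev1alpha2.ResourceClaim) error {
	for _, claim := range claims {
		reserved := false
		for _, consumer := range claim.Status.ReservedFor {
			if consumer.UID == pod.UID {
				// Most common case: the claim was allocated for
				// exactly this pod and is already reserved for it.
				reserved = true
				break
			}
		}
		if reserved {
			continue
		}
		if _, err := pl.reserveForPod(ctx, claim, pod); err != nil {
			return err
		}
	}
	return nil
}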

It starts to matter when such a pod then cannot be scheduled for other reasons,
because the claim then gets unreserved to allow deallocating it. It also
matters for claims that are created separately and then used multiple times
by different pods.

Because multiple pods might get added to the same claim in rapid succession and
independently of each other, it makes sense to do all claim status updates via
patching: it is then no longer necessary to have an up-to-date copy of the
claim, because the patch operation will succeed if (and only if) the patched
claim is valid.
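
Concretely, such an update can be a strategic merge patch against the status
subresource. This continues the sketch above and assumes that reservedFor is
a merge-type list keyed by uid:

// reserveForPod adds the pod to claim.Status.ReservedFor with a single
// strategic merge patch: the server merges the one new entry into the
// existing list and validates the result, so no read-modify-write cycle
// and no up-to-date local copy of the claim are needed. The metadata.uid
// precondition makes the patch fail if the claim was deleted and recreated.
func (pl *plugin) reserveForPod(ctx context.Context, claim *resourcev1alpha2.ResourceClaim, pod *v1.Pod) (*resourcev1alpha2.ResourceClaim, error) {
	patch := fmt.Sprintf(`{"metadata": {"uid": %q}, "status": {"reservedFor": [{"resource": "pods", "name": %q, "uid": %q}]}}`,
		claim.UID, pod.Name, pod.UID)
	return pl.clientset.ResourceV1alpha2().ResourceClaims(claim.Namespace).Patch(ctx,
		claim.Name, types.StrategicMergePatchType, []byte(patch),
		metav1.PatchOptions{}, "status")
}

If two pods patch the same claim at nearly the same time, both single-entry
patches merge cleanly; the apiserver only rejects a patch whose result would
be invalid, for example when the claim already has the maximum number of
consumers.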

Server-side apply cannot be used for this because a client always has to send
the full list of entries that it wants set, i.e. it cannot add one entry
unless it knows the complete list.
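
To see why, compare a hypothetical server-side-apply version (a sketch; the
resourceclaimapply alias stands for the generated
k8s.io/client-go/applyconfigurations/resource/v1alpha2 package). Every Apply
is authoritative for all entries that its field manager owns, so an Apply
containing only the newest pod would remove the entries the scheduler applied
earlier:

// Hypothetical SSA variant, shown only to illustrate the limitation.
claimApply := resourceclaimapply.ResourceClaim(claim.Name, claim.Namespace).
	WithStatus(resourceclaimapply.ResourceClaimStatus().
		WithReservedFor( /* must repeat every existing entry, plus the new pod */ ))
// Sending only the new consumer here would drop the ones this field
// manager added before, hence the full list is always required.
_, err := pl.clientset.ResourceV1alpha2().ResourceClaims(claim.Namespace).
	ApplyStatus(ctx, claimApply, metav1.ApplyOptions{FieldManager: "kube-scheduler", Force: true})
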
Patrick Ohly
2023-11-14 15:47:47 +01:00
parent 81986587ef
commit 5d1509126f
3 changed files with 224 additions and 64 deletions

@@ -192,9 +192,14 @@ type want struct {
     prescore         result
     reserve          result
     unreserve        result
+    prebind          result
     postbind         result
     postFilterResult *framework.PostFilterResult
     postfilter       result
+
+    // unreserveAfterBindFailure, if set, triggers a call to Unreserve
+    // after PreBind, as if the actual Bind had failed.
+    unreserveAfterBindFailure *result
 }
 
 // prepare contains changes for objects in the API server.
@@ -206,6 +211,7 @@ type prepare struct {
     prescore   change
     reserve    change
     unreserve  change
+    prebind    change
     postbind   change
     postfilter change
 }
@@ -237,7 +243,7 @@ func TestPlugin(t *testing.T) {
 pod: podWithClaimName,
 claims: []*resourcev1alpha2.ResourceClaim{allocatedClaim, otherClaim},
 want: want{
-    reserve: result{
+    prebind: result{
         changes: change{
             claim: func(claim *resourcev1alpha2.ResourceClaim) *resourcev1alpha2.ResourceClaim {
                 if claim.Name == claimName {
@@ -254,7 +260,7 @@ func TestPlugin(t *testing.T) {
 pod: podWithClaimTemplateInStatus,
 claims: []*resourcev1alpha2.ResourceClaim{allocatedClaim, otherClaim},
 want: want{
-    reserve: result{
+    prebind: result{
         changes: change{
             claim: func(claim *resourcev1alpha2.ResourceClaim) *resourcev1alpha2.ResourceClaim {
                 if claim.Name == claimName {
@@ -417,7 +423,7 @@ func TestPlugin(t *testing.T) {
 schedulings: []*resourcev1alpha2.PodSchedulingContext{schedulingInfo},
 classes: []*resourcev1alpha2.ResourceClass{resourceClass},
 want: want{
-    reserve: result{
+    prebind: result{
         changes: change{
             claim: func(in *resourcev1alpha2.ResourceClaim) *resourcev1alpha2.ResourceClaim {
                 return st.FromResourceClaim(in).
@@ -492,7 +498,7 @@ func TestPlugin(t *testing.T) {
 pod: podWithClaimName,
 claims: []*resourcev1alpha2.ResourceClaim{allocatedClaimWithGoodTopology},
 want: want{
-    reserve: result{
+    prebind: result{
         changes: change{
             claim: func(in *resourcev1alpha2.ResourceClaim) *resourcev1alpha2.ResourceClaim {
                 return st.FromResourceClaim(in).
@@ -503,6 +509,30 @@ func TestPlugin(t *testing.T) {
         },
     },
 },
+"bind-failure": {
+    pod: podWithClaimName,
+    claims: []*resourcev1alpha2.ResourceClaim{allocatedClaimWithGoodTopology},
+    want: want{
+        prebind: result{
+            changes: change{
+                claim: func(in *resourcev1alpha2.ResourceClaim) *resourcev1alpha2.ResourceClaim {
+                    return st.FromResourceClaim(in).
+                        ReservedFor(resourcev1alpha2.ResourceClaimConsumerReference{Resource: "pods", Name: podName, UID: types.UID(podUID)}).
+                        Obj()
+                },
+            },
+        },
+        unreserveAfterBindFailure: &result{
+            changes: change{
+                claim: func(in *resourcev1alpha2.ResourceClaim) *resourcev1alpha2.ResourceClaim {
+                    out := in.DeepCopy()
+                    out.Status.ReservedFor = []resourcev1alpha2.ResourceClaimConsumerReference{}
+                    return out
+                },
+            },
+        },
+    },
+},
 "reserved-okay": {
     pod: podWithClaimName,
     claims: []*resourcev1alpha2.ResourceClaim{inUseClaim},
@@ -607,11 +637,26 @@ func TestPlugin(t *testing.T) {
         })
     } else {
         initialObjects = testCtx.listAll(t)
-        initialObjects = testCtx.updateAPIServer(t, initialObjects, tc.prepare.postbind)
-        testCtx.p.PostBind(testCtx.ctx, testCtx.state, tc.pod, selectedNode.Node().Name)
-        t.Run("postbind", func(t *testing.T) {
-            testCtx.verify(t, tc.want.postbind, initialObjects, nil, status)
+        initialObjects = testCtx.updateAPIServer(t, initialObjects, tc.prepare.prebind)
+        status := testCtx.p.PreBind(testCtx.ctx, testCtx.state, tc.pod, selectedNode.Node().Name)
+        t.Run("prebind", func(t *testing.T) {
+            testCtx.verify(t, tc.want.prebind, initialObjects, nil, status)
         })
+        if tc.want.unreserveAfterBindFailure != nil {
+            initialObjects = testCtx.listAll(t)
+            testCtx.p.Unreserve(testCtx.ctx, testCtx.state, tc.pod, selectedNode.Node().Name)
+            t.Run("unreserveAfterBindFailure", func(t *testing.T) {
+                testCtx.verify(t, *tc.want.unreserveAfterBindFailure, initialObjects, nil, status)
+            })
+        } else if status.IsSuccess() {
+            initialObjects = testCtx.listAll(t)
+            initialObjects = testCtx.updateAPIServer(t, initialObjects, tc.prepare.postbind)
+            testCtx.p.PostBind(testCtx.ctx, testCtx.state, tc.pod, selectedNode.Node().Name)
+            t.Run("postbind", func(t *testing.T) {
+                testCtx.verify(t, tc.want.postbind, initialObjects, nil, nil)
+            })
+        }
     }
 } else {
     initialObjects = testCtx.listAll(t)