dra: patch ReservedFor during PreBind

This moves adding a pod to a claim's ReservedFor list out of the main scheduling cycle
and into PreBind. There it happens in the per-pod binding goroutines, i.e.
concurrently for different pods instead of blocking the scheduler. For claims
that were allocated specifically for one pod (the most common case), this
usually makes no difference because the claim is already reserved.
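
The flow can be sketched roughly like this (hypothetical type and function
names, not the plugin's actual code; the import block also covers the
reserveForPod helper shown after the next paragraph):

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
)

// plugin is a stand-in for the scheduler plugin type.
type plugin struct {
	clientset kubernetes.Interface
}

// preBindClaims does the per-pod PreBind work: claims that are already
// reserved for the pod are skipped, all others get patched.
func (pl *plugin) preBindClaims(ctx context.Context, pod *v1.Pod, claims []*resourcev1alpha2.ResourceClaim) error {
	for _, claim := range claims {
		reserved := false
		for _, consumer := range claim.Status.ReservedFor {
			if consumer.UID == pod.UID {
				// Most common case: the claim was allocated for
				// exactly this pod and is already reserved for it.
				reserved = true
				break
			}
		}
		if reserved {
			continue
		}
		if _, err := pl.reserveForPod(ctx, claim, pod); err != nil {
			return err
		}
	}
	return nil
}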

It starts to matter when such a pod then cannot be scheduled for other reasons,
because the claim then gets unreserved to allow deallocating it. It also
matters for claims that are created separately and then used multiple times
by different pods.

Because multiple pods might get added to the same claim in rapid succession and
independently of each other, it makes sense to do all claim status updates via
patching: it is then no longer necessary to have an up-to-date copy of the
claim, because the patch operation will succeed if (and only if) the patched
claim is valid.
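
Concretely, such an update can be a strategic merge patch against the status
subresource. This continues the sketch above and assumes that reservedFor is
a merge-type list keyed by uid:

// reserveForPod adds the pod to claim.Status.ReservedFor with a single
// strategic merge patch: the server merges the one new entry into the
// existing list and validates the result, so no read-modify-write cycle
// and no up-to-date local copy of the claim are needed. The metadata.uid
// precondition makes the patch fail if the claim was deleted and recreated.
func (pl *plugin) reserveForPod(ctx context.Context, claim *resourcev1alpha2.ResourceClaim, pod *v1.Pod) (*resourcev1alpha2.ResourceClaim, error) {
	patch := fmt.Sprintf(`{"metadata": {"uid": %q}, "status": {"reservedFor": [{"resource": "pods", "name": %q, "uid": %q}]}}`,
		claim.UID, pod.Name, pod.UID)
	return pl.clientset.ResourceV1alpha2().ResourceClaims(claim.Namespace).Patch(ctx,
		claim.Name, types.StrategicMergePatchType, []byte(patch),
		metav1.PatchOptions{}, "status")
}

If two pods patch the same claim at nearly the same time, both single-entry
patches merge cleanly; the apiserver only rejects a patch whose result would
be invalid, for example when the claim already has the maximum number of
consumers.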

Server-side apply cannot be used for this because a client always has to send
the full list of entries that it wants set, i.e. it cannot add one entry
unless it knows the complete list.
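
To see why, compare a hypothetical server-side-apply version (a sketch; the
resourceclaimapply alias stands for the generated
k8s.io/client-go/applyconfigurations/resource/v1alpha2 package). Every Apply
is authoritative for all entries that its field manager owns, so an Apply
containing only the newest pod would remove the entries the scheduler applied
earlier:

// Hypothetical SSA variant, shown only to illustrate the limitation.
claimApply := resourceclaimapply.ResourceClaim(claim.Name, claim.Namespace).
	WithStatus(resourceclaimapply.ResourceClaimStatus().
		WithReservedFor( /* must repeat every existing entry, plus the new pod */ ))
// Sending only the new consumer here would drop the ones this field
// manager added before, hence the full list is always required.
_, err := pl.clientset.ResourceV1alpha2().ResourceClaims(claim.Namespace).
	ApplyStatus(ctx, claimApply, metav1.ApplyOptions{FieldManager: "kube-scheduler", Force: true})
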
Patrick Ohly
2023-11-14 15:47:47 +01:00
parent 81986587ef
commit 5d1509126f
3 changed files with 224 additions and 64 deletions

@@ -192,9 +192,14 @@ type want struct {
     prescore         result
     reserve          result
     unreserve        result
+    prebind          result
     postbind         result
     postFilterResult *framework.PostFilterResult
     postfilter       result
+
+    // unreserveAfterBindFailure, if set, triggers a call to Unreserve
+    // after PreBind, as if the actual Bind had failed.
+    unreserveAfterBindFailure *result
 }
 
 // prepare contains changes for objects in the API server.
@@ -206,6 +211,7 @@ type prepare struct {
     prescore   change
     reserve    change
     unreserve  change
+    prebind    change
     postbind   change
     postfilter change
 }
@@ -237,7 +243,7 @@ func TestPlugin(t *testing.T) {
 pod: podWithClaimName,
 claims: []*resourcev1alpha2.ResourceClaim{allocatedClaim, otherClaim},
 want: want{
-    reserve: result{
+    prebind: result{
         changes: change{
             claim: func(claim *resourcev1alpha2.ResourceClaim) *resourcev1alpha2.ResourceClaim {
                 if claim.Name == claimName {
@@ -254,7 +260,7 @@ func TestPlugin(t *testing.T) {
 pod: podWithClaimTemplateInStatus,
 claims: []*resourcev1alpha2.ResourceClaim{allocatedClaim, otherClaim},
 want: want{
-    reserve: result{
+    prebind: result{
         changes: change{
             claim: func(claim *resourcev1alpha2.ResourceClaim) *resourcev1alpha2.ResourceClaim {
                 if claim.Name == claimName {
@@ -417,7 +423,7 @@ func TestPlugin(t *testing.T) {
 schedulings: []*resourcev1alpha2.PodSchedulingContext{schedulingInfo},
 classes: []*resourcev1alpha2.ResourceClass{resourceClass},
 want: want{
-    reserve: result{
+    prebind: result{
         changes: change{
             claim: func(in *resourcev1alpha2.ResourceClaim) *resourcev1alpha2.ResourceClaim {
                 return st.FromResourceClaim(in).
@@ -492,7 +498,7 @@ func TestPlugin(t *testing.T) {
 pod: podWithClaimName,
 claims: []*resourcev1alpha2.ResourceClaim{allocatedClaimWithGoodTopology},
 want: want{
-    reserve: result{
+    prebind: result{
         changes: change{
             claim: func(in *resourcev1alpha2.ResourceClaim) *resourcev1alpha2.ResourceClaim {
                 return st.FromResourceClaim(in).
@@ -503,6 +509,30 @@ func TestPlugin(t *testing.T) {
         },
     },
 },
+"bind-failure": {
+    pod: podWithClaimName,
+    claims: []*resourcev1alpha2.ResourceClaim{allocatedClaimWithGoodTopology},
+    want: want{
+        prebind: result{
+            changes: change{
+                claim: func(in *resourcev1alpha2.ResourceClaim) *resourcev1alpha2.ResourceClaim {
+                    return st.FromResourceClaim(in).
+                        ReservedFor(resourcev1alpha2.ResourceClaimConsumerReference{Resource: "pods", Name: podName, UID: types.UID(podUID)}).
+                        Obj()
+                },
+            },
+        },
+        unreserveAfterBindFailure: &result{
+            changes: change{
+                claim: func(in *resourcev1alpha2.ResourceClaim) *resourcev1alpha2.ResourceClaim {
+                    out := in.DeepCopy()
+                    out.Status.ReservedFor = []resourcev1alpha2.ResourceClaimConsumerReference{}
+                    return out
+                },
+            },
+        },
+    },
+},
 "reserved-okay": {
     pod: podWithClaimName,
     claims: []*resourcev1alpha2.ResourceClaim{inUseClaim},
@@ -607,11 +637,26 @@ func TestPlugin(t *testing.T) {
         })
     } else {
         initialObjects = testCtx.listAll(t)
-        initialObjects = testCtx.updateAPIServer(t, initialObjects, tc.prepare.postbind)
-        testCtx.p.PostBind(testCtx.ctx, testCtx.state, tc.pod, selectedNode.Node().Name)
-        t.Run("postbind", func(t *testing.T) {
-            testCtx.verify(t, tc.want.postbind, initialObjects, nil, status)
+        initialObjects = testCtx.updateAPIServer(t, initialObjects, tc.prepare.prebind)
+        status := testCtx.p.PreBind(testCtx.ctx, testCtx.state, tc.pod, selectedNode.Node().Name)
+        t.Run("prebind", func(t *testing.T) {
+            testCtx.verify(t, tc.want.prebind, initialObjects, nil, status)
         })
+        if tc.want.unreserveAfterBindFailure != nil {
+            initialObjects = testCtx.listAll(t)
+            testCtx.p.Unreserve(testCtx.ctx, testCtx.state, tc.pod, selectedNode.Node().Name)
+            t.Run("unreserveAfterBindFailure", func(t *testing.T) {
+                testCtx.verify(t, *tc.want.unreserveAfterBindFailure, initialObjects, nil, status)
+            })
+        } else if status.IsSuccess() {
+            initialObjects = testCtx.listAll(t)
+            initialObjects = testCtx.updateAPIServer(t, initialObjects, tc.prepare.postbind)
+            testCtx.p.PostBind(testCtx.ctx, testCtx.state, tc.pod, selectedNode.Node().Name)
+            t.Run("postbind", func(t *testing.T) {
+                testCtx.verify(t, tc.want.postbind, initialObjects, nil, nil)
+            })
+        }
     }
 } else {
     initialObjects = testCtx.listAll(t)