DRA: add DRAControlPlaneController feature gate for "classic DRA"

In the API, the effect of the feature gate is that alpha fields get
dropped on create and are preserved during updates if they were already
set. The PodSchedulingContext API registration is *not* restricted by
the feature gate, which makes it possible to delete stale
PodSchedulingContext objects after the feature gate has been disabled.
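
On the API server side this follows the usual Kubernetes
drop-disabled-fields pattern. A minimal sketch, with an illustrative
field name (the real strategy code covers every alpha field guarded by
the gate; utilfeature = k8s.io/apiserver/pkg/util/feature, features =
k8s.io/kubernetes/pkg/features):

    // Sketch only: cleared on create, preserved on update if already in use.
    func dropDisabledDRAFields(newClaim, oldClaim *resource.ResourceClaim) {
    	if utilfeature.DefaultFeatureGate.Enabled(features.DRAControlPlaneController) {
    		return // gate enabled: nothing to drop
    	}
    	if oldClaim != nil && oldClaim.Spec.Controller != "" {
    		return // update of an object that already uses the field: preserve it
    	}
    	// Create (or the field was not in use before): clear the alpha field.
    	newClaim.Spec.Controller = ""
    }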

The scheduler checks the new feature gate before setting up an informer for
PodSchedulingContext objects and when deciding whether it can schedule a
pod. If any claim depends on a control plane controller, the scheduler bails
out, leading to:

    Status:       Pending
    ...
      Warning  FailedScheduling             73s   default-scheduler  0/1 nodes are available: resourceclaim depends on disabled DRAControlPlaneController feature. no new claims to deallocate, preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling.
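
The check itself is conceptually simple. A minimal sketch of the
PreFilter-side logic, with assumed names (claim.Spec.Controller for the
gated alpha field, pl.enableClassicDRA for the gate); the real plugin
walks all claims referenced by the pod:

    // Sketch only: reject the pod when a claim needs a control plane
    // controller but the feature gate is off.
    func (pl *dynamicResources) checkClassicDRA(claim *resourceapi.ResourceClaim) *framework.Status {
    	if claim.Spec.Controller != "" && !pl.enableClassicDRA {
    		// UnschedulableAndUnresolvable also skips preemption, which is
    		// why the event above says it is "not helpful".
    		return framework.NewStatus(framework.UnschedulableAndUnresolvable,
    			"resourceclaim depends on disabled DRAControlPlaneController feature")
    	}
    	return nil
    }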

The rest of the changes prepare for testing the new feature separately
from "structured parameters". The goal is to have base "dra" jobs which
enable and test only structured parameters, plus "classic-dra" jobs
which additionally enable DRAControlPlaneController.
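
In the tests this means replacing the single plugin-level "enabled"
boolean with a feature.Features struct that carries both gates,
condensed from the changes below:

    features := feature.Features{
    	EnableDynamicResourceAllocation: !tc.disableDRA,
    	EnableDRAControlPlaneController: !tc.disableClassicDRA,
    }
    pl, err := New(tCtx, nil, fh, features)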
Patrick Ohly, 2024-07-15 21:05:21 +02:00
commit 9f36c8d718, parent 599fe605f9
16 changed files with 651 additions and 92 deletions


@@ -316,7 +316,11 @@ func TestPlugin(t *testing.T) {
 		prepare prepare
 		want    want
-		disable bool
+
+		// Feature gates. False is chosen so that the uncommon case
+		// doesn't need to be set.
+		disableDRA        bool
+		disableClassicDRA bool
 	}{
 		"empty": {
 			pod: st.MakePod().Name("foo").Namespace("default").Obj(),
@@ -912,7 +916,7 @@ func TestPlugin(t *testing.T) {
 			pod:    podWithClaimName,
 			claims: []*resourceapi.ResourceClaim{inUseClaim},
 		},
-		"disable": {
+		"DRA-disabled": {
 			pod:    podWithClaimName,
 			claims: []*resourceapi.ResourceClaim{inUseClaim},
 			want: want{
@@ -920,7 +924,7 @@ func TestPlugin(t *testing.T) {
 					status: framework.NewStatus(framework.Skip),
 				},
 			},
-			disable: true,
+			disableDRA: true,
 		},
 	}
@@ -933,8 +937,11 @@ func TestPlugin(t *testing.T) {
 			if nodes == nil {
 				nodes = []*v1.Node{workerNode}
 			}
-			testCtx := setup(t, nodes, tc.claims, tc.classes, tc.schedulings, tc.objs)
-			testCtx.p.enabled = !tc.disable
+			features := feature.Features{
+				EnableDynamicResourceAllocation: !tc.disableDRA,
+				EnableDRAControlPlaneController: !tc.disableClassicDRA,
+			}
+			testCtx := setup(t, nodes, tc.claims, tc.classes, tc.schedulings, tc.objs, features)
 			initialObjects := testCtx.listAll(t)
 			status := testCtx.p.PreEnqueue(testCtx.ctx, tc.pod)
@@ -1136,6 +1143,9 @@ func (tc *testContext) listAll(t *testing.T) (objects []metav1.Object) {
 }
 
 func (tc *testContext) listAssumedClaims() []metav1.Object {
+	if tc.p.claimAssumeCache == nil {
+		return nil
+	}
 	var assumedClaims []metav1.Object
 	for _, obj := range tc.p.claimAssumeCache.List(nil) {
 		claim := obj.(*resourceapi.ResourceClaim)
@@ -1219,7 +1229,7 @@ func update(t *testing.T, objects []metav1.Object, updates change) []metav1.Object {
 	return updated
 }
 
-func setup(t *testing.T, nodes []*v1.Node, claims []*resourceapi.ResourceClaim, classes []*resourceapi.DeviceClass, schedulings []*resourceapi.PodSchedulingContext, objs []apiruntime.Object) (result *testContext) {
+func setup(t *testing.T, nodes []*v1.Node, claims []*resourceapi.ResourceClaim, classes []*resourceapi.DeviceClass, schedulings []*resourceapi.PodSchedulingContext, objs []apiruntime.Object, features feature.Features) (result *testContext) {
 	t.Helper()
 	tc := &testContext{}
@@ -1242,7 +1252,7 @@ func setup(t *testing.T, nodes []*v1.Node, claims []*resourceapi.ResourceClaim,
 		t.Fatal(err)
 	}
 
-	pl, err := New(tCtx, nil, fh, feature.Features{EnableDynamicResourceAllocation: true})
+	pl, err := New(tCtx, nil, fh, features)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -1436,7 +1446,10 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
 	for name, tc := range testcases {
 		t.Run(name, func(t *testing.T) {
 			logger, tCtx := ktesting.NewTestContext(t)
-			testCtx := setup(t, nil, tc.claims, nil, nil, nil)
+			features := feature.Features{
+				EnableDynamicResourceAllocation: true,
+			}
+			testCtx := setup(t, nil, tc.claims, nil, nil, nil, features)
 			oldObj := tc.oldObj
 			newObj := tc.newObj
 			if claim, ok := tc.newObj.(*resourceapi.ResourceClaim); ok {
@@ -1604,7 +1617,11 @@ func Test_isSchedulableAfterPodSchedulingContextChange(t *testing.T) {
 		t.Run(name, func(t *testing.T) {
 			t.Parallel()
 			logger, _ := ktesting.NewTestContext(t)
-			testCtx := setup(t, nil, tc.claims, nil, tc.schedulings, nil)
+			features := feature.Features{
+				EnableDynamicResourceAllocation: true,
+				EnableDRAControlPlaneController: true,
+			}
+			testCtx := setup(t, nil, tc.claims, nil, tc.schedulings, nil, features)
 			actualHint, err := testCtx.p.isSchedulableAfterPodSchedulingContextChange(logger, tc.pod, tc.oldObj, tc.newObj)
 			if tc.expectedErr {
 				require.Error(t, err)