Merge pull request #124595 from pohly/dra-scheduler-assume-cache-eventhandlers

DRA: scheduler event handlers via assume cache
This commit is contained in:
Kubernetes Prow Robot
2024-06-25 11:56:28 -07:00
committed by GitHub
12 changed files with 790 additions and 87 deletions

View File

@@ -276,7 +276,6 @@ type dynamicResources struct {
enabled bool
fh framework.Handle
clientset kubernetes.Interface
claimLister resourcev1alpha2listers.ResourceClaimLister
classLister resourcev1alpha2listers.ResourceClassLister
podSchedulingContextLister resourcev1alpha2listers.PodSchedulingContextLister
claimParametersLister resourcev1alpha2listers.ResourceClaimParametersLister
@@ -354,12 +353,10 @@ func New(ctx context.Context, plArgs runtime.Object, fh framework.Handle, fts fe
return &dynamicResources{}, nil
}
logger := klog.FromContext(ctx)
pl := &dynamicResources{
enabled: true,
fh: fh,
clientset: fh.ClientSet(),
claimLister: fh.SharedInformerFactory().Resource().V1alpha2().ResourceClaims().Lister(),
classLister: fh.SharedInformerFactory().Resource().V1alpha2().ResourceClasses().Lister(),
podSchedulingContextLister: fh.SharedInformerFactory().Resource().V1alpha2().PodSchedulingContexts().Lister(),
claimParametersLister: fh.SharedInformerFactory().Resource().V1alpha2().ResourceClaimParameters().Lister(),
@@ -368,7 +365,7 @@ func New(ctx context.Context, plArgs runtime.Object, fh framework.Handle, fts fe
classParametersIndexer: fh.SharedInformerFactory().Resource().V1alpha2().ResourceClassParameters().Informer().GetIndexer(),
resourceSliceLister: fh.SharedInformerFactory().Resource().V1alpha2().ResourceSlices().Lister(),
claimNameLookup: resourceclaim.NewNameLookup(fh.ClientSet()),
claimAssumeCache: assumecache.NewAssumeCache(logger, fh.SharedInformerFactory().Resource().V1alpha2().ResourceClaims().Informer(), "claim", "", nil),
claimAssumeCache: fh.ResourceClaimCache(),
}
if err := pl.claimParametersIndexer.AddIndexers(cache.Indexers{generatedFromIndex: claimParametersGeneratedFromIndexFunc}); err != nil {
@@ -651,21 +648,6 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
//
// TODO (https://github.com/kubernetes/kubernetes/issues/123697):
// check that the pending claims depend on structured parameters (depends on refactoring foreachPodResourceClaim, see other TODO).
//
// There is a small race here:
// - The dynamicresources plugin allocates claim A and updates the assume cache.
// - A second pod gets marked as unschedulable based on that assume cache.
// - Before the informer cache here catches up, the pod runs, terminates and
// the claim gets deallocated without ever sending the claim status with
// allocation to the scheduler.
// - The comparison below is for a *very* old claim with no allocation and the
// new claim where the allocation is already removed again, so no
// RemovedClaimAllocation event gets emitted.
//
// This is extremely unlikely and thus a fix is not needed for alpha in Kubernetes 1.30.
// TODO (https://github.com/kubernetes/kubernetes/issues/123698): The solution is to somehow integrate the assume cache
// into the event mechanism. This can be tackled together with adding autoscaler
// support, which also needs to do something with the assume cache.
logger.V(6).Info("claim with structured parameters got deallocated", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
return framework.Queue, nil
}
@@ -852,11 +834,16 @@ func (pl *dynamicResources) foreachPodResourceClaim(pod *v1.Pod, cb func(podReso
if claimName == nil {
continue
}
claim, err := pl.claimLister.ResourceClaims(pod.Namespace).Get(*claimName)
obj, err := pl.claimAssumeCache.Get(pod.Namespace + "/" + *claimName)
if err != nil {
return err
}
claim, ok := obj.(*resourcev1alpha2.ResourceClaim)
if !ok {
return fmt.Errorf("unexpected object type %T for assumed object %s/%s", obj, pod.Namespace, *claimName)
}
if claim.DeletionTimestamp != nil {
return fmt.Errorf("resourceclaim %q is being deleted", claim.Name)
}

View File

@@ -44,6 +44,7 @@ import (
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
"k8s.io/kubernetes/pkg/scheduler/framework/runtime"
st "k8s.io/kubernetes/pkg/scheduler/testing"
"k8s.io/kubernetes/pkg/scheduler/util/assumecache"
"k8s.io/kubernetes/test/utils/ktesting"
"k8s.io/utils/ptr"
)
@@ -1217,12 +1218,13 @@ func TestPlugin(t *testing.T) {
}
type testContext struct {
ctx context.Context
client *fake.Clientset
informerFactory informers.SharedInformerFactory
p *dynamicResources
nodeInfos []*framework.NodeInfo
state *framework.CycleState
ctx context.Context
client *fake.Clientset
informerFactory informers.SharedInformerFactory
claimAssumeCache *assumecache.AssumeCache
p *dynamicResources
nodeInfos []*framework.NodeInfo
state *framework.CycleState
}
func (tc *testContext) verify(t *testing.T, expected result, initialObjects []metav1.Object, result interface{}, status *framework.Status) {
@@ -1388,10 +1390,11 @@ func setup(t *testing.T, nodes []*v1.Node, claims []*resourcev1alpha2.ResourceCl
tc.client.PrependReactor("list", "resourceclassparameters", createListReactor(tc.client.Tracker(), "ResourceClassParameters"))
tc.informerFactory = informers.NewSharedInformerFactory(tc.client, 0)
tc.claimAssumeCache = assumecache.NewAssumeCache(tCtx.Logger(), tc.informerFactory.Resource().V1alpha2().ResourceClaims().Informer(), "resource claim", "", nil)
opts := []runtime.Option{
runtime.WithClientSet(tc.client),
runtime.WithInformerFactory(tc.informerFactory),
runtime.WithResourceClaimCache(tc.claimAssumeCache),
}
fh, err := runtime.NewFramework(tCtx, nil, nil, opts...)
if err != nil {
@@ -1558,6 +1561,7 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
},
"backoff-wrong-old-object": {
pod: podWithClaimName,
claims: []*resourcev1alpha2.ResourceClaim{pendingDelayedClaim},
oldObj: "not-a-claim",
newObj: pendingImmediateClaim,
expectedErr: true,
@@ -1586,15 +1590,10 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
},
"structured-claim-deallocate": {
pod: podWithClaimName,
claims: []*resourcev1alpha2.ResourceClaim{pendingDelayedClaim},
oldObj: func() *resourcev1alpha2.ResourceClaim {
claim := structuredAllocatedClaim.DeepCopy()
claim.Name += "-other"
return claim
}(),
claims: []*resourcev1alpha2.ResourceClaim{pendingDelayedClaim, otherStructuredAllocatedClaim},
oldObj: otherStructuredAllocatedClaim,
newObj: func() *resourcev1alpha2.ResourceClaim {
claim := structuredAllocatedClaim.DeepCopy()
claim.Name += "-other"
claim := otherStructuredAllocatedClaim.DeepCopy()
claim.Status.Allocation = nil
return claim
}(),
@@ -1606,18 +1605,48 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
for name, tc := range testcases {
t.Run(name, func(t *testing.T) {
logger, _ := ktesting.NewTestContext(t)
logger, tCtx := ktesting.NewTestContext(t)
testCtx := setup(t, nil, tc.claims, nil, nil, nil)
oldObj := tc.oldObj
newObj := tc.newObj
if claim, ok := tc.newObj.(*resourcev1alpha2.ResourceClaim); ok {
// Update the informer because the lister gets called and must have the claim.
store := testCtx.informerFactory.Resource().V1alpha2().ResourceClaims().Informer().GetStore()
// Add or update through the client and wait until the event is processed.
claimKey := claim.Namespace + "/" + claim.Name
if tc.oldObj == nil {
require.NoError(t, store.Add(claim))
// Some test claims already have a UID set. Clear it before creating the claim.
createClaim := claim.DeepCopy()
createClaim.UID = ""
storedClaim, err := testCtx.client.ResourceV1alpha2().ResourceClaims(createClaim.Namespace).Create(tCtx, createClaim, metav1.CreateOptions{})
require.NoError(t, err, "create claim")
claim = storedClaim
} else {
require.NoError(t, store.Update(claim))
cachedClaim, err := testCtx.claimAssumeCache.Get(claimKey)
require.NoError(t, err, "retrieve old claim")
updateClaim := claim.DeepCopy()
// The test claim doesn't have a UID and ResourceVersion (they are generated dynamically), so copy them from the cached claim.
updateClaim.UID = cachedClaim.(*resourcev1alpha2.ResourceClaim).UID
updateClaim.ResourceVersion = cachedClaim.(*resourcev1alpha2.ResourceClaim).ResourceVersion
storedClaim, err := testCtx.client.ResourceV1alpha2().ResourceClaims(updateClaim.Namespace).Update(tCtx, updateClaim, metav1.UpdateOptions{})
require.NoError(t, err, "update claim")
claim = storedClaim
}
// Eventually the assume cache will have it, too.
require.EventuallyWithT(t, func(t *assert.CollectT) {
cachedClaim, err := testCtx.claimAssumeCache.Get(claimKey)
require.NoError(t, err, "retrieve claim")
if cachedClaim.(*resourcev1alpha2.ResourceClaim).ResourceVersion != claim.ResourceVersion {
t.Errorf("cached claim not updated yet")
}
}, time.Minute, time.Second, "claim assume cache must have new or updated claim")
// The stored claim has the actual UID and ResourceVersion,
// which are relevant for
// isSchedulableAfterClaimChange.
newObj = claim
}
actualHint, err := testCtx.p.isSchedulableAfterClaimChange(logger, tc.pod, tc.oldObj, tc.newObj)
actualHint, err := testCtx.p.isSchedulableAfterClaimChange(logger, tc.pod, oldObj, newObj)
if tc.expectedErr {
require.Error(t, err)
return