DRA scheduler: adapt to v1alpha3 API

The structured parameter allocation logic was written from scratch in
staging/src/k8s.io/dynamic-resource-allocation/structured, where it might be
useful for out-of-tree components.

Besides the new features (amount, admin access) and the new API, it now
supports backtracking when the initial device selection doesn't lead to a
complete allocation of all claims.

Co-authored-by: Ed Bartosh <eduard.bartosh@intel.com>
Co-authored-by: John Belamaric <jbelamaric@google.com>
Author: Patrick Ohly
Date: 2024-07-11 16:42:51 +02:00
Parent: 0fc78b9bcc
Commit: 599fe605f9
31 changed files with 2472 additions and 3115 deletions
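
The backtracking mentioned in the commit message can be illustrated with a small, self-contained sketch. The types and the allocate function below are hypothetical simplifications, not the API of the structured package; they only show the search pattern: when a later request cannot be satisfied, the most recent device choice is undone and the next candidate is tried.

package main

import "fmt"

// device and request are hypothetical, heavily simplified stand-ins for
// the devices advertised in ResourceSlices and the requests in a claim.
type device struct {
	name    string
	healthy bool
}

type request struct {
	name        string
	needHealthy bool
}

// allocate assigns a distinct device to every request. If the current
// partial assignment leaves a later request unsatisfiable, it backtracks:
// the most recent choice is undone and the next device is tried.
func allocate(requests []request, devices []device, used map[string]bool, result map[string]string) bool {
	if len(requests) == 0 {
		return true // complete allocation of all requests
	}
	req := requests[0]
	for _, dev := range devices {
		if used[dev.name] || (req.needHealthy && !dev.healthy) {
			continue
		}
		used[dev.name] = true
		result[req.name] = dev.name
		if allocate(requests[1:], devices, used, result) {
			return true
		}
		// Backtrack: undo this choice before trying the next device.
		delete(used, dev.name)
		delete(result, req.name)
	}
	return false
}

func main() {
	devices := []device{{"instance-1", true}, {"instance-2", false}}
	requests := []request{{"req-0", false}, {"req-1", true}}
	result := map[string]string{}
	if allocate(requests, devices, map[string]bool{}, result) {
		fmt.Println("allocation:", result)
	} else {
		fmt.Println("cannot allocate all claims")
	}
}

A greedy pass would hand "instance-1" to "req-0" and then fail on "req-1"; with backtracking the first choice is revised and both requests get a device. The real allocator searches over ResourceSlices and CEL selectors, but follows the same revise-and-retry pattern.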


@@ -532,28 +532,10 @@ func addAllEventHandlers(
)
handlers = append(handlers, handlerRegistration)
}
case framework.ResourceClass:
case framework.DeviceClass:
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
if handlerRegistration, err = informerFactory.Resource().V1alpha3().ResourceClasses().Informer().AddEventHandler(
buildEvtResHandler(at, framework.ResourceClass, "ResourceClass"),
); err != nil {
return err
}
handlers = append(handlers, handlerRegistration)
}
case framework.ResourceClaimParameters:
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
if handlerRegistration, err = informerFactory.Resource().V1alpha3().ResourceClaimParameters().Informer().AddEventHandler(
buildEvtResHandler(at, framework.ResourceClaimParameters, "ResourceClaimParameters"),
); err != nil {
return err
}
handlers = append(handlers, handlerRegistration)
}
case framework.ResourceClassParameters:
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
if handlerRegistration, err = informerFactory.Resource().V1alpha3().ResourceClassParameters().Informer().AddEventHandler(
buildEvtResHandler(at, framework.ResourceClassParameters, "ResourceClassParameters"),
if handlerRegistration, err = informerFactory.Resource().V1alpha3().DeviceClasses().Informer().AddEventHandler(
buildEvtResHandler(at, framework.DeviceClass, "DeviceClass"),
); err != nil {
return err
}
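
The handler registration above goes through the shared informer factory that client-go generates for resource.k8s.io/v1alpha3. An out-of-tree component could watch DeviceClass objects in much the same way; the following is a rough sketch, where the kubeconfig path, resync interval and handler bodies are placeholders and the informer path assumes the same client-go generation as this commit.

package main

import (
	"context"
	"fmt"
	"time"

	resourceapi "k8s.io/api/resource/v1alpha3"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Placeholder kubeconfig; in-cluster config would work just as well.
	config, err := clientcmd.BuildConfigFromFlags("", "/var/run/kubeconfig")
	if err != nil {
		panic(err)
	}
	clientset := kubernetes.NewForConfigOrDie(config)
	factory := informers.NewSharedInformerFactory(clientset, 10*time.Minute)

	// Same informer path as in the scheduler registration above.
	_, err = factory.Resource().V1alpha3().DeviceClasses().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			fmt.Println("device class added:", obj.(*resourceapi.DeviceClass).Name)
		},
		UpdateFunc: func(_, newObj interface{}) {
			fmt.Println("device class updated:", newObj.(*resourceapi.DeviceClass).Name)
		},
	})
	if err != nil {
		panic(err)
	}

	ctx := context.Background()
	factory.Start(ctx.Done())
	factory.WaitForCacheSync(ctx.Done())
	<-ctx.Done()
}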


@@ -232,11 +232,9 @@ func TestAddAllEventHandlers(t *testing.T) {
{
name: "DRA events disabled",
gvkMap: map[framework.GVK]framework.ActionType{
framework.PodSchedulingContext: framework.Add,
framework.ResourceClaim: framework.Add,
framework.ResourceClass: framework.Add,
framework.ResourceClaimParameters: framework.Add,
framework.ResourceClassParameters: framework.Add,
framework.PodSchedulingContext: framework.Add,
framework.ResourceClaim: framework.Add,
framework.DeviceClass: framework.Add,
},
expectStaticInformers: map[reflect.Type]bool{
reflect.TypeOf(&v1.Pod{}): true,
@@ -248,22 +246,18 @@ func TestAddAllEventHandlers(t *testing.T) {
{
name: "DRA events enabled",
gvkMap: map[framework.GVK]framework.ActionType{
framework.PodSchedulingContext: framework.Add,
framework.ResourceClaim: framework.Add,
framework.ResourceClass: framework.Add,
framework.ResourceClaimParameters: framework.Add,
framework.ResourceClassParameters: framework.Add,
framework.PodSchedulingContext: framework.Add,
framework.ResourceClaim: framework.Add,
framework.DeviceClass: framework.Add,
},
enableDRA: true,
expectStaticInformers: map[reflect.Type]bool{
reflect.TypeOf(&v1.Pod{}): true,
reflect.TypeOf(&v1.Node{}): true,
reflect.TypeOf(&v1.Namespace{}): true,
reflect.TypeOf(&resourceapi.PodSchedulingContext{}): true,
reflect.TypeOf(&resourceapi.ResourceClaim{}): true,
reflect.TypeOf(&resourceapi.ResourceClaimParameters{}): true,
reflect.TypeOf(&resourceapi.ResourceClass{}): true,
reflect.TypeOf(&resourceapi.ResourceClassParameters{}): true,
reflect.TypeOf(&v1.Pod{}): true,
reflect.TypeOf(&v1.Node{}): true,
reflect.TypeOf(&v1.Namespace{}): true,
reflect.TypeOf(&resourceapi.PodSchedulingContext{}): true,
reflect.TypeOf(&resourceapi.ResourceClaim{}): true,
reflect.TypeOf(&resourceapi.DeviceClass{}): true,
},
expectDynamicInformers: map[schema.GroupVersionResource]bool{},
},


@@ -38,10 +38,10 @@ import (
resourceapiapply "k8s.io/client-go/applyconfigurations/resource/v1alpha3"
"k8s.io/client-go/kubernetes"
resourcelisters "k8s.io/client-go/listers/resource/v1alpha3"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/retry"
"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
"k8s.io/dynamic-resource-allocation/resourceclaim"
"k8s.io/dynamic-resource-allocation/structured"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
@@ -56,10 +56,6 @@ const (
Name = names.DynamicResources
stateKey framework.StateKey = Name
// generatedFromIndex is the lookup name for the index function
// which indexes by other resource which generated the parameters object.
generatedFromIndex = "generated-from-index"
)
// The state is initialized in PreFilter phase. Because we save the pointer in
@@ -82,9 +78,8 @@ type stateData struct {
// (if one exists) and the changes made to it.
podSchedulingState podSchedulingState
// resourceModel contains the information about available and allocated resources when using
// structured parameters and the pod needs this information.
resources resources
// Allocator handles claims with structured parameters.
allocator *structured.Allocator
// mutex must be locked while accessing any of the fields below.
mutex sync.Mutex
@@ -99,6 +94,9 @@ type stateData struct {
unavailableClaims sets.Set[int]
informationsForClaim []informationForClaim
// nodeAllocations caches the result of Filter for the nodes.
nodeAllocations map[string][]*resourceapi.AllocationResult
}
func (d *stateData) Clone() framework.StateData {
@@ -106,24 +104,20 @@ func (d *stateData) Clone() framework.StateData {
}
type informationForClaim struct {
// The availableOnNode node filter of the claim converted from the
// v1 API to nodeaffinity.NodeSelector by PreFilter for repeated
// evaluation in Filter. Nil for claim which don't have it.
availableOnNode *nodeaffinity.NodeSelector
// Node selectors based on the claim status (single entry, key is empty) if allocated,
// otherwise the device class AvailableOnNodes selectors (potentially multiple entries,
// key is the device class name).
availableOnNodes map[string]*nodeaffinity.NodeSelector
// The status of the claim got from the
// schedulingCtx by PreFilter for repeated
// evaluation in Filter. Nil for claim which don't have it.
status *resourceapi.ResourceClaimSchedulingStatus
// structuredParameters is true if the claim is handled via the builtin
// controller.
structuredParameters bool
controller *claimController
// Set by Reserved, published by PreBind.
allocation *resourceapi.AllocationResult
allocationDriverName string
allocation *resourceapi.AllocationResult
}
type podSchedulingState struct {
@@ -276,19 +270,9 @@ type dynamicResources struct {
enabled bool
fh framework.Handle
clientset kubernetes.Interface
classLister resourcelisters.ResourceClassLister
classLister resourcelisters.DeviceClassLister
podSchedulingContextLister resourcelisters.PodSchedulingContextLister
claimParametersLister resourcelisters.ResourceClaimParametersLister
classParametersLister resourcelisters.ResourceClassParametersLister
resourceSliceLister resourcelisters.ResourceSliceLister
claimNameLookup *resourceclaim.Lookup
// claimParametersIndexer has the common claimParametersGeneratedFrom indexer installed to
// limit iteration over claimParameters to those of interest.
claimParametersIndexer cache.Indexer
// classParametersIndexer has the common classParametersGeneratedFrom indexer installed to
// limit iteration over classParameters to those of interest.
classParametersIndexer cache.Indexer
sliceLister resourcelisters.ResourceSliceLister
// claimAssumeCache enables temporarily storing a newer claim object
// while the scheduler has allocated it and the corresponding object
@@ -357,61 +341,15 @@ func New(ctx context.Context, plArgs runtime.Object, fh framework.Handle, fts fe
enabled: true,
fh: fh,
clientset: fh.ClientSet(),
classLister: fh.SharedInformerFactory().Resource().V1alpha3().ResourceClasses().Lister(),
classLister: fh.SharedInformerFactory().Resource().V1alpha3().DeviceClasses().Lister(),
podSchedulingContextLister: fh.SharedInformerFactory().Resource().V1alpha3().PodSchedulingContexts().Lister(),
claimParametersLister: fh.SharedInformerFactory().Resource().V1alpha3().ResourceClaimParameters().Lister(),
claimParametersIndexer: fh.SharedInformerFactory().Resource().V1alpha3().ResourceClaimParameters().Informer().GetIndexer(),
classParametersLister: fh.SharedInformerFactory().Resource().V1alpha3().ResourceClassParameters().Lister(),
classParametersIndexer: fh.SharedInformerFactory().Resource().V1alpha3().ResourceClassParameters().Informer().GetIndexer(),
resourceSliceLister: fh.SharedInformerFactory().Resource().V1alpha3().ResourceSlices().Lister(),
claimNameLookup: resourceclaim.NewNameLookup(fh.ClientSet()),
sliceLister: fh.SharedInformerFactory().Resource().V1alpha3().ResourceSlices().Lister(),
claimAssumeCache: fh.ResourceClaimCache(),
}
if err := pl.claimParametersIndexer.AddIndexers(cache.Indexers{generatedFromIndex: claimParametersGeneratedFromIndexFunc}); err != nil {
return nil, fmt.Errorf("add claim parameters cache indexer: %w", err)
}
if err := pl.classParametersIndexer.AddIndexers(cache.Indexers{generatedFromIndex: classParametersGeneratedFromIndexFunc}); err != nil {
return nil, fmt.Errorf("add class parameters cache indexer: %w", err)
}
return pl, nil
}
func claimParametersReferenceKeyFunc(namespace string, ref *resourceapi.ResourceClaimParametersReference) string {
return ref.APIGroup + "/" + ref.Kind + "/" + namespace + "/" + ref.Name
}
// claimParametersGeneratedFromIndexFunc is an index function that returns other resource keys
// (= apiGroup/kind/namespace/name) for ResourceClaimParametersReference in a given claim parameters.
func claimParametersGeneratedFromIndexFunc(obj interface{}) ([]string, error) {
parameters, ok := obj.(*resourceapi.ResourceClaimParameters)
if !ok {
return nil, nil
}
if parameters.GeneratedFrom == nil {
return nil, nil
}
return []string{claimParametersReferenceKeyFunc(parameters.Namespace, parameters.GeneratedFrom)}, nil
}
func classParametersReferenceKeyFunc(ref *resourceapi.ResourceClassParametersReference) string {
return ref.APIGroup + "/" + ref.Kind + "/" + ref.Namespace + "/" + ref.Name
}
// classParametersGeneratedFromIndexFunc is an index function that returns other resource keys
// (= apiGroup/kind/namespace/name) for ResourceClassParametersReference in a given class parameters.
func classParametersGeneratedFromIndexFunc(obj interface{}) ([]string, error) {
parameters, ok := obj.(*resourceapi.ResourceClassParameters)
if !ok {
return nil, nil
}
if parameters.GeneratedFrom == nil {
return nil, nil
}
return []string{classParametersReferenceKeyFunc(parameters.GeneratedFrom)}, nil
}
var _ framework.PreEnqueuePlugin = &dynamicResources{}
var _ framework.PreFilterPlugin = &dynamicResources{}
var _ framework.FilterPlugin = &dynamicResources{}
@@ -435,11 +373,6 @@ func (pl *dynamicResources) EventsToRegister(_ context.Context) ([]framework.Clu
}
events := []framework.ClusterEventWithHint{
// Changes for claim or class parameters creation may make pods
// schedulable which depend on claims using those parameters.
{Event: framework.ClusterEvent{Resource: framework.ResourceClaimParameters, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterClaimParametersChange},
{Event: framework.ClusterEvent{Resource: framework.ResourceClassParameters, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterClassParametersChange},
// Allocation is tracked in ResourceClaims, so any changes may make the pods schedulable.
{Event: framework.ClusterEvent{Resource: framework.ResourceClaim, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterClaimChange},
// When a driver has provided additional information, a pod waiting for that information
@@ -458,7 +391,7 @@ func (pl *dynamicResources) EventsToRegister(_ context.Context) ([]framework.Clu
// See: https://github.com/kubernetes/kubernetes/issues/110175
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeLabel | framework.UpdateNodeTaint}},
// A pod might be waiting for a class to get created or modified.
{Event: framework.ClusterEvent{Resource: framework.ResourceClass, ActionType: framework.Add | framework.Update}},
{Event: framework.ClusterEvent{Resource: framework.DeviceClass, ActionType: framework.Add | framework.Update}},
}
return events, nil
}
@@ -473,149 +406,6 @@ func (pl *dynamicResources) PreEnqueue(ctx context.Context, pod *v1.Pod) (status
return nil
}
// isSchedulableAfterClaimParametersChange is invoked for add and update claim parameters events reported by
// an informer. It checks whether that change made a previously unschedulable
// pod schedulable. It errs on the side of letting a pod scheduling attempt
// happen. The delete claim event will not invoke it, so newObj will never be nil.
func (pl *dynamicResources) isSchedulableAfterClaimParametersChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
originalParameters, modifiedParameters, err := schedutil.As[*resourceapi.ResourceClaimParameters](oldObj, newObj)
if err != nil {
// Shouldn't happen.
return framework.Queue, fmt.Errorf("unexpected object in isSchedulableAfterClaimParametersChange: %w", err)
}
usesParameters := false
if err := pl.foreachPodResourceClaim(pod, func(_ string, claim *resourceapi.ResourceClaim) {
ref := claim.Spec.ParametersRef
if ref == nil {
return
}
// Using in-tree parameters directly?
if ref.APIGroup == resourceapi.SchemeGroupVersion.Group &&
ref.Kind == "ResourceClaimParameters" {
if modifiedParameters.Name == ref.Name {
usesParameters = true
}
return
}
// Need to look for translated parameters.
generatedFrom := modifiedParameters.GeneratedFrom
if generatedFrom == nil {
return
}
if generatedFrom.APIGroup == ref.APIGroup &&
generatedFrom.Kind == ref.Kind &&
generatedFrom.Name == ref.Name {
usesParameters = true
}
}); err != nil {
// This is not an unexpected error: we know that
// foreachPodResourceClaim only returns errors for "not
// schedulable".
logger.V(4).Info("pod is not schedulable", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedParameters), "reason", err.Error())
return framework.QueueSkip, nil
}
if !usesParameters {
// This were not the parameters the pod was waiting for.
logger.V(6).Info("unrelated claim parameters got modified", "pod", klog.KObj(pod), "claimParameters", klog.KObj(modifiedParameters))
return framework.QueueSkip, nil
}
if originalParameters == nil {
logger.V(4).Info("claim parameters for pod got created", "pod", klog.KObj(pod), "claimParameters", klog.KObj(modifiedParameters))
return framework.Queue, nil
}
// Modifications may or may not be relevant. If the entire
// requests are as before, then something else must have changed
// and we don't care.
if apiequality.Semantic.DeepEqual(&originalParameters.DriverRequests, &modifiedParameters.DriverRequests) {
logger.V(6).Info("claim parameters for pod got modified where the pod doesn't care", "pod", klog.KObj(pod), "claimParameters", klog.KObj(modifiedParameters))
return framework.QueueSkip, nil
}
logger.V(4).Info("requests in claim parameters for pod got updated", "pod", klog.KObj(pod), "claimParameters", klog.KObj(modifiedParameters))
return framework.Queue, nil
}
// isSchedulableAfterClassParametersChange is invoked for add and update class parameters events reported by
// an informer. It checks whether that change made a previously unschedulable
// pod schedulable. It errs on the side of letting a pod scheduling attempt
// happen. The delete class event will not invoke it, so newObj will never be nil.
func (pl *dynamicResources) isSchedulableAfterClassParametersChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
originalParameters, modifiedParameters, err := schedutil.As[*resourceapi.ResourceClassParameters](oldObj, newObj)
if err != nil {
// Shouldn't happen.
return framework.Queue, fmt.Errorf("unexpected object in isSchedulableAfterClassParametersChange: %w", err)
}
usesParameters := false
if err := pl.foreachPodResourceClaim(pod, func(_ string, claim *resourceapi.ResourceClaim) {
class, err := pl.classLister.Get(claim.Spec.ResourceClassName)
if err != nil {
if !apierrors.IsNotFound(err) {
logger.Error(err, "look up resource class")
}
return
}
ref := class.ParametersRef
if ref == nil {
return
}
// Using in-tree parameters directly?
if ref.APIGroup == resourceapi.SchemeGroupVersion.Group &&
ref.Kind == "ResourceClassParameters" {
if modifiedParameters.Name == ref.Name {
usesParameters = true
}
return
}
// Need to look for translated parameters.
generatedFrom := modifiedParameters.GeneratedFrom
if generatedFrom == nil {
return
}
if generatedFrom.APIGroup == ref.APIGroup &&
generatedFrom.Kind == ref.Kind &&
generatedFrom.Name == ref.Name {
usesParameters = true
}
}); err != nil {
// This is not an unexpected error: we know that
// foreachPodResourceClaim only returns errors for "not
// schedulable".
logger.V(4).Info("pod is not schedulable", "pod", klog.KObj(pod), "classParameters", klog.KObj(modifiedParameters), "reason", err.Error())
return framework.QueueSkip, nil
}
if !usesParameters {
// This were not the parameters the pod was waiting for.
logger.V(6).Info("unrelated class parameters got modified", "pod", klog.KObj(pod), "classParameters", klog.KObj(modifiedParameters))
return framework.QueueSkip, nil
}
if originalParameters == nil {
logger.V(4).Info("class parameters for pod got created", "pod", klog.KObj(pod), "class", klog.KObj(modifiedParameters))
return framework.Queue, nil
}
// Modifications may or may not be relevant. If the entire
// requests are as before, then something else must have changed
// and we don't care.
if apiequality.Semantic.DeepEqual(&originalParameters.Filters, &modifiedParameters.Filters) {
logger.V(6).Info("class parameters for pod got modified where the pod doesn't care", "pod", klog.KObj(pod), "classParameters", klog.KObj(modifiedParameters))
return framework.QueueSkip, nil
}
logger.V(4).Info("filters in class parameters for pod got updated", "pod", klog.KObj(pod), "classParameters", klog.KObj(modifiedParameters))
return framework.Queue, nil
}
// isSchedulableAfterClaimChange is invoked for add and update claim events reported by
// an informer. It checks whether that change made a previously unschedulable
// pod schedulable. It errs on the side of letting a pod scheduling attempt
@@ -641,7 +431,8 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
}
if originalClaim != nil &&
resourceclaim.IsAllocatedWithStructuredParameters(originalClaim) &&
originalClaim.Status.Allocation != nil &&
originalClaim.Status.Allocation.Controller == "" &&
modifiedClaim.Status.Allocation == nil {
// A claim with structured parameters was deallocated. This might have made
// resources available for other pods.
@@ -823,7 +614,7 @@ func (pl *dynamicResources) podResourceClaims(pod *v1.Pod) ([]*resourceapi.Resou
// It calls an optional handler for those claims that it finds.
func (pl *dynamicResources) foreachPodResourceClaim(pod *v1.Pod, cb func(podResourceName string, claim *resourceapi.ResourceClaim)) error {
for _, resource := range pod.Spec.ResourceClaims {
claimName, mustCheckOwner, err := pl.claimNameLookup.Name(pod, &resource)
claimName, mustCheckOwner, err := resourceclaim.Name(pod, &resource)
if err != nil {
return err
}
@@ -892,8 +683,10 @@ func (pl *dynamicResources) PreFilter(ctx context.Context, state *framework.Cycl
return nil, statusError(logger, err)
}
// All claims which the scheduler needs to allocate itself.
allocateClaims := make([]*resourceapi.ResourceClaim, 0, len(claims))
s.informationsForClaim = make([]informationForClaim, len(claims))
needResourceInformation := false
for index, claim := range claims {
if claim.Status.DeallocationRequested {
// This will get resolved by the resource driver.
@@ -907,44 +700,19 @@ func (pl *dynamicResources) PreFilter(ctx context.Context, state *framework.Cycl
}
if claim.Status.Allocation != nil {
if claim.Status.Allocation.AvailableOnNodes != nil {
nodeSelector, err := nodeaffinity.NewNodeSelector(claim.Status.Allocation.AvailableOnNodes)
s.informationsForClaim[index].structuredParameters = claim.Status.Allocation.Controller == ""
if claim.Status.Allocation.NodeSelector != nil {
nodeSelector, err := nodeaffinity.NewNodeSelector(claim.Status.Allocation.NodeSelector)
if err != nil {
return nil, statusError(logger, err)
}
s.informationsForClaim[index].availableOnNode = nodeSelector
s.informationsForClaim[index].availableOnNodes = map[string]*nodeaffinity.NodeSelector{"": nodeSelector}
}
// The claim was allocated by the scheduler if it has the finalizer that is
// reserved for Kubernetes.
s.informationsForClaim[index].structuredParameters = slices.Contains(claim.Finalizers, resourceapi.Finalizer)
} else {
// The ResourceClass might have a node filter. This is
// useful for trimming the initial set of potential
// nodes before we ask the driver(s) for information
// about the specific pod.
class, err := pl.classLister.Get(claim.Spec.ResourceClassName)
if err != nil {
// If the class cannot be retrieved, allocation cannot proceed.
if apierrors.IsNotFound(err) {
// Here we mark the pod as "unschedulable", so it'll sleep in
// the unscheduleable queue until a ResourceClass event occurs.
return nil, statusUnschedulable(logger, fmt.Sprintf("resource class %s does not exist", claim.Spec.ResourceClassName))
}
// Other error, retry with backoff.
return nil, statusError(logger, fmt.Errorf("look up resource class: %v", err))
}
if class.SuitableNodes != nil {
selector, err := nodeaffinity.NewNodeSelector(class.SuitableNodes)
if err != nil {
return nil, statusError(logger, err)
}
s.informationsForClaim[index].availableOnNode = selector
}
s.informationsForClaim[index].status = statusForClaim(s.podSchedulingState.schedulingCtx, pod.Spec.ResourceClaims[index].Name)
if class.StructuredParameters != nil && *class.StructuredParameters {
s.informationsForClaim[index].structuredParameters = true
structuredParameters := claim.Spec.Controller == ""
s.informationsForClaim[index].structuredParameters = structuredParameters
if structuredParameters {
allocateClaims = append(allocateClaims, claim)
// Allocation in flight? Better wait for that
// to finish, see inFlightAllocations
@@ -952,164 +720,93 @@ func (pl *dynamicResources) PreFilter(ctx context.Context, state *framework.Cycl
if _, found := pl.inFlightAllocations.Load(claim.UID); found {
return nil, statusUnschedulable(logger, fmt.Sprintf("resource claim %s is in the process of being allocated", klog.KObj(claim)))
}
} else {
s.informationsForClaim[index].status = statusForClaim(s.podSchedulingState.schedulingCtx, pod.Spec.ResourceClaims[index].Name)
}
// We need the claim and class parameters. If
// they don't exist yet, the pod has to wait.
//
// TODO (https://github.com/kubernetes/kubernetes/issues/123697):
// check this already in foreachPodResourceClaim, together with setting up informationsForClaim.
// Then PreEnqueue will also check for existence of parameters.
classParameters, claimParameters, status := pl.lookupParameters(logger, class, claim)
if status != nil {
return nil, status
// Check all requests and device classes. If a class
// does not exist, scheduling cannot proceed, no matter
// how the claim is being allocated.
//
// When using a control plane controller, a class might
// have a node filter. This is useful for trimming the
// initial set of potential nodes before we ask the
// driver(s) for information about the specific pod.
for _, request := range claim.Spec.Devices.Requests {
if request.DeviceClassName == "" {
return nil, statusError(logger, fmt.Errorf("request %s: unsupported request type", request.Name))
}
controller, err := newClaimController(logger, class, classParameters, claimParameters)
class, err := pl.classLister.Get(request.DeviceClassName)
if err != nil {
return nil, statusError(logger, err)
// If the class cannot be retrieved, allocation cannot proceed.
if apierrors.IsNotFound(err) {
// Here we mark the pod as "unschedulable", so it'll sleep in
// the unschedulable queue until a DeviceClass event occurs.
return nil, statusUnschedulable(logger, fmt.Sprintf("request %s: device class %s does not exist", request.Name, request.DeviceClassName))
}
// Other error, retry with backoff.
return nil, statusError(logger, fmt.Errorf("request %s: look up device class: %w", request.Name, err))
}
if class.Spec.SuitableNodes != nil && !structuredParameters {
selector, err := nodeaffinity.NewNodeSelector(class.Spec.SuitableNodes)
if err != nil {
return nil, statusError(logger, err)
}
if s.informationsForClaim[index].availableOnNodes == nil {
s.informationsForClaim[index].availableOnNodes = make(map[string]*nodeaffinity.NodeSelector)
}
s.informationsForClaim[index].availableOnNodes[class.Name] = selector
}
s.informationsForClaim[index].controller = controller
needResourceInformation = true
}
}
}
if needResourceInformation {
if len(allocateClaims) > 0 {
logger.V(5).Info("Preparing allocation with structured parameters", "pod", klog.KObj(pod), "resourceclaims", klog.KObjSlice(allocateClaims))
// Doing this over and over again for each pod could be avoided
// by parsing once when creating the plugin and then updating
// that state in informer callbacks. But that would cause
// problems for using the plugin in the Cluster Autoscaler. If
// this step here turns out to be expensive, we may have to
// maintain and update state more persistently.
// by setting the allocator up once and then keeping it up-to-date
// as changes are observed.
//
// But that would cause problems for using the plugin in the
// Cluster Autoscaler. If this step here turns out to be
// expensive, we may have to maintain and update state more
// persistently.
//
// Claims are treated as "allocated" if they are in the assume cache
// or currently their allocation is in-flight.
resources, err := newResourceModel(logger, pl.resourceSliceLister, pl.claimAssumeCache, &pl.inFlightAllocations)
logger.V(5).Info("Resource usage", "resources", klog.Format(resources))
allocator, err := structured.NewAllocator(ctx, allocateClaims, &claimListerForAssumeCache{assumeCache: pl.claimAssumeCache, inFlightAllocations: &pl.inFlightAllocations}, pl.classLister, pl.sliceLister)
if err != nil {
return nil, statusError(logger, err)
}
s.resources = resources
s.allocator = allocator
s.nodeAllocations = make(map[string][]*resourceapi.AllocationResult)
}
s.claims = claims
return nil, nil
}
func (pl *dynamicResources) lookupParameters(logger klog.Logger, class *resourceapi.ResourceClass, claim *resourceapi.ResourceClaim) (classParameters *resourceapi.ResourceClassParameters, claimParameters *resourceapi.ResourceClaimParameters, status *framework.Status) {
classParameters, status = pl.lookupClassParameters(logger, class)
if status != nil {
return
}
claimParameters, status = pl.lookupClaimParameters(logger, class, claim)
return
type claimListerForAssumeCache struct {
assumeCache *assumecache.AssumeCache
inFlightAllocations *sync.Map
}
func (pl *dynamicResources) lookupClassParameters(logger klog.Logger, class *resourceapi.ResourceClass) (*resourceapi.ResourceClassParameters, *framework.Status) {
defaultClassParameters := resourceapi.ResourceClassParameters{}
if class.ParametersRef == nil {
return &defaultClassParameters, nil
}
if class.ParametersRef.APIGroup == resourceapi.SchemeGroupVersion.Group &&
class.ParametersRef.Kind == "ResourceClassParameters" {
// Use the parameters which were referenced directly.
parameters, err := pl.classParametersLister.ResourceClassParameters(class.ParametersRef.Namespace).Get(class.ParametersRef.Name)
if err != nil {
if apierrors.IsNotFound(err) {
return nil, statusUnschedulable(logger, fmt.Sprintf("class parameters %s not found", klog.KRef(class.ParametersRef.Namespace, class.ParametersRef.Name)))
}
return nil, statusError(logger, fmt.Errorf("get class parameters %s: %v", klog.KRef(class.Namespace, class.ParametersRef.Name), err))
func (cl *claimListerForAssumeCache) ListAllAllocated() ([]*resourceapi.ResourceClaim, error) {
// Probably not worth adding an index for?
objs := cl.assumeCache.List(nil)
allocated := make([]*resourceapi.ResourceClaim, 0, len(objs))
for _, obj := range objs {
claim := obj.(*resourceapi.ResourceClaim)
if obj, ok := cl.inFlightAllocations.Load(claim.UID); ok {
claim = obj.(*resourceapi.ResourceClaim)
}
return parameters, nil
}
objs, err := pl.classParametersIndexer.ByIndex(generatedFromIndex, classParametersReferenceKeyFunc(class.ParametersRef))
if err != nil {
return nil, statusError(logger, fmt.Errorf("listing class parameters failed: %v", err))
}
switch len(objs) {
case 0:
return nil, statusUnschedulable(logger, fmt.Sprintf("generated class parameters for %s.%s %s not found", class.ParametersRef.Kind, class.ParametersRef.APIGroup, klog.KRef(class.ParametersRef.Namespace, class.ParametersRef.Name)))
case 1:
parameters, ok := objs[0].(*resourceapi.ResourceClassParameters)
if !ok {
return nil, statusError(logger, fmt.Errorf("unexpected object in class parameters index: %T", objs[0]))
if claim.Status.Allocation != nil {
allocated = append(allocated, claim)
}
return parameters, nil
default:
sort.Slice(objs, func(i, j int) bool {
obj1, obj2 := objs[i].(*resourceapi.ResourceClassParameters), objs[j].(*resourceapi.ResourceClassParameters)
if obj1 == nil || obj2 == nil {
return false
}
return obj1.Name < obj2.Name
})
return nil, statusError(logger, fmt.Errorf("multiple generated class parameters for %s.%s %s found: %s", class.ParametersRef.Kind, class.ParametersRef.APIGroup, klog.KRef(class.Namespace, class.ParametersRef.Name), klog.KObjSlice(objs)))
}
}
func (pl *dynamicResources) lookupClaimParameters(logger klog.Logger, class *resourceapi.ResourceClass, claim *resourceapi.ResourceClaim) (*resourceapi.ResourceClaimParameters, *framework.Status) {
defaultClaimParameters := resourceapi.ResourceClaimParameters{
DriverRequests: []resourceapi.DriverRequests{
{
DriverName: class.DriverName,
Requests: []resourceapi.ResourceRequest{
{
ResourceRequestModel: resourceapi.ResourceRequestModel{
// TODO: This only works because NamedResources is
// the only model currently implemented. We need to
// match the default to how the resources of this
// class are being advertized in a ResourceSlice.
NamedResources: &resourceapi.NamedResourcesRequest{
Selector: "true",
},
},
},
},
},
},
}
if claim.Spec.ParametersRef == nil {
return &defaultClaimParameters, nil
}
if claim.Spec.ParametersRef.APIGroup == resourceapi.SchemeGroupVersion.Group &&
claim.Spec.ParametersRef.Kind == "ResourceClaimParameters" {
// Use the parameters which were referenced directly.
parameters, err := pl.claimParametersLister.ResourceClaimParameters(claim.Namespace).Get(claim.Spec.ParametersRef.Name)
if err != nil {
if apierrors.IsNotFound(err) {
return nil, statusUnschedulable(logger, fmt.Sprintf("claim parameters %s not found", klog.KRef(claim.Namespace, claim.Spec.ParametersRef.Name)))
}
return nil, statusError(logger, fmt.Errorf("get claim parameters %s: %v", klog.KRef(claim.Namespace, claim.Spec.ParametersRef.Name), err))
}
return parameters, nil
}
objs, err := pl.claimParametersIndexer.ByIndex(generatedFromIndex, claimParametersReferenceKeyFunc(claim.Namespace, claim.Spec.ParametersRef))
if err != nil {
return nil, statusError(logger, fmt.Errorf("listing claim parameters failed: %v", err))
}
switch len(objs) {
case 0:
return nil, statusUnschedulable(logger, fmt.Sprintf("generated claim parameters for %s.%s %s not found", claim.Spec.ParametersRef.Kind, claim.Spec.ParametersRef.APIGroup, klog.KRef(claim.Namespace, claim.Spec.ParametersRef.Name)))
case 1:
parameters, ok := objs[0].(*resourceapi.ResourceClaimParameters)
if !ok {
return nil, statusError(logger, fmt.Errorf("unexpected object in claim parameters index: %T", objs[0]))
}
return parameters, nil
default:
sort.Slice(objs, func(i, j int) bool {
obj1, obj2 := objs[i].(*resourceapi.ResourceClaimParameters), objs[j].(*resourceapi.ResourceClaimParameters)
if obj1 == nil || obj2 == nil {
return false
}
return obj1.Name < obj2.Name
})
return nil, statusError(logger, fmt.Errorf("multiple generated claim parameters for %s.%s %s found: %s", claim.Spec.ParametersRef.Kind, claim.Spec.ParametersRef.APIGroup, klog.KRef(claim.Namespace, claim.Spec.ParametersRef.Name), klog.KObjSlice(objs)))
}
return allocated, nil
}
// PreFilterExtensions returns prefilter extensions, pod add and remove.
@@ -1158,10 +855,11 @@ func (pl *dynamicResources) Filter(ctx context.Context, cs *framework.CycleState
logger.V(10).Info("filtering based on resource claims of the pod", "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclaim", klog.KObj(claim))
if claim.Status.Allocation != nil {
if nodeSelector := state.informationsForClaim[index].availableOnNode; nodeSelector != nil {
for _, nodeSelector := range state.informationsForClaim[index].availableOnNodes {
if !nodeSelector.Match(node) {
logger.V(5).Info("AvailableOnNodes does not match", "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclaim", klog.KObj(claim))
unavailableClaims = append(unavailableClaims, index)
break
}
}
continue
@@ -1172,40 +870,61 @@ func (pl *dynamicResources) Filter(ctx context.Context, cs *framework.CycleState
return statusUnschedulable(logger, "resourceclaim must be reallocated", "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclaim", klog.KObj(claim))
}
if selector := state.informationsForClaim[index].availableOnNode; selector != nil {
if matches := selector.Match(node); !matches {
return statusUnschedulable(logger, "excluded by resource class node filter", "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclassName", claim.Spec.ResourceClassName)
for className, nodeSelector := range state.informationsForClaim[index].availableOnNodes {
if !nodeSelector.Match(node) {
return statusUnschedulable(logger, "excluded by device class node filter", "pod", klog.KObj(pod), "node", klog.KObj(node), "deviceclass", klog.KRef("", className))
}
}
// Can the builtin controller tell us whether the node is suitable?
if state.informationsForClaim[index].structuredParameters {
suitable, err := state.informationsForClaim[index].controller.nodeIsSuitable(ctx, node.Name, state.resources)
if err != nil {
// An error indicates that something wasn't configured correctly, for example
// writing a CEL expression which doesn't handle a map lookup error. Normally
// this should never fail. We could return an error here, but then the pod
// would get retried. Instead we ignore the node.
return statusUnschedulable(logger, fmt.Sprintf("checking structured parameters failed: %v", err), "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclaim", klog.KObj(claim))
}
if !suitable {
return statusUnschedulable(logger, "resourceclaim cannot be allocated for the node (unsuitable)", "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclaim", klog.KObj(claim))
}
} else {
if status := state.informationsForClaim[index].status; status != nil {
for _, unsuitableNode := range status.UnsuitableNodes {
if node.Name == unsuitableNode {
return statusUnschedulable(logger, "resourceclaim cannot be allocated for the node (unsuitable)", "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclaim", klog.KObj(claim), "unsuitablenodes", status.UnsuitableNodes)
}
// Use information from control plane controller?
if status := state.informationsForClaim[index].status; status != nil {
for _, unsuitableNode := range status.UnsuitableNodes {
if node.Name == unsuitableNode {
return statusUnschedulable(logger, "resourceclaim cannot be allocated for the node (unsuitable)", "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclaim", klog.KObj(claim), "unsuitablenodes", status.UnsuitableNodes)
}
}
}
}
// Use allocator to check the node and cache the result in case that the node is picked.
var allocations []*resourceapi.AllocationResult
if state.allocator != nil {
allocCtx := ctx
if loggerV := logger.V(5); loggerV.Enabled() {
allocCtx = klog.NewContext(allocCtx, klog.LoggerWithValues(logger, "node", klog.KObj(node)))
}
a, err := state.allocator.Allocate(allocCtx, node)
if err != nil {
// This should only fail if there is something wrong with the claim or class.
// Return an error to abort scheduling of it.
//
// This will cause retries. It would be slightly nicer to mark it as unschedulable
// *and* abort scheduling. Then only a cluster event updating the claim or class
// with the broken CEL expression would trigger rescheduling.
//
// But we cannot do both. As this shouldn't occur often, aborting like this is
// better than the more complicated alternative (return Unschedulable here, remember
// the error, then raise it again later if needed).
return statusError(logger, err, "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclaims", klog.KObjSlice(state.allocator.ClaimsToAllocate()))
}
// Check for exact length just to be sure. In practice this is all-or-nothing.
if len(a) != len(state.allocator.ClaimsToAllocate()) {
return statusUnschedulable(logger, "cannot allocate all claims", "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclaims", klog.KObjSlice(state.allocator.ClaimsToAllocate()))
}
// Reserve uses this information.
allocations = a
}
// Store information in state while holding the mutex.
if state.allocator != nil || len(unavailableClaims) > 0 {
state.mutex.Lock()
defer state.mutex.Unlock()
}
if len(unavailableClaims) > 0 {
// Remember all unavailable claims. This might be observed
// concurrently, so we have to lock the state before writing.
state.mutex.Lock()
defer state.mutex.Unlock()
if state.unavailableClaims == nil {
state.unavailableClaims = sets.New[int]()
@@ -1217,6 +936,10 @@ func (pl *dynamicResources) Filter(ctx context.Context, cs *framework.CycleState
return statusUnschedulable(logger, "resourceclaim not available on the node", "pod", klog.KObj(pod))
}
if state.allocator != nil {
state.nodeAllocations[node.Name] = allocations
}
return nil
}
@@ -1266,7 +989,6 @@ func (pl *dynamicResources) PostFilter(ctx context.Context, cs *framework.CycleS
claim := claim.DeepCopy()
claim.Status.ReservedFor = nil
if clearAllocation {
claim.Status.DriverName = ""
claim.Status.Allocation = nil
} else {
claim.Status.DeallocationRequested = true
@@ -1303,7 +1025,7 @@ func (pl *dynamicResources) PreScore(ctx context.Context, cs *framework.CycleSta
pending := false
for index, claim := range state.claims {
if claim.Status.Allocation == nil &&
state.informationsForClaim[index].controller == nil {
!state.informationsForClaim[index].structuredParameters {
pending = true
break
}
@@ -1379,10 +1101,11 @@ func (pl *dynamicResources) Reserve(ctx context.Context, cs *framework.CycleStat
return nil
}
logger := klog.FromContext(ctx)
numDelayedAllocationPending := 0
numClaimsWithStatusInfo := 0
claimsWithBuiltinController := make([]int, 0, len(state.claims))
logger := klog.FromContext(ctx)
numClaimsWithAllocator := 0
for index, claim := range state.claims {
if claim.Status.Allocation != nil {
// Allocated, but perhaps not reserved yet. We checked in PreFilter that
@@ -1393,9 +1116,9 @@ func (pl *dynamicResources) Reserve(ctx context.Context, cs *framework.CycleStat
continue
}
// Do we have the builtin controller?
if state.informationsForClaim[index].controller != nil {
claimsWithBuiltinController = append(claimsWithBuiltinController, index)
// Do we use the allocator for it?
if state.informationsForClaim[index].structuredParameters {
numClaimsWithAllocator++
continue
}
@@ -1409,7 +1132,7 @@ func (pl *dynamicResources) Reserve(ctx context.Context, cs *framework.CycleStat
}
}
if numDelayedAllocationPending == 0 && len(claimsWithBuiltinController) == 0 {
if numDelayedAllocationPending == 0 && numClaimsWithAllocator == 0 {
// Nothing left to do.
return nil
}
@@ -1430,27 +1153,41 @@ func (pl *dynamicResources) Reserve(ctx context.Context, cs *framework.CycleStat
}
// Prepare allocation of claims handled by the scheduler.
for _, index := range claimsWithBuiltinController {
claim := state.claims[index]
driverName, allocation, err := state.informationsForClaim[index].controller.allocate(ctx, nodeName, state.resources)
if err != nil {
if state.allocator != nil {
// Entries in these two slices match each other.
claimsToAllocate := state.allocator.ClaimsToAllocate()
allocations, ok := state.nodeAllocations[nodeName]
if !ok {
// We checked before that the node is suitable. This shouldn't have failed,
// so treat this as an error.
return statusError(logger, fmt.Errorf("claim allocation failed unexpectedly: %v", err))
return statusError(logger, errors.New("claim allocation not found for node"))
}
state.informationsForClaim[index].allocation = allocation
state.informationsForClaim[index].allocationDriverName = driverName
// Strictly speaking, we don't need to store the full modified object.
// The allocation would be enough. The full object is useful for
// debugging and testing, so let's make it realistic.
claim = claim.DeepCopy()
if !slices.Contains(claim.Finalizers, resourceapi.Finalizer) {
claim.Finalizers = append(claim.Finalizers, resourceapi.Finalizer)
// Sanity check: do we have results for all pending claims?
if len(allocations) != len(claimsToAllocate) ||
len(allocations) != numClaimsWithAllocator {
return statusError(logger, fmt.Errorf("internal error, have %d allocations, %d claims to allocate, want %d claims", len(allocations), len(claimsToAllocate), numClaimsWithAllocator))
}
for i, claim := range claimsToAllocate {
index := slices.Index(state.claims, claim)
if index < 0 {
return statusError(logger, fmt.Errorf("internal error, claim %s with allocation not found", claim.Name))
}
allocation := allocations[i]
state.informationsForClaim[index].allocation = allocation
// Strictly speaking, we don't need to store the full modified object.
// The allocation would be enough. The full object is useful for
// debugging, testing and the allocator, so let's make it realistic.
claim = claim.DeepCopy()
if !slices.Contains(claim.Finalizers, resourceapi.Finalizer) {
claim.Finalizers = append(claim.Finalizers, resourceapi.Finalizer)
}
claim.Status.Allocation = allocation
pl.inFlightAllocations.Store(claim.UID, claim)
logger.V(5).Info("Reserved resource in allocation result", "claim", klog.KObj(claim), "allocation", klog.Format(allocation))
}
claim.Status.DriverName = driverName
claim.Status.Allocation = allocation
pl.inFlightAllocations.Store(claim.UID, claim)
logger.V(5).Info("Reserved resource in allocation result", "claim", klog.KObj(claim), "driver", driverName, "allocation", klog.Format(allocation))
}
// When there is only one pending resource, we can go ahead with
@@ -1460,8 +1197,8 @@ func (pl *dynamicResources) Reserve(ctx context.Context, cs *framework.CycleStat
//
// If all pending claims are handled with the builtin controller,
// there is no need for a PodSchedulingContext change.
if numDelayedAllocationPending == 1 && len(claimsWithBuiltinController) == 0 ||
numClaimsWithStatusInfo+len(claimsWithBuiltinController) == numDelayedAllocationPending && len(claimsWithBuiltinController) < numDelayedAllocationPending {
if numDelayedAllocationPending == 1 && numClaimsWithAllocator == 0 ||
numClaimsWithStatusInfo+numClaimsWithAllocator == numDelayedAllocationPending && numClaimsWithAllocator < numDelayedAllocationPending {
// TODO: can we increase the chance that the scheduler picks
// the same node as before when allocation is on-going,
// assuming that that node still fits the pod? Picking a
@@ -1530,7 +1267,7 @@ func (pl *dynamicResources) Unreserve(ctx context.Context, cs *framework.CycleSt
for index, claim := range state.claims {
// If allocation was in-flight, then it's not anymore and we need to revert the
// claim object in the assume cache to what it was before.
if state.informationsForClaim[index].controller != nil {
if state.informationsForClaim[index].structuredParameters {
if _, found := pl.inFlightAllocations.LoadAndDelete(state.claims[index].UID); found {
pl.claimAssumeCache.Restore(claim.Namespace + "/" + claim.Name)
}
@@ -1661,8 +1398,6 @@ func (pl *dynamicResources) bindClaim(ctx context.Context, state *stateData, ind
}
claim = updatedClaim
}
claim.Status.DriverName = state.informationsForClaim[index].allocationDriverName
claim.Status.Allocation = allocation
}
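
Taken together, the Filter and Reserve changes above boil down to: ask the allocator whether a node can satisfy every pending claim, cache the per-claim results, and publish them in Reserve/PreBind. Below is a condensed sketch of that check; the package and helper name are invented for illustration, and the method signatures follow what the diff uses.

package dra

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	resourceapi "k8s.io/api/resource/v1alpha3"
	"k8s.io/dynamic-resource-allocation/structured"
)

// checkNode assumes an allocator created elsewhere with structured.NewAllocator
// (pending claims, claim lister, DeviceClass lister and ResourceSlice lister).
// It returns one AllocationResult per pending claim if, and only if, the node
// can satisfy all of them.
func checkNode(ctx context.Context, allocator *structured.Allocator, node *v1.Node) ([]*resourceapi.AllocationResult, error) {
	results, err := allocator.Allocate(ctx, node)
	if err != nil {
		// Typically a broken claim or class (for example a bad CEL
		// expression); the caller should abort scheduling of the pod.
		return nil, err
	}
	// Allocation is all-or-nothing: a short result list means the node
	// cannot satisfy every claim and should be treated as unschedulable.
	if len(results) != len(allocator.ClaimsToAllocate()) {
		return nil, fmt.Errorf("cannot allocate all claims on node %s", node.Name)
	}
	return results, nil
}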


@@ -51,6 +51,11 @@ import (
var (
podKind = v1.SchemeGroupVersion.WithKind("Pod")
nodeName = "worker"
node2Name = "worker-2"
node3Name = "worker-3"
controller = "some-driver"
driver = controller
podName = "my-pod"
podUID = "1234"
resourceName = "my-resource"
@@ -59,45 +64,12 @@ var (
claimName2 = podName + "-" + resourceName + "-2"
className = "my-resource-class"
namespace = "default"
attrName = resourceapi.QualifiedName("healthy") // device attribute only available on non-default node
resourceClass = &resourceapi.ResourceClass{
deviceClass = &resourceapi.DeviceClass{
ObjectMeta: metav1.ObjectMeta{
Name: className,
},
DriverName: "some-driver",
}
structuredResourceClass = &resourceapi.ResourceClass{
ObjectMeta: metav1.ObjectMeta{
Name: className,
},
DriverName: "some-driver",
StructuredParameters: ptr.To(true),
}
structuredResourceClassWithParams = &resourceapi.ResourceClass{
ObjectMeta: metav1.ObjectMeta{
Name: className,
},
DriverName: "some-driver",
StructuredParameters: ptr.To(true),
ParametersRef: &resourceapi.ResourceClassParametersReference{
Name: className,
Namespace: namespace,
Kind: "ResourceClassParameters",
APIGroup: "resource.k8s.io",
},
}
structuredResourceClassWithCRD = &resourceapi.ResourceClass{
ObjectMeta: metav1.ObjectMeta{
Name: className,
},
DriverName: "some-driver",
StructuredParameters: ptr.To(true),
ParametersRef: &resourceapi.ResourceClassParametersReference{
Name: className,
Namespace: namespace,
Kind: "ResourceClassParameters",
APIGroup: "example.com",
},
}
podWithClaimName = st.MakePod().Name(podName).Namespace(namespace).
@@ -124,39 +96,29 @@ var (
PodResourceClaims(v1.PodResourceClaim{Name: resourceName2, ResourceClaimName: &claimName2}).
Obj()
workerNode = &st.MakeNode().Name("worker").Label("kubernetes.io/hostname", "worker").Node
workerNodeSlice = st.MakeResourceSlice("worker", "some-driver").NamedResourcesInstances("instance-1").Obj()
// Node with "instance-1" device and no device attributes.
workerNode = &st.MakeNode().Name(nodeName).Label("kubernetes.io/hostname", nodeName).Node
workerNodeSlice = st.MakeResourceSlice(nodeName, driver).Device("instance-1", nil).Obj()
claimParameters = st.MakeClaimParameters().Name(claimName).Namespace(namespace).
NamedResourcesRequests("some-driver", "true").
GeneratedFrom(&resourceapi.ResourceClaimParametersReference{
Name: claimName,
Kind: "ResourceClaimParameters",
APIGroup: "example.com",
}).
Obj()
claimParametersOtherNamespace = st.MakeClaimParameters().Name(claimName).Namespace(namespace+"-2").
NamedResourcesRequests("some-driver", "true").
GeneratedFrom(&resourceapi.ResourceClaimParametersReference{
Name: claimName,
Kind: "ResourceClaimParameters",
APIGroup: "example.com",
}).
Obj()
classParameters = st.MakeClassParameters().Name(className).Namespace(namespace).
NamedResourcesFilters("some-driver", "true").
GeneratedFrom(&resourceapi.ResourceClassParametersReference{
Name: className,
Namespace: namespace,
Kind: "ResourceClassParameters",
APIGroup: "example.com",
}).
Obj()
// Node with same device, but now with a "healthy" boolean attribute.
workerNode2 = &st.MakeNode().Name(node2Name).Label("kubernetes.io/hostname", node2Name).Node
workerNode2Slice = st.MakeResourceSlice(node2Name, driver).Device("instance-1", map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{attrName: {BoolValue: ptr.To(true)}}).Obj()
claim = st.MakeResourceClaim().
// Yet another node, same as the second one.
workerNode3 = &st.MakeNode().Name(node3Name).Label("kubernetes.io/hostname", node3Name).Node
workerNode3Slice = st.MakeResourceSlice(node3Name, driver).Device("instance-1", map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{attrName: {BoolValue: ptr.To(true)}}).Obj()
brokenSelector = resourceapi.DeviceSelector{
CEL: &resourceapi.CELDeviceSelector{
// Not set for workerNode.
Expression: fmt.Sprintf(`device.attributes["%s"].%s`, driver, attrName),
},
}
claim = st.MakeResourceClaim(controller).
Name(claimName).
Namespace(namespace).
ResourceClassName(className).
Request(className).
Obj()
pendingClaim = st.FromResourceClaim(claim).
OwnerReference(podName, podUID, podKind).
@@ -164,44 +126,53 @@ var (
pendingClaim2 = st.FromResourceClaim(pendingClaim).
Name(claimName2).
Obj()
allocationResult = &resourceapi.AllocationResult{
Controller: controller,
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "req-1",
}},
},
NodeSelector: func() *v1.NodeSelector {
// Label selector...
nodeSelector := st.MakeNodeSelector().In("metadata.name", []string{nodeName}).Obj()
// ... but we need a field selector, so let's swap.
nodeSelector.NodeSelectorTerms[0].MatchExpressions, nodeSelector.NodeSelectorTerms[0].MatchFields = nodeSelector.NodeSelectorTerms[0].MatchFields, nodeSelector.NodeSelectorTerms[0].MatchExpressions
return nodeSelector
}(),
}
deallocatingClaim = st.FromResourceClaim(pendingClaim).
Allocation("some-driver", &resourceapi.AllocationResult{}).
Allocation(allocationResult).
DeallocationRequested(true).
Obj()
inUseClaim = st.FromResourceClaim(pendingClaim).
Allocation("some-driver", &resourceapi.AllocationResult{}).
Allocation(allocationResult).
ReservedForPod(podName, types.UID(podUID)).
Obj()
structuredInUseClaim = st.FromResourceClaim(inUseClaim).
Structured("worker", "instance-1").
Structured().
Obj()
allocatedClaim = st.FromResourceClaim(pendingClaim).
Allocation("some-driver", &resourceapi.AllocationResult{}).
Allocation(allocationResult).
Obj()
pendingClaimWithParams = st.FromResourceClaim(pendingClaim).ParametersRef(claimName).Obj()
structuredAllocatedClaim = st.FromResourceClaim(allocatedClaim).Structured("worker", "instance-1").Obj()
structuredAllocatedClaimWithParams = st.FromResourceClaim(structuredAllocatedClaim).ParametersRef(claimName).Obj()
otherStructuredAllocatedClaim = st.FromResourceClaim(structuredAllocatedClaim).Name(structuredAllocatedClaim.Name + "-other").Obj()
allocatedClaimWithWrongTopology = st.FromResourceClaim(allocatedClaim).
Allocation("some-driver", &resourceapi.AllocationResult{AvailableOnNodes: st.MakeNodeSelector().In("no-such-label", []string{"no-such-value"}).Obj()}).
Allocation(&resourceapi.AllocationResult{Controller: controller, NodeSelector: st.MakeNodeSelector().In("no-such-label", []string{"no-such-value"}).Obj()}).
Obj()
structuredAllocatedClaimWithWrongTopology = st.FromResourceClaim(allocatedClaimWithWrongTopology).
Structured("worker-2", "instance-1").
Obj()
allocatedClaimWithGoodTopology = st.FromResourceClaim(allocatedClaim).
Allocation("some-driver", &resourceapi.AllocationResult{AvailableOnNodes: st.MakeNodeSelector().In("kubernetes.io/hostname", []string{"worker"}).Obj()}).
Allocation(&resourceapi.AllocationResult{Controller: controller, NodeSelector: st.MakeNodeSelector().In("kubernetes.io/hostname", []string{nodeName}).Obj()}).
Obj()
structuredAllocatedClaimWithGoodTopology = st.FromResourceClaim(allocatedClaimWithGoodTopology).
Structured("worker", "instance-1").
Obj()
otherClaim = st.MakeResourceClaim().
otherClaim = st.MakeResourceClaim(controller).
Name("not-my-claim").
Namespace(namespace).
ResourceClassName(className).
Request(className).
Obj()
otherAllocatedClaim = st.FromResourceClaim(otherClaim).
Allocation(allocationResult).
Obj()
scheduling = st.MakePodSchedulingContexts().Name(podName).Namespace(namespace).
OwnerReference(podName, podUID, podKind).
@@ -224,38 +195,37 @@ func reserve(claim *resourceapi.ResourceClaim, pod *v1.Pod) *resourceapi.Resourc
Obj()
}
// claimWithCRD replaces the in-tree group with "example.com".
func claimWithCRD(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
func structuredClaim(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
return st.FromResourceClaim(claim).
Structured().
Obj()
}
func breakCELInClaim(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
claim = claim.DeepCopy()
claim.Spec.ParametersRef.APIGroup = "example.com"
for i := range claim.Spec.Devices.Requests {
for e := range claim.Spec.Devices.Requests[i].Selectors {
claim.Spec.Devices.Requests[i].Selectors[e] = brokenSelector
}
if len(claim.Spec.Devices.Requests[i].Selectors) == 0 {
claim.Spec.Devices.Requests[i].Selectors = []resourceapi.DeviceSelector{brokenSelector}
}
}
return claim
}
// classWithCRD replaces the in-tree group with "example.com".
func classWithCRD(class *resourceapi.ResourceClass) *resourceapi.ResourceClass {
func breakCELInClass(class *resourceapi.DeviceClass) *resourceapi.DeviceClass {
class = class.DeepCopy()
class.ParametersRef.APIGroup = "example.com"
for i := range class.Spec.Selectors {
class.Spec.Selectors[i] = brokenSelector
}
if len(class.Spec.Selectors) == 0 {
class.Spec.Selectors = []resourceapi.DeviceSelector{brokenSelector}
}
return class
}
func breakCELInClaimParameters(parameters *resourceapi.ResourceClaimParameters) *resourceapi.ResourceClaimParameters {
parameters = parameters.DeepCopy()
for i := range parameters.DriverRequests {
for e := range parameters.DriverRequests[i].Requests {
parameters.DriverRequests[i].Requests[e].NamedResources.Selector = `attributes.bool["no-such-attribute"]`
}
}
return parameters
}
func breakCELInClassParameters(parameters *resourceapi.ResourceClassParameters) *resourceapi.ResourceClassParameters {
parameters = parameters.DeepCopy()
for i := range parameters.Filters {
parameters.Filters[i].NamedResources.Selector = `attributes.bool["no-such-attribute"]`
}
return parameters
}
// result defines the expected outcome of some operation. It covers
// operation's status and the state of the world (= objects).
type result struct {
@@ -337,7 +307,7 @@ func TestPlugin(t *testing.T) {
nodes []*v1.Node // default if unset is workerNode
pod *v1.Pod
claims []*resourceapi.ResourceClaim
classes []*resourceapi.ResourceClass
classes []*resourceapi.DeviceClass
schedulings []*resourceapi.PodSchedulingContext
// objs get stored directly in the fake client, without passing
@@ -378,7 +348,7 @@ func TestPlugin(t *testing.T) {
},
"claim-reference-structured": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{structuredAllocatedClaim, otherClaim},
claims: []*resourceapi.ResourceClaim{structuredClaim(allocatedClaim), otherClaim},
want: want{
prebind: result{
changes: change{
@@ -412,7 +382,7 @@ func TestPlugin(t *testing.T) {
},
"claim-template-structured": {
pod: podWithClaimTemplateInStatus,
claims: []*resourceapi.ResourceClaim{structuredAllocatedClaim, otherClaim},
claims: []*resourceapi.ResourceClaim{structuredClaim(allocatedClaim), otherClaim},
want: want{
prebind: result{
changes: change{
@@ -464,12 +434,12 @@ func TestPlugin(t *testing.T) {
},
"structured-no-resources": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.ResourceClass{structuredResourceClass},
claims: []*resourceapi.ResourceClaim{structuredClaim(pendingClaim)},
classes: []*resourceapi.DeviceClass{deviceClass},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `resourceclaim cannot be allocated for the node (unsuitable)`),
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `cannot allocate all claims`),
},
},
postfilter: result{
@@ -479,28 +449,28 @@ func TestPlugin(t *testing.T) {
},
"structured-with-resources": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.ResourceClass{structuredResourceClass},
claims: []*resourceapi.ResourceClaim{structuredClaim(pendingClaim)},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
reserve: result{
inFlightClaim: structuredAllocatedClaim,
inFlightClaim: structuredClaim(allocatedClaim),
},
prebind: result{
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = structuredAllocatedClaim.Finalizers
claim.Status = structuredInUseClaim.Status
claim.Finalizers = structuredClaim(allocatedClaim).Finalizers
claim.Status = structuredClaim(inUseClaim).Status
}
return claim
},
},
},
postbind: result{
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
},
},
},
@@ -509,18 +479,18 @@ func TestPlugin(t *testing.T) {
// the scheduler got interrupted.
pod: podWithClaimName,
claims: func() []*resourceapi.ResourceClaim {
claim := pendingClaim.DeepCopy()
claim.Finalizers = structuredAllocatedClaim.Finalizers
claim := structuredClaim(pendingClaim)
claim.Finalizers = structuredClaim(allocatedClaim).Finalizers
return []*resourceapi.ResourceClaim{claim}
}(),
classes: []*resourceapi.ResourceClass{structuredResourceClass},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
reserve: result{
inFlightClaim: structuredAllocatedClaim,
inFlightClaim: structuredClaim(allocatedClaim),
},
prebind: result{
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
@@ -532,7 +502,7 @@ func TestPlugin(t *testing.T) {
},
},
postbind: result{
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
},
},
},
@@ -541,11 +511,11 @@ func TestPlugin(t *testing.T) {
// removed before the scheduler reaches PreBind.
pod: podWithClaimName,
claims: func() []*resourceapi.ResourceClaim {
claim := pendingClaim.DeepCopy()
claim.Finalizers = structuredAllocatedClaim.Finalizers
claim := structuredClaim(pendingClaim)
claim.Finalizers = structuredClaim(allocatedClaim).Finalizers
return []*resourceapi.ResourceClaim{claim}
}(),
classes: []*resourceapi.ResourceClass{structuredResourceClass},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
prepare: prepare{
prebind: change{
@@ -557,15 +527,15 @@ func TestPlugin(t *testing.T) {
},
want: want{
reserve: result{
inFlightClaim: structuredAllocatedClaim,
inFlightClaim: structuredClaim(allocatedClaim),
},
prebind: result{
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = structuredAllocatedClaim.Finalizers
claim.Finalizers = structuredClaim(allocatedClaim).Finalizers
claim.Status = structuredInUseClaim.Status
}
return claim
@@ -573,7 +543,7 @@ func TestPlugin(t *testing.T) {
},
},
postbind: result{
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
},
},
},
@@ -581,23 +551,23 @@ func TestPlugin(t *testing.T) {
// No finalizer initially, then it gets added before
// the scheduler reaches PreBind. Shouldn't happen?
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.ResourceClass{structuredResourceClass},
claims: []*resourceapi.ResourceClaim{structuredClaim(pendingClaim)},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
prepare: prepare{
prebind: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
claim.Finalizers = structuredAllocatedClaim.Finalizers
claim.Finalizers = structuredClaim(allocatedClaim).Finalizers
return claim
},
},
},
want: want{
reserve: result{
inFlightClaim: structuredAllocatedClaim,
inFlightClaim: structuredClaim(allocatedClaim),
},
prebind: result{
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
@@ -609,31 +579,31 @@ func TestPlugin(t *testing.T) {
},
},
postbind: result{
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
},
},
},
"structured-skip-bind": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.ResourceClass{structuredResourceClass},
claims: []*resourceapi.ResourceClaim{structuredClaim(pendingClaim)},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
reserve: result{
inFlightClaim: structuredAllocatedClaim,
inFlightClaim: structuredClaim(allocatedClaim),
},
unreserveBeforePreBind: &result{},
},
},
"structured-exhausted-resources": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim, otherStructuredAllocatedClaim},
classes: []*resourceapi.ResourceClass{structuredResourceClass},
claims: []*resourceapi.ResourceClaim{structuredClaim(pendingClaim), structuredClaim(otherAllocatedClaim)},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `resourceclaim cannot be allocated for the node (unsuitable)`),
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `cannot allocate all claims`),
},
},
postfilter: result{
@@ -642,182 +612,70 @@ func TestPlugin(t *testing.T) {
},
},
"with-parameters": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaimWithParams},
classes: []*resourceapi.ResourceClass{structuredResourceClassWithParams},
objs: []apiruntime.Object{claimParameters, classParameters, workerNodeSlice},
want: want{
reserve: result{
inFlightClaim: structuredAllocatedClaimWithParams,
},
prebind: result{
assumedClaim: reserve(structuredAllocatedClaimWithParams, podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = structuredAllocatedClaim.Finalizers
claim.Status = structuredInUseClaim.Status
}
return claim
},
},
},
postbind: result{
assumedClaim: reserve(structuredAllocatedClaimWithParams, podWithClaimName),
},
},
},
"with-translated-parameters": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{claimWithCRD(pendingClaimWithParams)},
classes: []*resourceapi.ResourceClass{classWithCRD(structuredResourceClassWithCRD)},
objs: []apiruntime.Object{claimParameters, claimParametersOtherNamespace /* must be ignored */, classParameters, workerNodeSlice},
want: want{
reserve: result{
inFlightClaim: claimWithCRD(structuredAllocatedClaimWithParams),
},
prebind: result{
assumedClaim: reserve(claimWithCRD(structuredAllocatedClaimWithParams), podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = structuredAllocatedClaim.Finalizers
claim.Status = structuredInUseClaim.Status
}
return claim
},
},
},
postbind: result{
assumedClaim: reserve(claimWithCRD(structuredAllocatedClaimWithParams), podWithClaimName),
},
},
},
"missing-class-parameters": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaimWithParams},
classes: []*resourceapi.ResourceClass{structuredResourceClassWithParams},
objs: []apiruntime.Object{claimParameters, workerNodeSlice},
want: want{
prefilter: result{
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `class parameters default/my-resource-class not found`),
},
postfilter: result{
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
},
},
},
"missing-claim-parameters": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaimWithParams},
classes: []*resourceapi.ResourceClass{structuredResourceClassWithParams},
objs: []apiruntime.Object{classParameters, workerNodeSlice},
want: want{
prefilter: result{
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `claim parameters default/my-pod-my-resource not found`),
},
postfilter: result{
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
},
},
},
"missing-translated-class-parameters": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{claimWithCRD(pendingClaimWithParams)},
classes: []*resourceapi.ResourceClass{classWithCRD(structuredResourceClassWithCRD)},
objs: []apiruntime.Object{claimParameters, workerNodeSlice},
want: want{
prefilter: result{
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `generated class parameters for ResourceClassParameters.example.com default/my-resource-class not found`),
},
postfilter: result{
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
},
},
},
"missing-translated-claim-parameters": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{claimWithCRD(pendingClaimWithParams)},
classes: []*resourceapi.ResourceClass{classWithCRD(structuredResourceClassWithCRD)},
objs: []apiruntime.Object{classParameters, workerNodeSlice},
want: want{
prefilter: result{
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `generated claim parameters for ResourceClaimParameters.example.com default/my-pod-my-resource not found`),
},
postfilter: result{
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
},
},
},
"too-many-translated-class-parameters": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{claimWithCRD(pendingClaimWithParams)},
classes: []*resourceapi.ResourceClass{classWithCRD(structuredResourceClassWithCRD)},
objs: []apiruntime.Object{claimParameters, classParameters, st.FromClassParameters(classParameters).Name("other").Obj() /* too many */, workerNodeSlice},
want: want{
prefilter: result{
status: framework.AsStatus(errors.New(`multiple generated class parameters for ResourceClassParameters.example.com my-resource-class found: [default/my-resource-class default/other]`)),
},
postfilter: result{
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
},
},
},
"too-many-translated-claim-parameters": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{claimWithCRD(pendingClaimWithParams)},
classes: []*resourceapi.ResourceClass{classWithCRD(structuredResourceClassWithCRD)},
objs: []apiruntime.Object{claimParameters, st.FromClaimParameters(claimParameters).Name("other").Obj() /* too many */, classParameters, workerNodeSlice},
want: want{
prefilter: result{
status: framework.AsStatus(errors.New(`multiple generated claim parameters for ResourceClaimParameters.example.com default/my-pod-my-resource found: [default/my-pod-my-resource default/other]`)),
},
postfilter: result{
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
},
},
},
"claim-parameters-CEL-runtime-error": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaimWithParams},
classes: []*resourceapi.ResourceClass{structuredResourceClassWithParams},
objs: []apiruntime.Object{breakCELInClaimParameters(claimParameters), classParameters, workerNodeSlice},
claims: []*resourceapi.ResourceClaim{breakCELInClaim(structuredClaim(pendingClaim))},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `checking structured parameters failed: checking node "worker" and resources of driver "some-driver": evaluate request CEL expression: no such key: no-such-attribute`),
status: framework.AsStatus(errors.New(`claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: ` + string(attrName))),
},
},
postfilter: result{
status: framework.NewStatus(framework.Unschedulable, `still not schedulable`),
},
},
},
"class-parameters-CEL-runtime-error": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaimWithParams},
classes: []*resourceapi.ResourceClass{structuredResourceClassWithParams},
objs: []apiruntime.Object{claimParameters, breakCELInClassParameters(classParameters), workerNodeSlice},
claims: []*resourceapi.ResourceClaim{structuredClaim(pendingClaim)},
classes: []*resourceapi.DeviceClass{breakCELInClass(deviceClass)},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `checking structured parameters failed: checking node "worker" and resources of driver "some-driver": evaluate filter CEL expression: no such key: no-such-attribute`),
status: framework.AsStatus(errors.New(`class my-resource-class: selector #0: CEL runtime error: no such key: ` + string(attrName))),
},
},
postfilter: result{
status: framework.NewStatus(framework.Unschedulable, `still not schedulable`),
},
},
// When pod scheduling encounters CEL runtime errors for some nodes, but not all,
// it should still not schedule the pod because there is something wrong with it.
// Scheduling it would make it harder to detect that there is a problem.
//
// This matches the "keeps pod pending because of CEL runtime errors" E2E test.
"CEL-runtime-error-for-one-of-two-nodes": {
nodes: []*v1.Node{workerNode, workerNode2},
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{breakCELInClaim(structuredClaim(pendingClaim))},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice, workerNode2Slice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: framework.AsStatus(errors.New(`claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: ` + string(attrName))),
},
},
},
},
// When two nodes were found, PreScore gets called.
"CEL-runtime-error-for-one-of-three-nodes": {
nodes: []*v1.Node{workerNode, workerNode2, workerNode3},
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{breakCELInClaim(structuredClaim(pendingClaim))},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice, workerNode2Slice, workerNode3Slice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: `+string(attrName)),
},
},
prescore: result{
// This is the error found during Filter.
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `filter node worker: claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: healthy`),
},
},
},
@@ -839,7 +697,7 @@ func TestPlugin(t *testing.T) {
claims: []*resourceapi.ResourceClaim{pendingClaim},
want: want{
prefilter: result{
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, fmt.Sprintf("resource class %s does not exist", className)),
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, fmt.Sprintf("request req-1: device class %s does not exist", className)),
},
postfilter: result{
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
@@ -851,7 +709,7 @@ func TestPlugin(t *testing.T) {
// and select a node.
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.ResourceClass{resourceClass},
classes: []*resourceapi.DeviceClass{deviceClass},
want: want{
prebind: result{
status: framework.NewStatus(framework.Pending, `waiting for resource driver`),
@@ -865,7 +723,7 @@ func TestPlugin(t *testing.T) {
// there are multiple claims.
pod: podWithTwoClaimNames,
claims: []*resourceapi.ResourceClaim{pendingClaim, pendingClaim2},
classes: []*resourceapi.ResourceClass{resourceClass},
classes: []*resourceapi.DeviceClass{deviceClass},
want: want{
prebind: result{
status: framework.NewStatus(framework.Pending, `waiting for resource driver`),
@@ -879,7 +737,7 @@ func TestPlugin(t *testing.T) {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
schedulings: []*resourceapi.PodSchedulingContext{schedulingInfo},
classes: []*resourceapi.ResourceClass{resourceClass},
classes: []*resourceapi.DeviceClass{deviceClass},
want: want{
prebind: result{
status: framework.NewStatus(framework.Pending, `waiting for resource driver`),
@@ -899,7 +757,7 @@ func TestPlugin(t *testing.T) {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
schedulings: []*resourceapi.PodSchedulingContext{schedulingInfo},
classes: []*resourceapi.ResourceClass{resourceClass},
classes: []*resourceapi.DeviceClass{deviceClass},
prepare: prepare{
prebind: change{
scheduling: func(in *resourceapi.PodSchedulingContext) *resourceapi.PodSchedulingContext {
@@ -923,7 +781,7 @@ func TestPlugin(t *testing.T) {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{allocatedClaim},
schedulings: []*resourceapi.PodSchedulingContext{schedulingInfo},
classes: []*resourceapi.ResourceClass{resourceClass},
classes: []*resourceapi.DeviceClass{deviceClass},
want: want{
prebind: result{
changes: change{
@@ -967,7 +825,7 @@ func TestPlugin(t *testing.T) {
// PostFilter tries to get the pod scheduleable by
// deallocating the claim.
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{structuredAllocatedClaimWithWrongTopology},
claims: []*resourceapi.ResourceClaim{structuredClaim(allocatedClaimWithWrongTopology)},
want: want{
filter: perNodeResult{
workerNode.Name: {
@@ -979,7 +837,7 @@ func TestPlugin(t *testing.T) {
changes: change{
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
return st.FromResourceClaim(in).
Allocation("", nil).
Allocation(nil).
Obj()
},
},
@@ -1028,7 +886,7 @@ func TestPlugin(t *testing.T) {
},
"bind-failure-structured": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{structuredAllocatedClaimWithGoodTopology},
claims: []*resourceapi.ResourceClaim{structuredClaim(allocatedClaimWithGoodTopology)},
want: want{
prebind: result{
changes: change{
@@ -1109,15 +967,20 @@ func TestPlugin(t *testing.T) {
t.Run(fmt.Sprintf("filter/%s", nodeInfo.Node().Name), func(t *testing.T) {
testCtx.verify(t, tc.want.filter.forNode(nodeName), initialObjects, nil, status)
})
if status.Code() != framework.Success {
unschedulable = true
} else {
if status.Code() == framework.Success {
potentialNodes = append(potentialNodes, nodeInfo)
}
if status.Code() == framework.Error {
// An error aborts scheduling.
return
}
}
if len(potentialNodes) == 0 {
unschedulable = true
}
}
if !unschedulable && len(potentialNodes) > 0 {
if !unschedulable && len(potentialNodes) > 1 {
initialObjects = testCtx.listAll(t)
initialObjects = testCtx.updateAPIServer(t, initialObjects, tc.prepare.prescore)
status := testCtx.p.PreScore(testCtx.ctx, testCtx.state, tc.pod, potentialNodes)
@@ -1184,7 +1047,7 @@ func TestPlugin(t *testing.T) {
})
}
}
} else {
} else if len(potentialNodes) == 0 {
initialObjects = testCtx.listAll(t)
initialObjects = testCtx.updateAPIServer(t, initialObjects, tc.prepare.postfilter)
result, status := testCtx.p.PostFilter(testCtx.ctx, testCtx.state, tc.pod, nil /* filteredNodeStatusMap not used by plugin */)
@@ -1209,7 +1072,12 @@ type testContext struct {
func (tc *testContext) verify(t *testing.T, expected result, initialObjects []metav1.Object, result interface{}, status *framework.Status) {
t.Helper()
assert.Equal(t, expected.status, status)
if expectedErr := expected.status.AsError(); expectedErr != nil {
// Compare only the error strings.
assert.ErrorContains(t, status.AsError(), expectedErr.Error())
} else {
assert.Equal(t, expected.status, status)
}
objects := tc.listAll(t)
wantObjects := update(t, initialObjects, expected.changes)
wantObjects = append(wantObjects, expected.added...)
@@ -1351,7 +1219,7 @@ func update(t *testing.T, objects []metav1.Object, updates change) []metav1.Obje
return updated
}
func setup(t *testing.T, nodes []*v1.Node, claims []*resourceapi.ResourceClaim, classes []*resourceapi.ResourceClass, schedulings []*resourceapi.PodSchedulingContext, objs []apiruntime.Object) (result *testContext) {
func setup(t *testing.T, nodes []*v1.Node, claims []*resourceapi.ResourceClaim, classes []*resourceapi.DeviceClass, schedulings []*resourceapi.PodSchedulingContext, objs []apiruntime.Object) (result *testContext) {
t.Helper()
tc := &testContext{}
@@ -1387,7 +1255,7 @@ func setup(t *testing.T, nodes []*v1.Node, claims []*resourceapi.ResourceClaim,
require.NoError(t, err, "create resource claim")
}
for _, class := range classes {
_, err := tc.client.ResourceV1alpha3().ResourceClasses().Create(tc.ctx, class, metav1.CreateOptions{})
_, err := tc.client.ResourceV1alpha3().DeviceClasses().Create(tc.ctx, class, metav1.CreateOptions{})
require.NoError(t, err, "create resource class")
}
for _, scheduling := range schedulings {
@@ -1552,10 +1420,10 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
},
"structured-claim-deallocate": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim, otherStructuredAllocatedClaim},
oldObj: otherStructuredAllocatedClaim,
claims: []*resourceapi.ResourceClaim{pendingClaim, structuredClaim(otherAllocatedClaim)},
oldObj: structuredClaim(otherAllocatedClaim),
newObj: func() *resourceapi.ResourceClaim {
claim := otherStructuredAllocatedClaim.DeepCopy()
claim := structuredClaim(otherAllocatedClaim).DeepCopy()
claim.Status.Allocation = nil
return claim
}(),

View File

@@ -1,153 +0,0 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package namedresources
import (
"context"
"errors"
"fmt"
"slices"
resourceapi "k8s.io/api/resource/v1alpha3"
"k8s.io/apiserver/pkg/cel/environment"
"k8s.io/dynamic-resource-allocation/structured/namedresources/cel"
)
// These types and fields are all exported to allow logging them with
// pretty-printed JSON.
type Model struct {
Instances []InstanceAllocation
}
type InstanceAllocation struct {
Allocated bool
Instance *resourceapi.NamedResourcesInstance
}
// AddResources must be called first to create entries for all existing
// resource instances. The resources parameter may be nil.
func AddResources(m *Model, resources *resourceapi.NamedResourcesResources) {
if resources == nil {
return
}
for i := range resources.Instances {
m.Instances = append(m.Instances, InstanceAllocation{Instance: &resources.Instances[i]})
}
}
// AddAllocation may get called after AddResources to mark some resource
// instances as allocated. The result parameter may be nil.
func AddAllocation(m *Model, result *resourceapi.NamedResourcesAllocationResult) {
if result == nil {
return
}
for i := range m.Instances {
if m.Instances[i].Instance.Name == result.Name {
m.Instances[i].Allocated = true
break
}
}
}
func NewClaimController(filter *resourceapi.NamedResourcesFilter, requests []*resourceapi.NamedResourcesRequest) (*Controller, error) {
c := &Controller{}
if filter != nil {
compilation := cel.GetCompiler().CompileCELExpression(filter.Selector, environment.StoredExpressions)
if compilation.Error != nil {
// Shouldn't happen because of validation.
return nil, fmt.Errorf("compile class filter CEL expression: %w", compilation.Error)
}
c.filter = &compilation
}
for _, request := range requests {
compilation := cel.GetCompiler().CompileCELExpression(request.Selector, environment.StoredExpressions)
if compilation.Error != nil {
// Shouldn't happen because of validation.
return nil, fmt.Errorf("compile request CEL expression: %w", compilation.Error)
}
c.requests = append(c.requests, compilation)
}
return c, nil
}
type Controller struct {
filter *cel.CompilationResult
requests []cel.CompilationResult
}
func (c *Controller) NodeIsSuitable(ctx context.Context, model Model) (bool, error) {
indices, err := c.allocate(ctx, model)
return len(indices) == len(c.requests), err
}
func (c *Controller) Allocate(ctx context.Context, model Model) ([]*resourceapi.NamedResourcesAllocationResult, error) {
indices, err := c.allocate(ctx, model)
if err != nil {
return nil, err
}
if len(indices) != len(c.requests) {
return nil, errors.New("insufficient resources")
}
results := make([]*resourceapi.NamedResourcesAllocationResult, len(c.requests))
for i := range c.requests {
results[i] = &resourceapi.NamedResourcesAllocationResult{Name: model.Instances[indices[i]].Instance.Name}
}
return results, nil
}
func (c *Controller) allocate(ctx context.Context, model Model) ([]int, error) {
// Shallow copy, we need to modify the allocated boolean.
instances := slices.Clone(model.Instances)
indices := make([]int, 0, len(c.requests))
for _, request := range c.requests {
for i, instance := range instances {
if instance.Allocated {
continue
}
if c.filter != nil {
okay, err := c.filter.Evaluate(ctx, instance.Instance.Attributes)
if err != nil {
return nil, fmt.Errorf("evaluate filter CEL expression: %w", err)
}
if !okay {
continue
}
}
okay, err := request.Evaluate(ctx, instance.Instance.Attributes)
if err != nil {
return nil, fmt.Errorf("evaluate request CEL expression: %w", err)
}
if !okay {
continue
}
// Found a matching, unallocated instance. Let's use it.
//
// A more thorough search would include backtracking because
// allocating one "large" instance for a "small" request may
// make a following "large" request impossible to satisfy when
// only "small" instances are left.
instances[i].Allocated = true
indices = append(indices, i)
break
}
}
return indices, nil
}
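// The comment in allocate() above notes that the greedy loop does not
// backtrack. A minimal, self-contained sketch of the backtracking idea,
// independent of the API types here (predicate functions stand in for the
// compiled CEL selectors); not the algorithm used by the new allocator:
func allocateWithBacktracking(requests []func(instance int) bool, free []bool) ([]int, bool) {
	chosen := make([]int, 0, len(requests))
	var try func(r int) bool
	try = func(r int) bool {
		if r == len(requests) {
			return true // every request got an instance
		}
		for i := range free {
			if !free[i] || !requests[r](i) {
				continue
			}
			free[i] = false
			chosen = append(chosen, i)
			if try(r + 1) {
				return true
			}
			// Dead end: release the instance and try the next candidate.
			free[i] = true
			chosen = chosen[:len(chosen)-1]
		}
		return false
	}
	ok := try(0)
	return chosen, ok
}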

View File

@@ -1,327 +0,0 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package namedresources
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
resourceapi "k8s.io/api/resource/v1alpha3"
"k8s.io/kubernetes/test/utils/ktesting"
"k8s.io/utils/ptr"
)
func instance(allocated bool, name string, attributes ...resourceapi.NamedResourcesAttribute) InstanceAllocation {
return InstanceAllocation{
Allocated: allocated,
Instance: &resourceapi.NamedResourcesInstance{
Name: name,
Attributes: attributes,
},
}
}
func TestModel(t *testing.T) {
testcases := map[string]struct {
resources []*resourceapi.NamedResourcesResources
allocations []*resourceapi.NamedResourcesAllocationResult
expectModel Model
}{
"empty": {},
"nil": {
resources: []*resourceapi.NamedResourcesResources{nil},
allocations: []*resourceapi.NamedResourcesAllocationResult{nil},
},
"available": {
resources: []*resourceapi.NamedResourcesResources{
{
Instances: []resourceapi.NamedResourcesInstance{
{Name: "a"},
{Name: "b"},
},
},
{
Instances: []resourceapi.NamedResourcesInstance{
{Name: "x"},
{Name: "y"},
},
},
},
expectModel: Model{Instances: []InstanceAllocation{instance(false, "a"), instance(false, "b"), instance(false, "x"), instance(false, "y")}},
},
"allocated": {
resources: []*resourceapi.NamedResourcesResources{
{
Instances: []resourceapi.NamedResourcesInstance{
{Name: "a"},
{Name: "b"},
},
},
{
Instances: []resourceapi.NamedResourcesInstance{
{Name: "x"},
{Name: "y"},
},
},
},
allocations: []*resourceapi.NamedResourcesAllocationResult{
{
Name: "something-else",
},
{
Name: "a",
},
},
expectModel: Model{Instances: []InstanceAllocation{instance(true, "a"), instance(false, "b"), instance(false, "x"), instance(false, "y")}},
},
}
for name, tc := range testcases {
t.Run(name, func(t *testing.T) {
var actualModel Model
for _, resources := range tc.resources {
AddResources(&actualModel, resources)
}
for _, allocation := range tc.allocations {
AddAllocation(&actualModel, allocation)
}
require.Equal(t, tc.expectModel, actualModel)
})
}
}
func TestController(t *testing.T) {
filterAny := &resourceapi.NamedResourcesFilter{
Selector: "true",
}
filterNone := &resourceapi.NamedResourcesFilter{
Selector: "false",
}
filterBrokenType := &resourceapi.NamedResourcesFilter{
Selector: "1",
}
filterBrokenEvaluation := &resourceapi.NamedResourcesFilter{
Selector: `attributes.bool["no-such-attribute"]`,
}
filterAttribute := &resourceapi.NamedResourcesFilter{
Selector: `attributes.bool["usable"]`,
}
requestAny := &resourceapi.NamedResourcesRequest{
Selector: "true",
}
requestNone := &resourceapi.NamedResourcesRequest{
Selector: "false",
}
requestBrokenType := &resourceapi.NamedResourcesRequest{
Selector: "1",
}
requestBrokenEvaluation := &resourceapi.NamedResourcesRequest{
Selector: `attributes.bool["no-such-attribute"]`,
}
requestAttribute := &resourceapi.NamedResourcesRequest{
Selector: `attributes.bool["usable"]`,
}
instance1 := "instance-1"
oneInstance := Model{
Instances: []InstanceAllocation{{
Instance: &resourceapi.NamedResourcesInstance{
Name: instance1,
},
}},
}
instance2 := "instance-2"
twoInstances := Model{
Instances: []InstanceAllocation{
{
Instance: &resourceapi.NamedResourcesInstance{
Name: instance1,
Attributes: []resourceapi.NamedResourcesAttribute{{
Name: "usable",
NamedResourcesAttributeValue: resourceapi.NamedResourcesAttributeValue{
BoolValue: ptr.To(false),
},
}},
},
},
{
Instance: &resourceapi.NamedResourcesInstance{
Name: instance2,
Attributes: []resourceapi.NamedResourcesAttribute{{
Name: "usable",
NamedResourcesAttributeValue: resourceapi.NamedResourcesAttributeValue{
BoolValue: ptr.To(true),
},
}},
},
},
},
}
testcases := map[string]struct {
model Model
filter *resourceapi.NamedResourcesFilter
requests []*resourceapi.NamedResourcesRequest
expectCreateErr bool
expectAllocation []string
expectAllocateErr bool
}{
"empty": {},
"broken-filter": {
filter: filterBrokenType,
expectCreateErr: true,
},
"broken-request": {
requests: []*resourceapi.NamedResourcesRequest{requestBrokenType},
expectCreateErr: true,
},
"no-resources": {
filter: filterAny,
requests: []*resourceapi.NamedResourcesRequest{requestAny},
expectAllocateErr: true,
},
"okay": {
model: oneInstance,
filter: filterAny,
requests: []*resourceapi.NamedResourcesRequest{requestAny},
expectAllocation: []string{instance1},
},
"filter-mismatch": {
model: oneInstance,
filter: filterNone,
requests: []*resourceapi.NamedResourcesRequest{requestAny},
expectAllocateErr: true,
},
"request-mismatch": {
model: oneInstance,
filter: filterAny,
requests: []*resourceapi.NamedResourcesRequest{requestNone},
expectAllocateErr: true,
},
"many": {
model: twoInstances,
filter: filterAny,
requests: []*resourceapi.NamedResourcesRequest{requestAny, requestAny},
expectAllocation: []string{instance1, instance2},
},
"too-many": {
model: oneInstance,
filter: filterAny,
requests: []*resourceapi.NamedResourcesRequest{requestAny, requestAny},
expectAllocateErr: true,
},
"filter-evaluation-error": {
model: oneInstance,
filter: filterBrokenEvaluation,
requests: []*resourceapi.NamedResourcesRequest{requestAny},
expectAllocateErr: true,
},
"request-evaluation-error": {
model: oneInstance,
filter: filterAny,
requests: []*resourceapi.NamedResourcesRequest{requestBrokenEvaluation},
expectAllocateErr: true,
},
"filter-attribute": {
model: twoInstances,
filter: filterAttribute,
requests: []*resourceapi.NamedResourcesRequest{requestAny},
expectAllocation: []string{instance2},
},
"request-attribute": {
model: twoInstances,
filter: filterAny,
requests: []*resourceapi.NamedResourcesRequest{requestAttribute},
expectAllocation: []string{instance2},
},
}
for name, tc := range testcases {
t.Run(name, func(t *testing.T) {
tCtx := ktesting.Init(t)
controller, createErr := NewClaimController(tc.filter, tc.requests)
if createErr != nil {
if !tc.expectCreateErr {
tCtx.Fatalf("unexpected create error: %v", createErr)
}
return
}
if tc.expectCreateErr {
tCtx.Fatalf("did not get expected create error")
}
allocation, createErr := controller.Allocate(tCtx, tc.model)
if createErr != nil {
if !tc.expectAllocateErr {
tCtx.Fatalf("unexpected allocate error: %v", createErr)
}
return
}
if tc.expectAllocateErr {
tCtx.Fatalf("did not get expected allocate error")
}
expectAllocation := []*resourceapi.NamedResourcesAllocationResult{}
for _, name := range tc.expectAllocation {
expectAllocation = append(expectAllocation, &resourceapi.NamedResourcesAllocationResult{Name: name})
}
require.Equal(tCtx, expectAllocation, allocation)
isSuitable, isSuitableErr := controller.NodeIsSuitable(tCtx, tc.model)
assert.Equal(tCtx, len(expectAllocation) == len(tc.requests), isSuitable, "is suitable")
assert.Equal(tCtx, createErr, isSuitableErr)
})
}
}

View File

@@ -1,274 +0,0 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package dynamicresources
import (
"context"
"fmt"
"sync"
v1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1alpha3"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/klog/v2"
namedresourcesmodel "k8s.io/kubernetes/pkg/scheduler/framework/plugins/dynamicresources/structured/namedresources"
)
// resources is a map "node name" -> "driver name" -> available and
// allocated resources per structured parameter model.
type resources map[string]map[string]ResourceModels
// ResourceModels may have more than one entry because it is valid for a driver to
// use more than one structured parameter model.
type ResourceModels struct {
NamedResources namedresourcesmodel.Model
}
// resourceSliceLister is the subset of resourcelisters.ResourceSliceLister needed by
// newResourceModel.
type resourceSliceLister interface {
List(selector labels.Selector) (ret []*resourceapi.ResourceSlice, err error)
}
// assumeCacheLister is the subset of volumebinding.AssumeCache needed by newResourceModel.
type assumeCacheLister interface {
List(indexObj interface{}) []interface{}
}
// newResourceModel parses the available information about resources. Objects
// with an unknown structured parameter model are silently ignored. An error gets
// logged later when parameters required for a pod depend on such an unknown
// model.
func newResourceModel(logger klog.Logger, resourceSliceLister resourceSliceLister, claimAssumeCache assumeCacheLister, inFlightAllocations *sync.Map) (resourceMap, error) {
model := make(resourceMap)
slices, err := resourceSliceLister.List(labels.Everything())
if err != nil {
return nil, fmt.Errorf("list node resource slices: %w", err)
}
for _, slice := range slices {
if slice.NamedResources == nil {
// Ignore unknown resource. We don't know what it is,
// so we cannot allocate anything depending on
// it. This is only an error if we actually see a claim
// which needs this unknown model.
continue
}
instances := slice.NamedResources.Instances
if model[slice.NodeName] == nil {
model[slice.NodeName] = make(map[string]Resources)
}
resources := model[slice.NodeName][slice.DriverName]
resources.Instances = make([]Instance, 0, len(instances))
for i := range instances {
instance := Instance{
NodeName: slice.NodeName,
DriverName: slice.DriverName,
NamedResourcesInstance: &instances[i],
}
resources.Instances = append(resources.Instances, instance)
}
model[slice.NodeName][slice.DriverName] = resources
}
objs := claimAssumeCache.List(nil)
for _, obj := range objs {
claim, ok := obj.(*resourceapi.ResourceClaim)
if !ok {
return nil, fmt.Errorf("got unexpected object of type %T from claim assume cache", obj)
}
if obj, ok := inFlightAllocations.Load(claim.UID); ok {
// If the allocation is in-flight, then we have to use the allocation
// from that claim.
claim = obj.(*resourceapi.ResourceClaim)
}
if claim.Status.Allocation == nil {
continue
}
for _, handle := range claim.Status.Allocation.ResourceHandles {
structured := handle.StructuredData
if structured == nil {
continue
}
if model[structured.NodeName] == nil {
model[structured.NodeName] = make(map[string]Resources)
}
resources := model[structured.NodeName][handle.DriverName]
for _, result := range structured.Results {
// Same as above: if we don't know the allocation result model, ignore it.
if result.NamedResources == nil {
continue
}
instanceName := result.NamedResources.Name
for i := range resources.Instances {
if resources.Instances[i].NamedResourcesInstance.Name == instanceName {
resources.Instances[i].Allocated = true
break
}
}
// It could be that we don't know the instance. That's okay,
// we simply ignore the allocation result.
}
}
}
return model, nil
}
func newClaimController(logger klog.Logger, class *resourceapi.ResourceClass, classParameters *resourceapi.ResourceClassParameters, claimParameters *resourceapi.ResourceClaimParameters) (*claimController, error) {
// Each node driver is separate from the others. Each driver may have
// multiple requests which need to be allocated together, so here
// we have to collect them per model.
type perDriverRequests struct {
parameters []runtime.RawExtension
requests []*resourceapi.NamedResourcesRequest
}
namedresourcesRequests := make(map[string]perDriverRequests)
for i, request := range claimParameters.DriverRequests {
driverName := request.DriverName
p := namedresourcesRequests[driverName]
for e, request := range request.Requests {
switch {
case request.ResourceRequestModel.NamedResources != nil:
p.parameters = append(p.parameters, request.VendorParameters)
p.requests = append(p.requests, request.ResourceRequestModel.NamedResources)
default:
return nil, fmt.Errorf("claim parameters %s: driverRequests[%d].requests[%d]: no supported structured parameters found", klog.KObj(claimParameters), i, e)
}
}
if len(p.requests) > 0 {
namedresourcesRequests[driverName] = p
}
}
c := &claimController{
class: class,
classParameters: classParameters,
claimParameters: claimParameters,
namedresources: make(map[string]perDriverController, len(namedresourcesRequests)),
}
for driverName, perDriver := range namedresourcesRequests {
var filter *resourceapi.NamedResourcesFilter
for _, f := range classParameters.Filters {
if f.DriverName == driverName && f.ResourceFilterModel.NamedResources != nil {
filter = f.ResourceFilterModel.NamedResources
break
}
}
controller, err := namedresourcesmodel.NewClaimController(filter, perDriver.requests)
if err != nil {
return nil, fmt.Errorf("creating claim controller for named resources structured model: %w", err)
}
c.namedresources[driverName] = perDriverController{
parameters: perDriver.parameters,
controller: controller,
}
}
return c, nil
}
// claimController currently wraps exactly one structured parameter model.
type claimController struct {
class *resourceapi.ResourceClass
classParameters *resourceapi.ResourceClassParameters
claimParameters *resourceapi.ResourceClaimParameters
namedresources map[string]perDriverController
}
type perDriverController struct {
parameters []runtime.RawExtension
controller *namedresourcesmodel.Controller
}
func (c claimController) nodeIsSuitable(ctx context.Context, nodeName string, resources resources) (bool, error) {
nodeResources := resources[nodeName]
for driverName, perDriver := range c.namedresources {
okay, err := perDriver.controller.NodeIsSuitable(ctx, nodeResources[driverName].NamedResources)
if err != nil {
// This is an error in the CEL expression which needs
// to be fixed. Better fail very visibly instead of
// ignoring the node.
return false, fmt.Errorf("checking node %q and resources of driver %q: %w", nodeName, driverName, err)
}
if !okay {
return false, nil
}
}
return true, nil
}
func (c claimController) allocate(ctx context.Context, nodeName string, resources resources) (string, *resourceapi.AllocationResult, error) {
allocation := &resourceapi.AllocationResult{
AvailableOnNodes: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{Key: "kubernetes.io/hostname", Operator: v1.NodeSelectorOpIn, Values: []string{nodeName}},
},
},
},
},
}
nodeResources := resources[nodeName]
for driverName, perDriver := range c.namedresources {
// Must return one entry for each request. The entry may be nil. This way,
// the result can be correlated with the per-request parameters.
results, err := perDriver.controller.Allocate(ctx, nodeResources[driverName].NamedResources)
if err != nil {
return "", nil, fmt.Errorf("allocating via named resources structured model: %w", err)
}
handle := resourceapi.ResourceHandle{
DriverName: driverName,
StructuredData: &resourceapi.StructuredResourceHandle{
NodeName: nodeName,
},
}
for i, result := range results {
if result == nil {
continue
}
handle.StructuredData.Results = append(handle.StructuredData.Results,
resourceapi.DriverAllocationResult{
VendorRequestParameters: perDriver.parameters[i],
AllocationResultModel: resourceapi.AllocationResultModel{
NamedResources: result,
},
},
)
}
if c.classParameters != nil {
for _, p := range c.classParameters.VendorParameters {
if p.DriverName == driverName {
handle.StructuredData.VendorClassParameters = p.Parameters
break
}
}
}
for _, request := range c.claimParameters.DriverRequests {
if request.DriverName == driverName {
handle.StructuredData.VendorClaimParameters = request.VendorParameters
break
}
}
allocation.ResourceHandles = append(allocation.ResourceHandles, handle)
}
return c.class.DriverName, allocation, nil
}

View File

@@ -93,18 +93,16 @@ const (
// unschedulable pod pool.
// This behavior will be removed when we remove the preCheck feature.
// See: https://github.com/kubernetes/kubernetes/issues/110175
Node GVK = "Node"
PersistentVolume GVK = "PersistentVolume"
PersistentVolumeClaim GVK = "PersistentVolumeClaim"
CSINode GVK = "storage.k8s.io/CSINode"
CSIDriver GVK = "storage.k8s.io/CSIDriver"
CSIStorageCapacity GVK = "storage.k8s.io/CSIStorageCapacity"
StorageClass GVK = "storage.k8s.io/StorageClass"
PodSchedulingContext GVK = "PodSchedulingContext"
ResourceClaim GVK = "ResourceClaim"
ResourceClass GVK = "ResourceClass"
ResourceClaimParameters GVK = "ResourceClaimParameters"
ResourceClassParameters GVK = "ResourceClassParameters"
Node GVK = "Node"
PersistentVolume GVK = "PersistentVolume"
PersistentVolumeClaim GVK = "PersistentVolumeClaim"
CSINode GVK = "storage.k8s.io/CSINode"
CSIDriver GVK = "storage.k8s.io/CSIDriver"
CSIStorageCapacity GVK = "storage.k8s.io/CSIStorageCapacity"
StorageClass GVK = "storage.k8s.io/StorageClass"
PodSchedulingContext GVK = "PodSchedulingContext"
ResourceClaim GVK = "ResourceClaim"
DeviceClass GVK = "DeviceClass"
// WildCard is a special GVK to match all resources.
// e.g., If you register `{Resource: "*", ActionType: All}` in EventsToRegister,
@@ -197,9 +195,7 @@ func UnrollWildCardResource() []ClusterEventWithHint {
{Event: ClusterEvent{Resource: StorageClass, ActionType: All}},
{Event: ClusterEvent{Resource: PodSchedulingContext, ActionType: All}},
{Event: ClusterEvent{Resource: ResourceClaim, ActionType: All}},
{Event: ClusterEvent{Resource: ResourceClass, ActionType: All}},
{Event: ClusterEvent{Resource: ResourceClaimParameters, ActionType: All}},
{Event: ClusterEvent{Resource: ResourceClassParameters, ActionType: All}},
{Event: ClusterEvent{Resource: DeviceClass, ActionType: All}},
}
}
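// For reference, a sketch of the cluster events a DRA-aware plugin might now
// register; the ClusterEvent shape follows the entries above, and the function
// is illustrative rather than part of any plugin in this commit.
func draClusterEventsSketch() []ClusterEventWithHint {
	return []ClusterEventWithHint{
		{Event: ClusterEvent{Resource: PodSchedulingContext, ActionType: All}},
		{Event: ClusterEvent{Resource: ResourceClaim, ActionType: All}},
		{Event: ClusterEvent{Resource: DeviceClass, ActionType: All}},
	}
}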

View File

@@ -646,13 +646,7 @@ func Test_buildQueueingHintMap(t *testing.T) {
{Resource: framework.ResourceClaim, ActionType: framework.All}: {
{PluginName: filterWithoutEnqueueExtensions, QueueingHintFn: defaultQueueingHintFn},
},
{Resource: framework.ResourceClass, ActionType: framework.All}: {
{PluginName: filterWithoutEnqueueExtensions, QueueingHintFn: defaultQueueingHintFn},
},
{Resource: framework.ResourceClaimParameters, ActionType: framework.All}: {
{PluginName: filterWithoutEnqueueExtensions, QueueingHintFn: defaultQueueingHintFn},
},
{Resource: framework.ResourceClassParameters, ActionType: framework.All}: {
{Resource: framework.DeviceClass, ActionType: framework.All}: {
{PluginName: filterWithoutEnqueueExtensions, QueueingHintFn: defaultQueueingHintFn},
},
},
@@ -803,19 +797,17 @@ func Test_UnionedGVKs(t *testing.T) {
Disabled: []schedulerapi.Plugin{{Name: "*"}}, // disable default plugins
},
want: map[framework.GVK]framework.ActionType{
framework.Pod: framework.All,
framework.Node: framework.All,
framework.CSINode: framework.All,
framework.CSIDriver: framework.All,
framework.CSIStorageCapacity: framework.All,
framework.PersistentVolume: framework.All,
framework.PersistentVolumeClaim: framework.All,
framework.StorageClass: framework.All,
framework.PodSchedulingContext: framework.All,
framework.ResourceClaim: framework.All,
framework.ResourceClass: framework.All,
framework.ResourceClaimParameters: framework.All,
framework.ResourceClassParameters: framework.All,
framework.Pod: framework.All,
framework.Node: framework.All,
framework.CSINode: framework.All,
framework.CSIDriver: framework.All,
framework.CSIStorageCapacity: framework.All,
framework.PersistentVolume: framework.All,
framework.PersistentVolumeClaim: framework.All,
framework.StorageClass: framework.All,
framework.PodSchedulingContext: framework.All,
framework.ResourceClaim: framework.All,
framework.DeviceClass: framework.All,
},
},
{

View File

@@ -900,8 +900,8 @@ func (p *PersistentVolumeWrapper) NodeAffinityIn(key string, vals []string) *Per
type ResourceClaimWrapper struct{ resourceapi.ResourceClaim }
// MakeResourceClaim creates a ResourceClaim wrapper.
func MakeResourceClaim() *ResourceClaimWrapper {
return &ResourceClaimWrapper{resourceapi.ResourceClaim{}}
func MakeResourceClaim(controller string) *ResourceClaimWrapper {
return &ResourceClaimWrapper{resourceapi.ResourceClaim{Spec: resourceapi.ResourceClaimSpec{Controller: controller}}}
}
// FromResourceClaim creates a ResourceClaim wrapper from some existing object.
@@ -946,72 +946,34 @@ func (wrapper *ResourceClaimWrapper) OwnerReference(name, uid string, gvk schema
return wrapper
}
// ParametersRef sets a reference to a ResourceClaimParameters.resource.k8s.io.
func (wrapper *ResourceClaimWrapper) ParametersRef(name string) *ResourceClaimWrapper {
wrapper.ResourceClaim.Spec.ParametersRef = &resourceapi.ResourceClaimParametersReference{
Name: name,
Kind: "ResourceClaimParameters",
APIGroup: "resource.k8s.io",
}
return wrapper
}
// ResourceClassName sets the resource class name of the inner object.
func (wrapper *ResourceClaimWrapper) ResourceClassName(name string) *ResourceClaimWrapper {
wrapper.ResourceClaim.Spec.ResourceClassName = name
// Request adds one device request for the given device class.
func (wrapper *ResourceClaimWrapper) Request(deviceClassName string) *ResourceClaimWrapper {
wrapper.Spec.Devices.Requests = append(wrapper.Spec.Devices.Requests,
resourceapi.DeviceRequest{
Name: fmt.Sprintf("req-%d", len(wrapper.Spec.Devices.Requests)+1),
// Cannot rely on defaulting here, this is used in unit tests.
AllocationMode: resourceapi.DeviceAllocationModeExactCount,
Count: 1,
DeviceClassName: deviceClassName,
},
)
return wrapper
}
// Allocation sets the allocation of the inner object.
func (wrapper *ResourceClaimWrapper) Allocation(driverName string, allocation *resourceapi.AllocationResult) *ResourceClaimWrapper {
wrapper.ResourceClaim.Status.DriverName = driverName
func (wrapper *ResourceClaimWrapper) Allocation(allocation *resourceapi.AllocationResult) *ResourceClaimWrapper {
wrapper.ResourceClaim.Status.Allocation = allocation
return wrapper
}
// Structured turns a "normal" claim into one which was allocated via structured parameters.
// This modifies the allocation result and adds the reserved finalizer if the claim
// is allocated. The claim has to become local to a node. The assumption is that
// "named resources" are used.
func (wrapper *ResourceClaimWrapper) Structured(nodeName string, namedResourcesInstances ...string) *ResourceClaimWrapper {
// The only difference is that there is no controller name and the special finalizer
// gets added.
func (wrapper *ResourceClaimWrapper) Structured() *ResourceClaimWrapper {
wrapper.Spec.Controller = ""
if wrapper.ResourceClaim.Status.Allocation != nil {
wrapper.ResourceClaim.Finalizers = append(wrapper.ResourceClaim.Finalizers, resourceapi.Finalizer)
for i, resourceHandle := range wrapper.ResourceClaim.Status.Allocation.ResourceHandles {
resourceHandle.Data = ""
resourceHandle.StructuredData = &resourceapi.StructuredResourceHandle{
NodeName: nodeName,
}
wrapper.ResourceClaim.Status.Allocation.ResourceHandles[i] = resourceHandle
}
if len(wrapper.ResourceClaim.Status.Allocation.ResourceHandles) == 0 {
wrapper.ResourceClaim.Status.Allocation.ResourceHandles = []resourceapi.ResourceHandle{{
DriverName: wrapper.ResourceClaim.Status.DriverName,
StructuredData: &resourceapi.StructuredResourceHandle{
NodeName: nodeName,
},
}}
}
for _, resourceHandle := range wrapper.ResourceClaim.Status.Allocation.ResourceHandles {
for _, name := range namedResourcesInstances {
result := resourceapi.DriverAllocationResult{
AllocationResultModel: resourceapi.AllocationResultModel{
NamedResources: &resourceapi.NamedResourcesAllocationResult{
Name: name,
},
},
}
resourceHandle.StructuredData.Results = append(resourceHandle.StructuredData.Results, result)
}
}
wrapper.ResourceClaim.Status.Allocation.AvailableOnNodes = &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{{
MatchExpressions: []v1.NodeSelectorRequirement{{
Key: "kubernetes.io/hostname",
Operator: v1.NodeSelectorOpIn,
Values: []string{nodeName},
}},
}},
}
wrapper.ResourceClaim.Status.Allocation.Controller = ""
}
return wrapper
}
@@ -1120,8 +1082,9 @@ type ResourceSliceWrapper struct {
func MakeResourceSlice(nodeName, driverName string) *ResourceSliceWrapper {
wrapper := new(ResourceSliceWrapper)
wrapper.Name = nodeName + "-" + driverName
wrapper.NodeName = nodeName
wrapper.DriverName = driverName
wrapper.Spec.NodeName = nodeName
wrapper.Spec.Pool.Name = nodeName
wrapper.Spec.Driver = driverName
return wrapper
}
@@ -1129,119 +1092,14 @@ func (wrapper *ResourceSliceWrapper) Obj() *resourceapi.ResourceSlice {
return &wrapper.ResourceSlice
}
func (wrapper *ResourceSliceWrapper) NamedResourcesInstances(names ...string) *ResourceSliceWrapper {
wrapper.ResourceModel = resourceapi.ResourceModel{NamedResources: &resourceapi.NamedResourcesResources{}}
func (wrapper *ResourceSliceWrapper) Devices(names ...string) *ResourceSliceWrapper {
for _, name := range names {
wrapper.ResourceModel.NamedResources.Instances = append(wrapper.ResourceModel.NamedResources.Instances,
resourceapi.NamedResourcesInstance{Name: name},
)
wrapper.Spec.Devices = append(wrapper.Spec.Devices, resourceapi.Device{Name: name})
}
return wrapper
}
type ClaimParametersWrapper struct {
resourceapi.ResourceClaimParameters
}
func MakeClaimParameters() *ClaimParametersWrapper {
return &ClaimParametersWrapper{}
}
// FromClaimParameters creates a ResourceClaimParameters wrapper from an existing object.
func FromClaimParameters(other *resourceapi.ResourceClaimParameters) *ClaimParametersWrapper {
return &ClaimParametersWrapper{*other.DeepCopy()}
}
func (wrapper *ClaimParametersWrapper) Obj() *resourceapi.ResourceClaimParameters {
return &wrapper.ResourceClaimParameters
}
func (wrapper *ClaimParametersWrapper) Name(s string) *ClaimParametersWrapper {
wrapper.SetName(s)
return wrapper
}
func (wrapper *ClaimParametersWrapper) UID(s string) *ClaimParametersWrapper {
wrapper.SetUID(types.UID(s))
return wrapper
}
func (wrapper *ClaimParametersWrapper) Namespace(s string) *ClaimParametersWrapper {
wrapper.SetNamespace(s)
return wrapper
}
func (wrapper *ClaimParametersWrapper) GeneratedFrom(value *resourceapi.ResourceClaimParametersReference) *ClaimParametersWrapper {
wrapper.ResourceClaimParameters.GeneratedFrom = value
return wrapper
}
func (wrapper *ClaimParametersWrapper) NamedResourcesRequests(driverName string, selectors ...string) *ClaimParametersWrapper {
requests := resourceapi.DriverRequests{
DriverName: driverName,
}
for _, selector := range selectors {
request := resourceapi.ResourceRequest{
ResourceRequestModel: resourceapi.ResourceRequestModel{
NamedResources: &resourceapi.NamedResourcesRequest{
Selector: selector,
},
},
}
requests.Requests = append(requests.Requests, request)
}
wrapper.DriverRequests = append(wrapper.DriverRequests, requests)
return wrapper
}
type ClassParametersWrapper struct {
resourceapi.ResourceClassParameters
}
func MakeClassParameters() *ClassParametersWrapper {
return &ClassParametersWrapper{}
}
// FromClassParameters creates a ResourceClassParameters wrapper from an existing object.
func FromClassParameters(other *resourceapi.ResourceClassParameters) *ClassParametersWrapper {
return &ClassParametersWrapper{*other.DeepCopy()}
}
func (wrapper *ClassParametersWrapper) Obj() *resourceapi.ResourceClassParameters {
return &wrapper.ResourceClassParameters
}
func (wrapper *ClassParametersWrapper) Name(s string) *ClassParametersWrapper {
wrapper.SetName(s)
return wrapper
}
func (wrapper *ClassParametersWrapper) UID(s string) *ClassParametersWrapper {
wrapper.SetUID(types.UID(s))
return wrapper
}
func (wrapper *ClassParametersWrapper) Namespace(s string) *ClassParametersWrapper {
wrapper.SetNamespace(s)
return wrapper
}
func (wrapper *ClassParametersWrapper) GeneratedFrom(value *resourceapi.ResourceClassParametersReference) *ClassParametersWrapper {
wrapper.ResourceClassParameters.GeneratedFrom = value
return wrapper
}
func (wrapper *ClassParametersWrapper) NamedResourcesFilters(driverName string, selectors ...string) *ClassParametersWrapper {
for _, selector := range selectors {
filter := resourceapi.ResourceFilter{
DriverName: driverName,
ResourceFilterModel: resourceapi.ResourceFilterModel{
NamedResources: &resourceapi.NamedResourcesFilter{
Selector: selector,
},
},
}
wrapper.Filters = append(wrapper.Filters, filter)
}
func (wrapper *ResourceSliceWrapper) Device(name string, attrs map[resourceapi.QualifiedName]resourceapi.DeviceAttribute) *ResourceSliceWrapper {
wrapper.Spec.Devices = append(wrapper.Spec.Devices, resourceapi.Device{Name: name, Basic: &resourceapi.BasicDevice{Attributes: attrs}})
return wrapper
}
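// Usage sketch for the updated wrappers, using only methods visible in this
// diff; the class, driver, node, and device names are placeholders.
func exampleWrapperUsage() {
	// A structured claim no longer names a control-plane controller.
	claim := MakeResourceClaim("").
		Request("my-device-class"). // adds DeviceRequest "req-1"
		Structured().               // clears Spec.Controller; the finalizer is added once allocated
		Obj()

	// A ResourceSlice now lists individual devices for one node and driver.
	slice := MakeResourceSlice("worker", "some-driver").
		Devices("device-0", "device-1").
		Obj()

	_, _ = claim, slice
}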