From ec70b2ec8011c8891d1b3c838334c78930aec215 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Wed, 21 Jun 2023 10:42:22 +0200
Subject: [PATCH 1/3] e2e dra: add "kubelet must skip NodePrepareResource if
 not used by any container"

If (for whatever reason) no container uses a claim, then there's no
need to prepare it.
---
 test/e2e/dra/dra.go | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/test/e2e/dra/dra.go b/test/e2e/dra/dra.go
index 59dc3376da3..3d77fd93bd0 100644
--- a/test/e2e/dra/dra.go
+++ b/test/e2e/dra/dra.go
@@ -61,11 +61,11 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 		nodes := NewNodes(f, 1, 1)
 		driver := NewDriver(f, nodes, networkResources) // All tests get their own driver instance.
 		b := newBuilder(f, driver)
+
 		ginkgo.It("registers plugin", func() {
 			ginkgo.By("the driver is running")
 		})

-		// This test does not pass at the moment because kubelet doesn't retry.
 		ginkgo.It("must retry NodePrepareResource", func(ctx context.Context) {
 			// We have exactly one host.
 			m := MethodInstance{driver.Nodenames()[0], NodePrepareResourceMethod}
@@ -95,6 +95,7 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 				framework.Fail("NodePrepareResource should have been called again")
 			}
 		})
+
 		ginkgo.It("must not run a pod if a claim is not reserved for it", func(ctx context.Context) {
 			parameters := b.parameters()
 			claim := b.externalClaim(resourcev1alpha2.AllocationModeImmediate)
@@ -118,6 +119,7 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 				return nil
 			}, 20*time.Second, 200*time.Millisecond).Should(gomega.BeNil())
 		})
+
 		ginkgo.It("must unprepare resources for force-deleted pod", func(ctx context.Context) {
 			parameters := b.parameters()
 			claim := b.externalClaim(resourcev1alpha2.AllocationModeImmediate)
@@ -140,6 +142,19 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 				gomega.Eventually(plugin.GetPreparedResources).WithTimeout(time.Minute).Should(gomega.BeEmpty(), "prepared claims on host %s", host)
 			}
 		})
+
+		ginkgo.It("must skip NodePrepareResource if not used by any container", func(ctx context.Context) {
+			parameters := b.parameters()
+			pod, template := b.podInline(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+			for i := range pod.Spec.Containers {
+				pod.Spec.Containers[i].Resources.Claims = nil
+			}
+			b.create(ctx, parameters, pod, template)
+			framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod), "start pod")
+			for host, plugin := range b.driver.Nodes {
+				gomega.Expect(plugin.GetPreparedResources()).Should(gomega.BeEmpty(), "no claims should be prepared on host %s while pod is running", host)
+			}
+		})
 	})

 	ginkgo.Context("driver", func() {

From 874daa8b52c737c2e6489bc07b15c1ef6c4e774a Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Tue, 27 Jun 2023 15:56:42 +0200
Subject: [PATCH 2/3] kubelet dra: fix checking of second pod which uses a claim

When a second pod wanted to use a claim, the obligatory sanity check
whether the pod is really allowed to use the claim ("reserved for")
was skipped.
---
 pkg/kubelet/cm/dra/manager.go | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/pkg/kubelet/cm/dra/manager.go b/pkg/kubelet/cm/dra/manager.go
index 4eca6110311..7ef397bfcd4 100644
--- a/pkg/kubelet/cm/dra/manager.go
+++ b/pkg/kubelet/cm/dra/manager.go
@@ -70,20 +70,6 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
 		claimName := resourceclaim.Name(pod, &pod.Spec.ResourceClaims[i])
 		klog.V(3).InfoS("Processing resource", "claim", claimName, "pod", pod.Name)

-		// Resource is already prepared, add pod UID to it
-		if claimInfo := m.cache.get(claimName, pod.Namespace); claimInfo != nil {
-			// We delay checkpointing of this change until this call
-			// returns successfully. It is OK to do this because we
-			// will only return successfully from this call if the
-			// checkpoint has succeeded. That means if the kubelet is
-			// ever restarted before this checkpoint succeeds, the pod
-			// whose resources are being prepared would never have
-			// started, so it's OK (actually correct) to not include it
-			// in the cache.
-			claimInfo.addPodReference(pod.UID)
-			continue
-		}
-
 		// Query claim object from the API server
 		resourceClaim, err := m.kubeClient.ResourceV1alpha2().ResourceClaims(pod.Namespace).Get(
 			context.TODO(),
@@ -99,6 +85,20 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
 				pod.Name, pod.UID, claimName, resourceClaim.UID)
 		}

+		// Is the resource already prepared? Then add the pod UID to it.
+		if claimInfo := m.cache.get(claimName, pod.Namespace); claimInfo != nil {
+			// We delay checkpointing of this change until this call
+			// returns successfully. It is OK to do this because we
+			// will only return successfully from this call if the
+			// checkpoint has succeeded. That means if the kubelet is
+			// ever restarted before this checkpoint succeeds, the pod
+			// whose resources are being prepared would never have
+			// started, so it's OK (actually correct) to not include it
+			// in the cache.
+			claimInfo.addPodReference(pod.UID)
+			continue
+		}
+
 		// Grab the allocation.resourceHandles. If there are no
 		// allocation.resourceHandles, create a single resourceHandle with no
 		// content. This will trigger processing of this claim by a single

From bde66bfb55cbf2134dde1f62b21059c6bcf5c049 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Wed, 21 Jun 2023 16:43:46 +0200
Subject: [PATCH 3/3] kubelet dra: restore skipping of unused resource claims

1aeec10efb removed iterating over containers in favor of iterating over
pod claims. This had the unintended consequence that NodePrepareResource
gets called unnecessarily when no container needs the claim. The more
natural behavior is to skip unused resources.

This enables (theoretical, at this time) use cases where some DRA driver
relies on the controller part to influence scheduling, but then doesn't
use CDI with containers.
---
 pkg/kubelet/cm/dra/manager.go | 38 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/pkg/kubelet/cm/dra/manager.go b/pkg/kubelet/cm/dra/manager.go
index 7ef397bfcd4..2596646c473 100644
--- a/pkg/kubelet/cm/dra/manager.go
+++ b/pkg/kubelet/cm/dra/manager.go
@@ -67,7 +67,8 @@ func NewManagerImpl(kubeClient clientset.Interface, stateFileDirectory string) (
 // containerResources on success.
 func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
 	for i := range pod.Spec.ResourceClaims {
-		claimName := resourceclaim.Name(pod, &pod.Spec.ResourceClaims[i])
+		podClaim := &pod.Spec.ResourceClaims[i]
+		claimName := resourceclaim.Name(pod, podClaim)
 		klog.V(3).InfoS("Processing resource", "claim", claimName, "pod", pod.Name)

 		// Query claim object from the API server
@@ -85,6 +86,13 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
 				pod.Name, pod.UID, claimName, resourceClaim.UID)
 		}

+		// If no container actually uses the claim, then we don't need
+		// to prepare it.
+		if !claimIsUsedByPod(podClaim, pod) {
+			klog.V(5).InfoS("Skipping unused resource", "claim", claimName, "pod", pod.Name)
+			continue
+		}
+
 		// Is the resource already prepared? Then add the pod UID to it.
 		if claimInfo := m.cache.get(claimName, pod.Namespace); claimInfo != nil {
 			// We delay checkpointing of this change until this call
@@ -178,6 +186,34 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
 	return nil
 }

+func claimIsUsedByPod(podClaim *v1.PodResourceClaim, pod *v1.Pod) bool {
+	if claimIsUsedByContainers(podClaim, pod.Spec.InitContainers) {
+		return true
+	}
+	if claimIsUsedByContainers(podClaim, pod.Spec.Containers) {
+		return true
+	}
+	return false
+}
+
+func claimIsUsedByContainers(podClaim *v1.PodResourceClaim, containers []v1.Container) bool {
+	for i := range containers {
+		if claimIsUsedByContainer(podClaim, &containers[i]) {
+			return true
+		}
+	}
+	return false
+}
+
+func claimIsUsedByContainer(podClaim *v1.PodResourceClaim, container *v1.Container) bool {
+	for _, c := range container.Resources.Claims {
+		if c.Name == podClaim.Name {
+			return true
+		}
+	}
+	return false
+}
+
 // GetResources gets a ContainerInfo object from the claimInfo cache.
 // This information is used by the caller to update a container config.
 func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*ContainerInfo, error) {
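
Not part of the series: a minimal table-driven test sketch for the claimIsUsedByPod
helper added in PATCH 3/3, in case it helps reviewing the container-matching logic
in isolation. The podWithClaims helper and the test case names are illustrative only;
everything else relies on the helpers introduced above.

package dra

import (
	"testing"

	v1 "k8s.io/api/core/v1"
)

// podWithClaims is an illustrative helper: it builds a pod with one container
// per claim name, each referencing that claim in resources.claims.
func podWithClaims(claimNames ...string) *v1.Pod {
	pod := &v1.Pod{}
	for _, name := range claimNames {
		pod.Spec.Containers = append(pod.Spec.Containers, v1.Container{
			Resources: v1.ResourceRequirements{
				Claims: []v1.ResourceClaim{{Name: name}},
			},
		})
	}
	return pod
}

func TestClaimIsUsedByPod(t *testing.T) {
	podClaim := v1.PodResourceClaim{Name: "my-claim"}
	testcases := map[string]struct {
		pod      *v1.Pod
		expected bool
	}{
		"no-containers":          {pod: podWithClaims(), expected: false},
		"one-container-uses-it":  {pod: podWithClaims("my-claim"), expected: true},
		"only-a-different-claim": {pod: podWithClaims("other-claim"), expected: false},
	}
	for name, tc := range testcases {
		t.Run(name, func(t *testing.T) {
			if actual := claimIsUsedByPod(&podClaim, tc.pod); actual != tc.expected {
				t.Errorf("claimIsUsedByPod: expected %v, got %v", tc.expected, actual)
			}
		})
	}
}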