Merge pull request #118936 from pohly/dra-deallocate-when-unused

DRA: for delayed allocation, deallocate when no longer used
Kubernetes Prow Robot, 2023-07-01 12:56:48 -07:00 (committed by GitHub)
commit ec87834bae
3 changed files with 45 additions and 2 deletions


@@ -456,6 +456,28 @@ func (ec *Controller) syncClaim(ctx context.Context, namespace, name string) err
 	// TODO (#113700): patch
 	claim := claim.DeepCopy()
 	claim.Status.ReservedFor = valid
+
+	// When a ResourceClaim uses delayed allocation, then it makes sense to
+	// deallocate the claim as soon as the last consumer stops using
+	// it. This ensures that the claim can be allocated again as needed by
+	// some future consumer instead of trying to schedule that consumer
+	// onto the node that was chosen for the previous consumer. It also
+	// releases the underlying resources for use by other claims.
+	//
+	// This has to be triggered by the transition from "was being used" to
+	// "is not used anymore" because a DRA driver is not required to set
+	// `status.reservedFor` together with `status.allocation`, i.e. a claim
+	// that is "currently unused" should not get deallocated.
+	//
+	// This does not matter for claims that were created for a pod. For
+	// those, the resource claim controller will trigger deletion when the
+	// pod is done. However, it doesn't hurt to also trigger deallocation
+	// for such claims and not checking for them keeps this code simpler.
+	if len(valid) == 0 &&
+		claim.Spec.AllocationMode == resourcev1alpha2.AllocationModeWaitForFirstConsumer {
+		claim.Status.DeallocationRequested = true
+	}
+
 	_, err := ec.kubeClient.ResourceV1alpha2().ResourceClaims(claim.Namespace).UpdateStatus(ctx, claim, metav1.UpdateOptions{})
 	if err != nil {
 		return err
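
The added block only sets status.deallocationRequested; actually freeing the underlying resources is up to the DRA driver. The following is only a minimal sketch of how a driver-side controller could honor that flag, assuming a hypothetical maybeDeallocate helper and driverDeallocate cleanup callback (the production driver helper code layers informers, work queues, and retries on top of logic like this):

package main // sketch only

import (
	"context"

	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// maybeDeallocate reacts to status.deallocationRequested on a claim owned by
// this driver: it runs the driver's cleanup and then clears the allocation so
// the claim can be allocated again for some future consumer.
func maybeDeallocate(ctx context.Context, kubeClient kubernetes.Interface, driverName string,
	claim *resourcev1alpha2.ResourceClaim,
	driverDeallocate func(context.Context, *resourcev1alpha2.ResourceClaim) error) error {
	if !claim.Status.DeallocationRequested ||
		claim.Status.Allocation == nil ||
		claim.Status.DriverName != driverName {
		// Nothing allocated, no request pending, or the claim belongs to another driver.
		return nil
	}
	// Release the driver-specific resources first ...
	if err := driverDeallocate(ctx, claim); err != nil {
		return err
	}
	// ... then clear the allocation and the request flag in the claim status.
	claim = claim.DeepCopy()
	claim.Status.Allocation = nil
	claim.Status.DeallocationRequested = false
	_, err := kubeClient.ResourceV1alpha2().ResourceClaims(claim.Namespace).UpdateStatus(ctx, claim, metav1.UpdateOptions{})
	return err
}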


@@ -214,11 +214,11 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
 	plugin, err := app.StartPlugin(logger, "/cdi", d.Name, nodename,
 		app.FileOperations{
 			Create: func(name string, content []byte) error {
-				ginkgo.By(fmt.Sprintf("creating CDI file %s on node %s:\n%s", name, nodename, string(content)))
+				klog.Background().Info("creating CDI file", "node", nodename, "filename", name, "content", string(content))
 				return d.createFile(&pod, name, content)
 			},
 			Remove: func(name string) error {
-				ginkgo.By(fmt.Sprintf("deleting CDI file %s on node %s", name, nodename))
+				klog.Background().Info("deleting CDI file", "node", nodename, "filename", name)
 				return d.removeFile(&pod, name)
 			},
 		},


@@ -254,6 +254,27 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 		ginkgo.Context("with immediate allocation", func() {
 			claimTests(resourcev1alpha2.AllocationModeImmediate)
 		})
+
+		ginkgo.It("must deallocate after use when using delayed allocation", func(ctx context.Context) {
+			parameters := b.parameters()
+			pod := b.podExternal()
+			claim := b.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+			b.create(ctx, parameters, claim, pod)
+
+			gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
+				return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
+			}).WithTimeout(f.Timeouts.PodDelete).ShouldNot(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
+
+			b.testPod(ctx, f.ClientSet, pod)
+
+			ginkgo.By(fmt.Sprintf("deleting pod %s", klog.KObj(pod)))
+			framework.ExpectNoError(b.f.ClientSet.CoreV1().Pods(b.f.Namespace.Name).Delete(ctx, pod.Name, metav1.DeleteOptions{}))
+
+			ginkgo.By("waiting for claim to get deallocated")
+			gomega.Eventually(ctx, func(ctx context.Context) (*resourcev1alpha2.ResourceClaim, error) {
+				return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
+			}).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
+		})
 	})
 
 	ginkgo.Context("multiple nodes", func() {