dra resourceclaim controller: delete generated claims when pod is done
When a pod is done but not getting removed for a while, a claim that was generated for that pod can already be deleted. Deleting the claim then also triggers its deallocation.
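The deletion path below keys off an isPodDone helper that already exists in the controller and is not part of this diff. As a rough sketch only (not the controller's actual implementation, and assuming v1 is k8s.io/api/core/v1), such a check boils down to testing for a terminal pod phase:

// isPodDone is an illustrative sketch of the "pod is done" check used below.
// The controller's real helper may take additional signals into account,
// for example a pod that is already being deleted.
func isPodDone(pod *v1.Pod) bool {
	return pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed
}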
@@ -26,6 +26,7 @@ import (
 	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
 	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/wait"
 	v1informers "k8s.io/client-go/informers/core/v1"
@@ -42,6 +43,7 @@ import (
 	"k8s.io/klog/v2"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 	"k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
+	"k8s.io/utils/pointer"
 )
 
 const (
@@ -482,9 +484,54 @@ func (ec *Controller) syncClaim(ctx context.Context, namespace, name string) err
 		}
 	}
 
+	if len(valid) == 0 {
+		// Claim is not reserved. If it was generated for a pod and
+		// that pod is not going to run, the claim can be
+		// deleted. Normally the garbage collector does that, but the
+		// pod itself might not get deleted for a while.
+		podName, podUID := owningPod(claim)
+		if podName != "" {
+			pod, err := ec.podLister.Pods(claim.Namespace).Get(podName)
+			switch {
+			case err == nil:
+				// Pod already replaced or not going to run?
+				if pod.UID != podUID || isPodDone(pod) {
+					// We are certain that the owning pod is not going to need
+					// the claim and therefore remove the claim.
+					logger.V(5).Info("deleting unused generated claim", "claim", klog.KObj(claim), "pod", klog.KObj(pod))
+					err := ec.kubeClient.ResourceV1alpha2().ResourceClaims(claim.Namespace).Delete(ctx, claim.Name, metav1.DeleteOptions{})
+					if err != nil {
+						return fmt.Errorf("delete claim: %v", err)
+					}
+				} else {
+					logger.V(6).Info("wrong pod content, not deleting claim", "claim", klog.KObj(claim), "podUID", podUID, "podContent", pod)
+				}
+			case errors.IsNotFound(err):
+				// We might not know the pod *yet*. Instead of doing an expensive API call,
+				// let the garbage collector handle the case that the pod is truly gone.
+				logger.V(5).Info("pod for claim not found", "claim", klog.KObj(claim), "pod", klog.KRef(claim.Namespace, podName))
+			default:
+				return fmt.Errorf("lookup pod: %v", err)
+			}
+		} else {
+			logger.V(5).Info("claim not generated for a pod", "claim", klog.KObj(claim))
+		}
+	}
+
 	return nil
 }
 
+func owningPod(claim *resourcev1alpha2.ResourceClaim) (string, types.UID) {
+	for _, owner := range claim.OwnerReferences {
+		if pointer.BoolDeref(owner.Controller, false) &&
+			owner.APIVersion == "v1" &&
+			owner.Kind == "Pod" {
+			return owner.Name, owner.UID
+		}
+	}
+	return "", ""
+}
+
 // podResourceClaimIndexFunc is an index function that returns ResourceClaim keys (=
 // namespace/name) for ResourceClaimTemplates in a given pod.
 func podResourceClaimIndexFunc(obj interface{}) ([]string, error) {

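For illustration only (not part of the commit): a minimal sketch of how the new owningPod helper resolves the generating pod from a claim's owner references, using the imports added above. The claim name, namespace, pod name, and UID are made-up values.

	claim := &resourcev1alpha2.ResourceClaim{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "my-pod-my-resource", // hypothetical generated claim name
			Namespace: "default",
			OwnerReferences: []metav1.OwnerReference{{
				APIVersion: "v1",
				Kind:       "Pod",
				Name:       "my-pod",
				UID:        types.UID("1234"),
				Controller: pointer.Bool(true),
			}},
		},
	}
	podName, podUID := owningPod(claim)
	// podName == "my-pod" and podUID == "1234" here; both stay empty when the
	// claim has no controlling pod owner reference, in which case syncClaim
	// leaves any cleanup to the garbage collector.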
@@ -216,6 +216,18 @@ func TestSyncHandler(t *testing.T) {
 			expectedClaims:  []resourcev1alpha2.ResourceClaim{*testClaimReserved},
 			expectedMetrics: expectedMetrics{0, 0},
 		},
+		{
+			name: "delete-claim-when-done",
+			pods: func() []*v1.Pod {
+				pods := []*v1.Pod{testPodWithResource.DeepCopy()}
+				pods[0].Status.Phase = v1.PodSucceeded
+				return pods
+			}(),
+			key:             claimKey(testClaimReserved),
+			claims:          []*resourcev1alpha2.ResourceClaim{testClaimReserved},
+			expectedClaims:  nil,
+			expectedMetrics: expectedMetrics{0, 0},
+		},
 	}
 
 	for _, tc := range tests {

@@ -212,7 +212,7 @@ func buildControllerRoles() ([]rbacv1.ClusterRole, []rbacv1.ClusterRoleBinding)
 			Rules: []rbacv1.PolicyRule{
 				rbacv1helpers.NewRule("get", "list", "watch").Groups(legacyGroup).Resources("pods").RuleOrDie(),
 				rbacv1helpers.NewRule("update").Groups(legacyGroup).Resources("pods/finalizers").RuleOrDie(),
-				rbacv1helpers.NewRule("get", "list", "watch", "create").Groups(resourceGroup).Resources("resourceclaims").RuleOrDie(),
+				rbacv1helpers.NewRule("get", "list", "watch", "create", "delete").Groups(resourceGroup).Resources("resourceclaims").RuleOrDie(),
 				rbacv1helpers.NewRule("update", "patch").Groups(resourceGroup).Resources("resourceclaims/status").RuleOrDie(),
 				eventsRule(),
 			},

@@ -245,6 +245,38 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 					return f.ClientSet.ResourceV1alpha2().ResourceClaims(pod.Namespace).Get(ctx, claim.Name, metav1.GetOptions{})
 				}).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.ReservedFor", gomega.BeEmpty()), "reservation should have been removed")
 			})
+
+			ginkgo.It("deletes generated claims when pod is done", func(ctx context.Context) {
+				parameters := b.parameters()
+				pod, template := b.podInline(allocationMode)
+				pod.Spec.Containers[0].Command = []string{"true"}
+				b.create(ctx, parameters, template, pod)
+
+				ginkgo.By("waiting for pod to finish")
+				framework.ExpectNoError(e2epod.WaitForPodNoLongerRunningInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace), "wait for pod to finish")
+				ginkgo.By("waiting for claim to be deleted")
+				gomega.Eventually(ctx, func(ctx context.Context) ([]resourcev1alpha2.ResourceClaim, error) {
+					claims, err := f.ClientSet.ResourceV1alpha2().ResourceClaims(pod.Namespace).List(ctx, metav1.ListOptions{})
+					if err != nil {
+						return nil, err
+					}
+					return claims.Items, nil
+				}).WithTimeout(f.Timeouts.PodDelete).Should(gomega.BeEmpty(), "claim should have been deleted")
+			})
+
+			ginkgo.It("does not delete generated claims when pod is restarting", func(ctx context.Context) {
+				parameters := b.parameters()
+				pod, template := b.podInline(allocationMode)
+				pod.Spec.Containers[0].Command = []string{"sh", "-c", "sleep 1; exit 1"}
+				pod.Spec.RestartPolicy = v1.RestartPolicyAlways
+				b.create(ctx, parameters, template, pod)
+
+				ginkgo.By("waiting for pod to restart twice")
+				gomega.Eventually(ctx, func(ctx context.Context) (*v1.Pod, error) {
+					return f.ClientSet.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{})
+				}).WithTimeout(f.Timeouts.PodStartSlow).Should(gomega.HaveField("Status.ContainerStatuses", gomega.ContainElements(gomega.HaveField("RestartCount", gomega.BeNumerically(">=", 2)))))
+				gomega.Expect(driver.Controller.GetNumAllocations()).To(gomega.Equal(int64(1)), "number of allocations")
+			})
 		}
 
 		ginkgo.Context("with delayed allocation", func() {