diff --git a/pkg/kubelet/cm/container_manager.go b/pkg/kubelet/cm/container_manager.go index 3937b57139f..82d4640cb44 100644 --- a/pkg/kubelet/cm/container_manager.go +++ b/pkg/kubelet/cm/container_manager.go @@ -22,7 +22,9 @@ import ( "strings" "time" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" + // TODO: Migrate kubelet to either use its own internal objects or client library. v1 "k8s.io/api/core/v1" internalapi "k8s.io/cri-api/pkg/apis" @@ -122,6 +124,10 @@ type ContainerManager interface { // UnrepareResources unprepares pod resources UnprepareResources(*v1.Pod) error + // PodMightNeedToUnprepareResources returns true if the pod with the given UID + // might need to unprepare resources. + PodMightNeedToUnprepareResources(UID types.UID) bool + // Implements the podresources Provider API for CPUs, Memory and Devices podresources.CPUsProvider podresources.DevicesProvider diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index 3268955911e..332e99e72ee 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -39,6 +39,7 @@ import ( libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" utilerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" @@ -1038,3 +1039,11 @@ func (cm *containerManagerImpl) PrepareResources(pod *v1.Pod, container *v1.Cont func (cm *containerManagerImpl) UnprepareResources(pod *v1.Pod) error { return cm.draManager.UnprepareResources(pod) } + +func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool { + if cm.draManager != nil { + return cm.draManager.PodMightNeedToUnprepareResources(UID) + } + + return false +} diff --git a/pkg/kubelet/cm/container_manager_stub.go b/pkg/kubelet/cm/container_manager_stub.go index 8441befd626..4b2c1f87e8f 100644 --- a/pkg/kubelet/cm/container_manager_stub.go +++ b/pkg/kubelet/cm/container_manager_stub.go @@ -21,6 +21,7 @@ import ( "k8s.io/klog/v2" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" internalapi "k8s.io/cri-api/pkg/apis" podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" @@ -163,6 +164,10 @@ func (cm *containerManagerStub) UnprepareResources(*v1.Pod) error { return nil } +func (cm *containerManagerStub) PodMightNeedToUnprepareResources(UID types.UID) bool { + return false +} + func NewStubContainerManager() ContainerManager { return &containerManagerStub{shouldResetExtendedResourceCapacity: false} } diff --git a/pkg/kubelet/cm/container_manager_windows.go b/pkg/kubelet/cm/container_manager_windows.go index fb9df4b721f..c34c8432527 100644 --- a/pkg/kubelet/cm/container_manager_windows.go +++ b/pkg/kubelet/cm/container_manager_windows.go @@ -30,6 +30,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/record" internalapi "k8s.io/cri-api/pkg/apis" @@ -260,3 +261,7 @@ func (cm *containerManagerImpl) PrepareResources(pod *v1.Pod, container *v1.Cont func (cm *containerManagerImpl) UnprepareResources(*v1.Pod) error { return nil } + +func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool { + return false +} diff --git a/pkg/kubelet/cm/dra/claiminfo.go b/pkg/kubelet/cm/dra/claiminfo.go index cea774237f4..354f8c8bc43 100644 --- a/pkg/kubelet/cm/dra/claiminfo.go +++ b/pkg/kubelet/cm/dra/claiminfo.go @@ -108,3 +108,20 @@ func (cache *claimInfoCache) delete(claimName, namespace string) { delete(cache.claimInfo, claimName+namespace) } + +// hasPodReference checks if there is at least one claim +// that is referenced by the pod with the given UID +// This function is used indirectly by the status manager +// to check if pod can enter termination status +func (cache *claimInfoCache) hasPodReference(UID types.UID) bool { + cache.RLock() + defer cache.RUnlock() + + for _, claimInfo := range cache.claimInfo { + if claimInfo.podUIDs.Has(string(UID)) { + return true + } + } + + return false +} diff --git a/pkg/kubelet/cm/dra/manager.go b/pkg/kubelet/cm/dra/manager.go index c5d257354df..6e702023516 100644 --- a/pkg/kubelet/cm/dra/manager.go +++ b/pkg/kubelet/cm/dra/manager.go @@ -207,11 +207,9 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error { continue } - // Delete pod UID from the cache - claimInfo.deletePodReference(pod.UID) - // Skip calling NodeUnprepareResource if other pods are still referencing it - if len(claimInfo.podUIDs) > 0 { + if len(claimInfo.podUIDs) > 1 { + claimInfo.deletePodReference(pod.UID) continue } @@ -236,6 +234,12 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error { claimInfo.cdiDevices, err) } + // Delete last pod UID only if NodeUnprepareResource call succeeds. + // This ensures that status manager doesn't enter termination status + // for the pod. This logic is implemented in the m.PodMightNeedToUnprepareResources + // and in the claimInfo.hasPodReference. + claimInfo.deletePodReference(pod.UID) + klog.V(3).InfoS("NodeUnprepareResource succeeded", "response", response) // delete resource from the cache m.cache.delete(claimInfo.claimName, pod.Namespace) @@ -243,3 +247,9 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error { return nil } + +// PodMightNeedToUnprepareResources returns true if the pod might need to +// unprepare resources +func (m *ManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool { + return m.cache.hasPodReference(UID) +} diff --git a/pkg/kubelet/cm/dra/types.go b/pkg/kubelet/cm/dra/types.go index 0f1ab678bc1..894b2f30507 100644 --- a/pkg/kubelet/cm/dra/types.go +++ b/pkg/kubelet/cm/dra/types.go @@ -18,6 +18,7 @@ package dra import ( v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" ) @@ -30,6 +31,10 @@ type Manager interface { // UnprepareResources calls NodeUnprepareResource GRPC from DRA plugin to unprepare pod resources UnprepareResources(pod *v1.Pod) error + + // PodMightNeedToUnprepareResources returns true if the pod with the given UID + // might need to unprepare resources. + PodMightNeedToUnprepareResources(UID types.UID) bool } // ContainerInfo contains information required by the runtime to consume prepared resources. diff --git a/pkg/kubelet/cm/fake_container_manager.go b/pkg/kubelet/cm/fake_container_manager.go index 06ba8b872d4..3c3be7bed50 100644 --- a/pkg/kubelet/cm/fake_container_manager.go +++ b/pkg/kubelet/cm/fake_container_manager.go @@ -22,6 +22,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" internalapi "k8s.io/cri-api/pkg/apis" podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" @@ -245,3 +246,7 @@ func (cm *FakeContainerManager) PrepareResources(pod *v1.Pod, container *v1.Cont func (cm *FakeContainerManager) UnprepareResources(*v1.Pod) error { return nil } + +func (cm *FakeContainerManager) PodMightNeedToUnprepareResources(UID types.UID) bool { + return false +} diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index 881492a1506..2e309de82c8 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -927,7 +927,21 @@ func countRunningContainerStatus(status v1.PodStatus) int { // PodCouldHaveRunningContainers returns true if the pod with the given UID could still have running // containers. This returns false if the pod has not yet been started or the pod is unknown. func (kl *Kubelet) PodCouldHaveRunningContainers(pod *v1.Pod) bool { - return kl.podWorkers.CouldHaveRunningContainers(pod.UID) + if kl.podWorkers.CouldHaveRunningContainers(pod.UID) { + return true + } + + // Check if pod might need to unprepare resources before termination + // NOTE: This is a temporary solution. This call is here to avoid changing + // status manager and its tests. + // TODO: extend PodDeletionSafetyProvider interface and implement it + // in a separate Kubelet method. + if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { + if kl.containerManager.PodMightNeedToUnprepareResources(pod.UID) { + return true + } + } + return false } // PodResourcesAreReclaimed returns true if all required node-level resources that a pod was consuming have