kubelet: do not enter termination status if pod might need to unprepare resources
This commit is contained in:
parent
ae0f38437c
commit
abcb56defb
@ -22,7 +22,9 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"k8s.io/apimachinery/pkg/types"
|
||||||
"k8s.io/apimachinery/pkg/util/sets"
|
"k8s.io/apimachinery/pkg/util/sets"
|
||||||
|
|
||||||
// TODO: Migrate kubelet to either use its own internal objects or client library.
|
// TODO: Migrate kubelet to either use its own internal objects or client library.
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
internalapi "k8s.io/cri-api/pkg/apis"
|
internalapi "k8s.io/cri-api/pkg/apis"
|
||||||
@ -122,6 +124,10 @@ type ContainerManager interface {
|
|||||||
// UnprepareResources unprepares pod resources
|
// UnprepareResources unprepares pod resources
|
||||||
UnprepareResources(*v1.Pod) error
|
UnprepareResources(*v1.Pod) error
|
||||||
|
|
||||||
|
// PodMightNeedToUnprepareResources returns true if the pod with the given UID
|
||||||
|
// might need to unprepare resources.
|
||||||
|
PodMightNeedToUnprepareResources(UID types.UID) bool
|
||||||
|
|
||||||
// Implements the podresources Provider API for CPUs, Memory and Devices
|
// Implements the podresources Provider API for CPUs, Memory and Devices
|
||||||
podresources.CPUsProvider
|
podresources.CPUsProvider
|
||||||
podresources.DevicesProvider
|
podresources.DevicesProvider
|
||||||
|
@ -39,6 +39,7 @@ import (
|
|||||||
libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns"
|
libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns"
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
"k8s.io/apimachinery/pkg/api/resource"
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
|
"k8s.io/apimachinery/pkg/types"
|
||||||
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
||||||
"k8s.io/apimachinery/pkg/util/sets"
|
"k8s.io/apimachinery/pkg/util/sets"
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
@ -1038,3 +1039,11 @@ func (cm *containerManagerImpl) PrepareResources(pod *v1.Pod, container *v1.Cont
|
|||||||
func (cm *containerManagerImpl) UnprepareResources(pod *v1.Pod) error {
|
func (cm *containerManagerImpl) UnprepareResources(pod *v1.Pod) error {
|
||||||
return cm.draManager.UnprepareResources(pod)
|
return cm.draManager.UnprepareResources(pod)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool {
|
||||||
|
if cm.draManager != nil {
|
||||||
|
return cm.draManager.PodMightNeedToUnprepareResources(UID)
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
@ -21,6 +21,7 @@ import (
|
|||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
|
|
||||||
"k8s.io/apimachinery/pkg/api/resource"
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
|
"k8s.io/apimachinery/pkg/types"
|
||||||
internalapi "k8s.io/cri-api/pkg/apis"
|
internalapi "k8s.io/cri-api/pkg/apis"
|
||||||
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
|
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
|
||||||
@ -163,6 +164,10 @@ func (cm *containerManagerStub) UnprepareResources(*v1.Pod) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (cm *containerManagerStub) PodMightNeedToUnprepareResources(UID types.UID) bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func NewStubContainerManager() ContainerManager {
|
func NewStubContainerManager() ContainerManager {
|
||||||
return &containerManagerStub{shouldResetExtendedResourceCapacity: false}
|
return &containerManagerStub{shouldResetExtendedResourceCapacity: false}
|
||||||
}
|
}
|
||||||
|
@ -30,6 +30,7 @@ import (
|
|||||||
|
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
"k8s.io/apimachinery/pkg/api/resource"
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
|
"k8s.io/apimachinery/pkg/types"
|
||||||
clientset "k8s.io/client-go/kubernetes"
|
clientset "k8s.io/client-go/kubernetes"
|
||||||
"k8s.io/client-go/tools/record"
|
"k8s.io/client-go/tools/record"
|
||||||
internalapi "k8s.io/cri-api/pkg/apis"
|
internalapi "k8s.io/cri-api/pkg/apis"
|
||||||
@ -260,3 +261,7 @@ func (cm *containerManagerImpl) PrepareResources(pod *v1.Pod, container *v1.Cont
|
|||||||
func (cm *containerManagerImpl) UnprepareResources(*v1.Pod) error {
|
func (cm *containerManagerImpl) UnprepareResources(*v1.Pod) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
@ -108,3 +108,20 @@ func (cache *claimInfoCache) delete(claimName, namespace string) {
|
|||||||
|
|
||||||
delete(cache.claimInfo, claimName+namespace)
|
delete(cache.claimInfo, claimName+namespace)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hasPodReference checks if there is at least one claim
|
||||||
|
// that is referenced by the pod with the given UID
|
||||||
|
// This function is used indirectly by the status manager
|
||||||
|
// to check if pod can enter termination status
|
||||||
|
func (cache *claimInfoCache) hasPodReference(UID types.UID) bool {
|
||||||
|
cache.RLock()
|
||||||
|
defer cache.RUnlock()
|
||||||
|
|
||||||
|
for _, claimInfo := range cache.claimInfo {
|
||||||
|
if claimInfo.podUIDs.Has(string(UID)) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
@ -207,11 +207,9 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete pod UID from the cache
|
|
||||||
claimInfo.deletePodReference(pod.UID)
|
|
||||||
|
|
||||||
// Skip calling NodeUnprepareResource if other pods are still referencing it
|
// Skip calling NodeUnprepareResource if other pods are still referencing it
|
||||||
if len(claimInfo.podUIDs) > 0 {
|
if len(claimInfo.podUIDs) > 1 {
|
||||||
|
claimInfo.deletePodReference(pod.UID)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -236,6 +234,12 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
|
|||||||
claimInfo.cdiDevices, err)
|
claimInfo.cdiDevices, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Delete last pod UID only if NodeUnprepareResource call succeeds.
|
||||||
|
// This ensures that status manager doesn't enter termination status
|
||||||
|
// for the pod. This logic is implemented in the m.PodMightNeedToUnprepareResources
|
||||||
|
// and in the claimInfo.hasPodReference.
|
||||||
|
claimInfo.deletePodReference(pod.UID)
|
||||||
|
|
||||||
klog.V(3).InfoS("NodeUnprepareResource succeeded", "response", response)
|
klog.V(3).InfoS("NodeUnprepareResource succeeded", "response", response)
|
||||||
// delete resource from the cache
|
// delete resource from the cache
|
||||||
m.cache.delete(claimInfo.claimName, pod.Namespace)
|
m.cache.delete(claimInfo.claimName, pod.Namespace)
|
||||||
@ -243,3 +247,9 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PodMightNeedToUnprepareResources returns true if the pod might need to
|
||||||
|
// unprepare resources
|
||||||
|
func (m *ManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool {
|
||||||
|
return m.cache.hasPodReference(UID)
|
||||||
|
}
|
||||||
|
@ -18,6 +18,7 @@ package dra
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/types"
|
||||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -30,6 +31,10 @@ type Manager interface {
|
|||||||
|
|
||||||
// UnprepareResources calls NodeUnprepareResource GRPC from DRA plugin to unprepare pod resources
|
// UnprepareResources calls NodeUnprepareResource GRPC from DRA plugin to unprepare pod resources
|
||||||
UnprepareResources(pod *v1.Pod) error
|
UnprepareResources(pod *v1.Pod) error
|
||||||
|
|
||||||
|
// PodMightNeedToUnprepareResources returns true if the pod with the given UID
|
||||||
|
// might need to unprepare resources.
|
||||||
|
PodMightNeedToUnprepareResources(UID types.UID) bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// ContainerInfo contains information required by the runtime to consume prepared resources.
|
// ContainerInfo contains information required by the runtime to consume prepared resources.
|
||||||
|
@ -22,6 +22,7 @@ import (
|
|||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
|
|
||||||
"k8s.io/apimachinery/pkg/api/resource"
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
|
"k8s.io/apimachinery/pkg/types"
|
||||||
internalapi "k8s.io/cri-api/pkg/apis"
|
internalapi "k8s.io/cri-api/pkg/apis"
|
||||||
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
|
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
|
||||||
@ -245,3 +246,7 @@ func (cm *FakeContainerManager) PrepareResources(pod *v1.Pod, container *v1.Cont
|
|||||||
func (cm *FakeContainerManager) UnprepareResources(*v1.Pod) error {
|
func (cm *FakeContainerManager) UnprepareResources(*v1.Pod) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (cm *FakeContainerManager) PodMightNeedToUnprepareResources(UID types.UID) bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
@ -927,7 +927,21 @@ func countRunningContainerStatus(status v1.PodStatus) int {
|
|||||||
// PodCouldHaveRunningContainers returns true if the pod with the given UID could still have running
|
// PodCouldHaveRunningContainers returns true if the pod with the given UID could still have running
|
||||||
// containers. This returns false if the pod has not yet been started or the pod is unknown.
|
// containers. This returns false if the pod has not yet been started or the pod is unknown.
|
||||||
func (kl *Kubelet) PodCouldHaveRunningContainers(pod *v1.Pod) bool {
|
func (kl *Kubelet) PodCouldHaveRunningContainers(pod *v1.Pod) bool {
|
||||||
return kl.podWorkers.CouldHaveRunningContainers(pod.UID)
|
if kl.podWorkers.CouldHaveRunningContainers(pod.UID) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if pod might need to unprepare resources before termination
|
||||||
|
// NOTE: This is a temporary solution. This call is here to avoid changing
|
||||||
|
// status manager and its tests.
|
||||||
|
// TODO: extend PodDeletionSafetyProvider interface and implement it
|
||||||
|
// in a separate Kubelet method.
|
||||||
|
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
|
||||||
|
if kl.containerManager.PodMightNeedToUnprepareResources(pod.UID) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// PodResourcesAreReclaimed returns true if all required node-level resources that a pod was consuming have
|
// PodResourcesAreReclaimed returns true if all required node-level resources that a pod was consuming have
|
||||||
|
Loading…
Reference in New Issue
Block a user