kubelet: Rejected pods should be filtered from admission

A pod that has been rejected by admission will have its phase set
to Failed locally by the status manager, which may take some time
to propagate to the apiserver. Until the apiserver reflects that
change, the rejected pod continues to be counted during admission,
an unintended regression introduced when pod worker state was made
authoritative.

A pod that is terminal in the API may still be consuming resources
on the system, so it should still be included in admission.
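
The fix below consults the status manager's local copy of the phase, not just the pod object from the apiserver. A condensed sketch of the resulting predicate, with the status manager and pod worker lookups flattened into plain parameters for illustration (assumes k8s.io/api/core/v1 imported as v1; podIsInactive is a name invented here, not a kubelet function):

	// podIsInactive reports whether a pod should be excluded from the
	// admission set. localPhase is the status manager's locally recorded
	// phase, which may lead the apiserver (e.g. after a rejection).
	func podIsInactive(apiPhase, localPhase v1.PodPhase, hasLocalStatus, knownTerminated, terminationRequested bool) bool {
		if knownTerminated {
			// the pod worker has fully terminated this pod
			return true
		}
		isTerminal := apiPhase == v1.PodSucceeded || apiPhase == v1.PodFailed
		if !isTerminal && hasLocalStatus {
			isTerminal = localPhase == v1.PodSucceeded || localPhase == v1.PodFailed
		}
		// a terminal pod that is still terminating may hold resources: keep it
		return isTerminal && !terminationRequested
	}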
Clayton Coleman 2021-09-07 11:49:20 -04:00
commit 17d32ed0b8 (parent a1d089f372)
3 changed files with 50 additions and 20 deletions

pkg/kubelet/kubelet.go

@@ -2217,7 +2217,7 @@ func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
 		if !kl.podWorkers.IsPodTerminationRequested(pod.UID) {
 			// We failed pods that we rejected, so activePods include all admitted
 			// pods that are alive.
-			activePods := kl.filterOutTerminatedPods(existingPods)
+			activePods := kl.filterOutInactivePods(existingPods)
 			// Check if we can admit the pod; if not, reject it.
 			if ok, reason, message := kl.canAdmitPod(activePods, pod); !ok {
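
For context, canAdmitPod (paraphrased here from the kubelet source of roughly this vintage; treat it as a sketch rather than the verbatim code) walks the registered admit handlers, each of which sees the candidate pod plus the filtered activePods list as everything already holding resources on the node:

	func (kl *Kubelet) canAdmitPod(pods []*v1.Pod, pod *v1.Pod) (bool, string, string) {
		attrs := &lifecycle.PodAdmitAttributes{Pod: pod, OtherPods: pods}
		for _, handler := range kl.admitHandlers {
			if result := handler.Admit(attrs); !result.Admit {
				return false, result.Reason, result.Message
			}
		}
		return true, "", ""
	}

Any stale entry in activePods therefore inflates the resources the handlers believe are already committed, which is exactly how a rejected-but-not-yet-propagated pod could block a subsequent admission.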

pkg/kubelet/kubelet_pods.go

@@ -92,16 +92,17 @@ func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) {
 	return pods, nil
 }
 
-// GetActivePods returns pods that may have a running container (a
-// terminated pod is one that is known to have no running containers and
-// will not get any more).
+// GetActivePods returns pods that have been admitted to the kubelet that
+// are not fully terminated. This is mapped to the "desired state" of the
+// kubelet - what pods should be running.
 //
-// TODO: This method must include pods that have been force deleted from
-// the config source (and thus removed from the pod manager) but are still
-// terminating.
+// WARNING: Currently this list does not include pods that have been force
+// deleted but may still be terminating, which means resources assigned to
+// those pods during admission may still be in use. See
+// https://github.com/kubernetes/kubernetes/issues/104824
 func (kl *Kubelet) GetActivePods() []*v1.Pod {
 	allPods := kl.podManager.GetPods()
-	activePods := kl.filterOutTerminatedPods(allPods)
+	activePods := kl.filterOutInactivePods(allPods)
 	return activePods
 }
@@ -968,19 +969,32 @@ func (kl *Kubelet) podResourcesAreReclaimed(pod *v1.Pod) bool {
 	return kl.PodResourcesAreReclaimed(pod, status)
 }
 
-// filterOutTerminatedPods returns pods that are not in a terminal phase
+// filterOutInactivePods returns pods that are not in a terminal phase
 // or are known to be fully terminated. This method should only be used
 // when the set of pods being filtered is upstream of the pod worker, i.e.
 // the pods the pod manager is aware of.
-func (kl *Kubelet) filterOutTerminatedPods(pods []*v1.Pod) []*v1.Pod {
+func (kl *Kubelet) filterOutInactivePods(pods []*v1.Pod) []*v1.Pod {
 	filteredPods := make([]*v1.Pod, 0, len(pods))
 	for _, p := range pods {
+		// if a pod is fully terminated by UID, it should be excluded from the
+		// list of pods
 		if kl.podWorkers.IsPodKnownTerminated(p.UID) {
 			continue
 		}
-		if p.Status.Phase == v1.PodSucceeded || p.Status.Phase == v1.PodFailed {
+
+		// terminal pods are considered inactive UNLESS they are actively terminating
+		isTerminal := p.Status.Phase == v1.PodSucceeded || p.Status.Phase == v1.PodFailed
+		if !isTerminal {
+			// a pod that has been marked terminal within the Kubelet is considered
+			// inactive (may have been rejected by Kubelet admission)
+			if status, ok := kl.statusManager.GetPodStatus(p.UID); ok {
+				isTerminal = status.Phase == v1.PodSucceeded || status.Phase == v1.PodFailed
+			}
+		}
+		if isTerminal && !kl.podWorkers.IsPodTerminationRequested(p.UID) {
 			continue
 		}
 		filteredPods = append(filteredPods, p)
 	}
 	return filteredPods
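
The status manager lookup matters because rejection never passes through the pod worker: when an admit handler says no, the kubelet records the failure only locally and lets the apiserver catch up asynchronously. Paraphrased from the kubelet's rejectPod of roughly this vintage (a sketch, not the verbatim code):

	func (kl *Kubelet) rejectPod(pod *v1.Pod, reason, message string) {
		kl.recorder.Eventf(pod, v1.EventTypeWarning, reason, message)
		// Failed lands in the status manager immediately; the apiserver's
		// copy of the pod is updated asynchronously
		kl.statusManager.SetPodStatus(pod, v1.PodStatus{
			Phase:   v1.PodFailed,
			Reason:  reason,
			Message: "Pod " + message})
	}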

pkg/kubelet/kubelet_test.go

@@ -1418,15 +1418,18 @@ func TestNetworkErrorsWithoutHostNetwork(t *testing.T) {
 	assert.NoError(t, err, "expected pod with hostNetwork=true to succeed when network in error")
 }
 
-func TestFilterOutTerminatedPods(t *testing.T) {
+func TestFilterOutInactivePods(t *testing.T) {
 	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
 	defer testKubelet.Cleanup()
 	kubelet := testKubelet.kubelet
-	pods := newTestPods(5)
+	pods := newTestPods(8)
 	now := metav1.NewTime(time.Now())
+
+	// terminal pods are excluded
 	pods[0].Status.Phase = v1.PodFailed
 	pods[1].Status.Phase = v1.PodSucceeded
-	// The pod is terminating, should not filter out.
+
+	// deleted pod is included unless it's known to be terminated
 	pods[2].Status.Phase = v1.PodRunning
 	pods[2].DeletionTimestamp = &now
 	pods[2].Status.ContainerStatuses = []v1.ContainerStatus{
@@ -1436,18 +1439,31 @@ func TestFilterOutTerminatedPods(t *testing.T) {
 		},
 	}},
 	}
+
+	// pending and running pods are included
 	pods[3].Status.Phase = v1.PodPending
 	pods[4].Status.Phase = v1.PodRunning
-	kubelet.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{
-		pods[2].UID: true,
-		pods[3].UID: true,
-		pods[4].UID: true,
+
+	// pod that is running but has been rejected by admission is excluded
+	pods[5].Status.Phase = v1.PodRunning
+	kubelet.statusManager.SetPodStatus(pods[5], v1.PodStatus{Phase: v1.PodFailed})
+
+	// pod that is running according to the api but is known terminated is excluded
+	pods[6].Status.Phase = v1.PodRunning
+	kubelet.podWorkers.(*fakePodWorkers).terminated = map[types.UID]bool{
+		pods[6].UID: true,
 	}
-	expected := []*v1.Pod{pods[2], pods[3], pods[4]}
+
+	// pod that is failed but still terminating is included (it may still be
+	// consuming resources)
+	pods[7].Status.Phase = v1.PodFailed
+	kubelet.podWorkers.(*fakePodWorkers).terminationRequested = map[types.UID]bool{
+		pods[7].UID: true,
+	}
+
+	expected := []*v1.Pod{pods[2], pods[3], pods[4], pods[7]}
 	kubelet.podManager.SetPods(pods)
-	actual := kubelet.filterOutTerminatedPods(pods)
+	actual := kubelet.filterOutInactivePods(pods)
 	assert.Equal(t, expected, actual)
 }
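
To exercise just this test from a kubernetes/kubernetes checkout (assuming a standard Go toolchain and the usual kubelet package layout):

	go test ./pkg/kubelet/ -run TestFilterOutInactivePods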