kubelet: Rejected pods should be filtered from admission

A pod that has been rejected by admission will have the status manager
set its phase to Failed locally, which may take some time to
propagate to the apiserver. Until that happens, the rejected pod
continues to be counted during admission, an unintended regression
introduced when pod worker state was made authoritative.

A pod that is terminal in the API may still be consuming resources
on the system, so it should still be included in admission.
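
Concretely, the filter has to consult two sources of truth: the pod's phase as last seen from the apiserver, and the phase cached locally by the status manager, which is where a rejection lands first. Below is a minimal sketch of that decision order; it is not the kubelet's code (the real method is filterOutInactivePods, in the diff further down), and the pod struct and isActive helper are illustrative stand-ins for the pod worker and status manager lookups.

    package main

    import "fmt"

    type phase string

    const (
        podRunning   phase = "Running"
        podSucceeded phase = "Succeeded"
        podFailed    phase = "Failed"
    )

    type pod struct {
        name                 string
        apiPhase             phase  // phase last seen from the apiserver
        localPhase           *phase // phase cached by the status manager, if set
        knownTerminated      bool   // pod worker finished termination
        terminationRequested bool   // pod worker is still tearing the pod down
    }

    // isActive mirrors the decision order in the diff: fully terminated pods
    // are never active; otherwise a pod is terminal if either the API object
    // or the local status cache says so, and a terminal pod stays active only
    // while termination is still in progress.
    func isActive(p pod) bool {
        if p.knownTerminated {
            return false
        }
        isTerminal := p.apiPhase == podSucceeded || p.apiPhase == podFailed
        if !isTerminal && p.localPhase != nil {
            // a rejected pod is Failed here before the apiserver reflects it
            isTerminal = *p.localPhase == podSucceeded || *p.localPhase == podFailed
        }
        return !isTerminal || p.terminationRequested
    }

    func main() {
        failed := podFailed
        rejected := pod{name: "rejected", apiPhase: podRunning, localPhase: &failed}
        terminating := pod{name: "terminating", apiPhase: podFailed, terminationRequested: true}
        fmt.Println(isActive(rejected))    // false: excluded from admission
        fmt.Println(isActive(terminating)) // true: may still hold resources
    }
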
commit 17d32ed0b8 (parent a1d089f372)
Author: Clayton Coleman
Date:   2021-09-07 11:49:20 -04:00

3 changed files with 50 additions and 20 deletions


@@ -2217,7 +2217,7 @@ func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
 		if !kl.podWorkers.IsPodTerminationRequested(pod.UID) {
 			// We failed pods that we rejected, so activePods include all admitted
 			// pods that are alive.
-			activePods := kl.filterOutTerminatedPods(existingPods)
+			activePods := kl.filterOutInactivePods(existingPods)
 			// Check if we can admit the pod; if not, reject it.
 			if ok, reason, message := kl.canAdmitPod(activePods, pod); !ok {


@@ -92,16 +92,17 @@ func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) {
 	return pods, nil
 }
-// GetActivePods returns pods that may have a running container (a
-// terminated pod is one that is known to have no running containers and
-// will not get any more).
+// GetActivePods returns pods that have been admitted to the kubelet that
+// are not fully terminated. This is mapped to the "desired state" of the
+// kubelet - what pods should be running.
 //
-// TODO: This method must include pods that have been force deleted from
-// the config source (and thus removed from the pod manager) but are still
-// terminating.
+// WARNING: Currently this list does not include pods that have been force
+// deleted but may still be terminating, which means resources assigned to
+// those pods during admission may still be in use. See
+// https://github.com/kubernetes/kubernetes/issues/104824
 func (kl *Kubelet) GetActivePods() []*v1.Pod {
 	allPods := kl.podManager.GetPods()
-	activePods := kl.filterOutTerminatedPods(allPods)
+	activePods := kl.filterOutInactivePods(allPods)
 	return activePods
 }
@@ -968,19 +969,32 @@ func (kl *Kubelet) podResourcesAreReclaimed(pod *v1.Pod) bool {
 	return kl.PodResourcesAreReclaimed(pod, status)
 }
-// filterOutTerminatedPods returns pods that are not in a terminal phase
+// filterOutInactivePods returns pods that are not in a terminal phase
 // or are known to be fully terminated. This method should only be used
 // when the set of pods being filtered is upstream of the pod worker, i.e.
 // the pods the pod manager is aware of.
-func (kl *Kubelet) filterOutTerminatedPods(pods []*v1.Pod) []*v1.Pod {
+func (kl *Kubelet) filterOutInactivePods(pods []*v1.Pod) []*v1.Pod {
 	filteredPods := make([]*v1.Pod, 0, len(pods))
 	for _, p := range pods {
+		// if a pod is fully terminated by UID, it should be excluded from the
+		// list of pods
 		if kl.podWorkers.IsPodKnownTerminated(p.UID) {
 			continue
 		}
-		if p.Status.Phase == v1.PodSucceeded || p.Status.Phase == v1.PodFailed {
+		// terminal pods are considered inactive UNLESS they are actively terminating
+		isTerminal := p.Status.Phase == v1.PodSucceeded || p.Status.Phase == v1.PodFailed
+		if !isTerminal {
+			// a pod that has been marked terminal within the Kubelet is considered
+			// inactive (may have been rejected by Kubelet admission)
+			if status, ok := kl.statusManager.GetPodStatus(p.UID); ok {
+				isTerminal = status.Phase == v1.PodSucceeded || status.Phase == v1.PodFailed
+			}
+		}
+		if isTerminal && !kl.podWorkers.IsPodTerminationRequested(p.UID) {
 			continue
 		}
 		filteredPods = append(filteredPods, p)
 	}
 	return filteredPods


@@ -1418,15 +1418,18 @@ func TestNetworkErrorsWithoutHostNetwork(t *testing.T) {
 	assert.NoError(t, err, "expected pod with hostNetwork=true to succeed when network in error")
 }
-func TestFilterOutTerminatedPods(t *testing.T) {
+func TestFilterOutInactivePods(t *testing.T) {
 	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
 	defer testKubelet.Cleanup()
 	kubelet := testKubelet.kubelet
-	pods := newTestPods(5)
+	pods := newTestPods(8)
 	now := metav1.NewTime(time.Now())
+	// terminal pods are excluded
 	pods[0].Status.Phase = v1.PodFailed
 	pods[1].Status.Phase = v1.PodSucceeded
-	// The pod is terminating, should not filter out.
+	// deleted pod is included unless it's known to be terminated
 	pods[2].Status.Phase = v1.PodRunning
 	pods[2].DeletionTimestamp = &now
 	pods[2].Status.ContainerStatuses = []v1.ContainerStatus{
@@ -1436,18 +1439,31 @@ func TestFilterOutTerminatedPods(t *testing.T) {
 			},
 		}},
 	}
+	// pending and running pods are included
 	pods[3].Status.Phase = v1.PodPending
 	pods[4].Status.Phase = v1.PodRunning
 	kubelet.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{
 		pods[2].UID: true,
 		pods[3].UID: true,
 		pods[4].UID: true,
 	}
+	// pod that is running but has been rejected by admission is excluded
+	pods[5].Status.Phase = v1.PodRunning
+	kubelet.statusManager.SetPodStatus(pods[5], v1.PodStatus{Phase: v1.PodFailed})
+	// pod that is running according to the api but is known terminated is excluded
+	pods[6].Status.Phase = v1.PodRunning
+	kubelet.podWorkers.(*fakePodWorkers).terminated = map[types.UID]bool{
+		pods[6].UID: true,
+	}
-	expected := []*v1.Pod{pods[2], pods[3], pods[4]}
+	// pod that is failed but still terminating is included (it may still be consuming
+	// resources)
+	pods[7].Status.Phase = v1.PodFailed
+	kubelet.podWorkers.(*fakePodWorkers).terminationRequested = map[types.UID]bool{
+		pods[7].UID: true,
+	}
+	expected := []*v1.Pod{pods[2], pods[3], pods[4], pods[7]}
 	kubelet.podManager.SetPods(pods)
-	actual := kubelet.filterOutTerminatedPods(pods)
+	actual := kubelet.filterOutInactivePods(pods)
 	assert.Equal(t, expected, actual)
 }
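
For context on why the call site in the first hunk matters: admission weighs the candidate pod against the resources held by the active set, so an over-inclusive set causes spurious rejections. The sketch below makes that visible with stubs; filterOutInactivePods and canAdmitPod here are simplified stand-ins (the real admit path runs the kubelet's admit handlers), and the capacity-of-two rule and the OutOfCapacity reason string are invented for the example.

    package main

    import "fmt"

    type pod struct {
        name     string
        inactive bool // rejected or otherwise terminal, per the filter above
    }

    // filterOutInactivePods: same shape as the kubelet method, over stub pods.
    func filterOutInactivePods(pods []pod) []pod {
        active := make([]pod, 0, len(pods))
        for _, p := range pods {
            if !p.inactive {
                active = append(active, p)
            }
        }
        return active
    }

    // canAdmitPod: stand-in for the kubelet's admit handlers; admits while
    // fewer than two pods are counted as active.
    func canAdmitPod(activePods []pod, _ pod) (bool, string) {
        if len(activePods) >= 2 {
            return false, "OutOfCapacity"
        }
        return true, ""
    }

    func main() {
        existing := []pod{
            {name: "web"},
            {name: "rejected", inactive: true}, // Failed locally; no longer counted
        }
        activePods := filterOutInactivePods(existing)
        if ok, reason := canAdmitPod(activePods, pod{name: "new"}); !ok {
            fmt.Println("rejected:", reason)
            return
        }
        // with the rejected pod filtered out, the new pod is admitted
        fmt.Println("admitted; active pods counted:", len(activePods))
    }

Before this change, the locally failed pod would have stayed in activePods until the apiserver round trip completed, so the new pod could have been refused even though the node had room.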