
When kubelet initializes, it runs admission for pods and possibly allocates the requested resources. We need to distinguish between node reboot (no containers running) and kubelet restart (containers potentially running). Running pods should always survive a kubelet restart. This means that device allocation on admission should not be attempted, because if a container requires devices and is still running when kubelet is restarting, that container already has devices allocated and working. Thus, we need to properly detect this scenario in the allocation step and handle it explicitly. We need to inform the devicemanager about which pods are already running. Note that if the container runtime is down when kubelet restarts, the approach implemented here won't work. In this scenario, on kubelet restart containers will again fail admission, hitting https://github.com/kubernetes/kubernetes/issues/118559 again. This scenario should however be pretty rare. Signed-off-by: Francesco Romani <fromani@redhat.com>
79 lines
2.8 KiB
Go
79 lines
2.8 KiB
Go
/*
|
|
Copyright 2018 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package cm
|
|
|
|
import (
|
|
"context"
|
|
|
|
"k8s.io/api/core/v1"
|
|
"k8s.io/apimachinery/pkg/util/sets"
|
|
internalapi "k8s.io/cri-api/pkg/apis"
|
|
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
|
"k8s.io/klog/v2"
|
|
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
|
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
|
|
)
|
|
|
|
// hardEvictionReservation returns a resourcelist that includes reservation of resources based on hard eviction thresholds.
|
|
func hardEvictionReservation(thresholds []evictionapi.Threshold, capacity v1.ResourceList) v1.ResourceList {
|
|
if len(thresholds) == 0 {
|
|
return nil
|
|
}
|
|
ret := v1.ResourceList{}
|
|
for _, threshold := range thresholds {
|
|
if threshold.Operator != evictionapi.OpLessThan {
|
|
continue
|
|
}
|
|
switch threshold.Signal {
|
|
case evictionapi.SignalMemoryAvailable:
|
|
memoryCapacity := capacity[v1.ResourceMemory]
|
|
value := evictionapi.GetThresholdQuantity(threshold.Value, &memoryCapacity)
|
|
ret[v1.ResourceMemory] = *value
|
|
case evictionapi.SignalNodeFsAvailable:
|
|
storageCapacity := capacity[v1.ResourceEphemeralStorage]
|
|
value := evictionapi.GetThresholdQuantity(threshold.Value, &storageCapacity)
|
|
ret[v1.ResourceEphemeralStorage] = *value
|
|
}
|
|
}
|
|
return ret
|
|
}
|
|
|
|
func buildContainerMapAndRunningSetFromRuntime(ctx context.Context, runtimeService internalapi.RuntimeService) (containermap.ContainerMap, sets.String) {
|
|
podSandboxMap := make(map[string]string)
|
|
podSandboxList, _ := runtimeService.ListPodSandbox(ctx, nil)
|
|
for _, p := range podSandboxList {
|
|
podSandboxMap[p.Id] = p.Metadata.Uid
|
|
}
|
|
|
|
runningSet := sets.NewString()
|
|
containerMap := containermap.NewContainerMap()
|
|
containerList, _ := runtimeService.ListContainers(ctx, nil)
|
|
for _, c := range containerList {
|
|
if _, exists := podSandboxMap[c.PodSandboxId]; !exists {
|
|
klog.InfoS("No PodSandBox found for the container", "podSandboxId", c.PodSandboxId, "containerName", c.Metadata.Name, "containerId", c.Id)
|
|
continue
|
|
}
|
|
podUID := podSandboxMap[c.PodSandboxId]
|
|
containerMap.Add(podUID, c.Metadata.Name, c.Id)
|
|
if c.State == runtimeapi.ContainerState_CONTAINER_RUNNING {
|
|
klog.V(4).InfoS("Container reported running", "podSandboxId", c.PodSandboxId, "podUID", podUID, "containerName", c.Metadata.Name, "containerId", c.Id)
|
|
runningSet.Insert(c.Id)
|
|
}
|
|
}
|
|
return containerMap, runningSet
|
|
}
|