Add pod disruption conditions for kubelet initiated failures

This commit is contained in:
Michal Wozniak
2022-10-10 13:58:40 +02:00
parent c519bc02e8
commit 52cd6755eb
17 changed files with 883 additions and 21 deletions

View File

@@ -27,11 +27,14 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/record"
v1helper "k8s.io/component-helpers/scheduling/corev1"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
@@ -386,7 +389,16 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
}
message, annotations := evictionMessage(resourceToReclaim, pod, statsFunc)
if m.evictPod(pod, gracePeriodOverride, message, annotations) {
var condition *v1.PodCondition
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
condition = &v1.PodCondition{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Status: v1.ConditionTrue,
Reason: v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet,
Message: message,
}
}
if m.evictPod(pod, gracePeriodOverride, message, annotations, condition) {
metrics.Evictions.WithLabelValues(string(thresholdToReclaim.Signal)).Inc()
return []*v1.Pod{pod}
}
@@ -492,7 +504,7 @@ func (m *managerImpl) emptyDirLimitEviction(podStats statsapi.PodStats, pod *v1.
used := podVolumeUsed[pod.Spec.Volumes[i].Name]
if used != nil && size != nil && size.Sign() == 1 && used.Cmp(*size) > 0 {
// the emptyDir usage exceeds the size limit, evict the pod
if m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessageFmt, pod.Spec.Volumes[i].Name, size.String()), nil) {
if m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessageFmt, pod.Spec.Volumes[i].Name, size.String()), nil, nil) {
metrics.Evictions.WithLabelValues(signalEmptyDirFsLimit).Inc()
return true
}
@@ -519,7 +531,8 @@ func (m *managerImpl) podEphemeralStorageLimitEviction(podStats statsapi.PodStat
podEphemeralStorageLimit := podLimits[v1.ResourceEphemeralStorage]
if podEphemeralStorageTotalUsage.Cmp(podEphemeralStorageLimit) > 0 {
// the total usage of pod exceeds the total size limit of containers, evict the pod
if m.evictPod(pod, 0, fmt.Sprintf(podEphemeralStorageMessageFmt, podEphemeralStorageLimit.String()), nil) {
message := fmt.Sprintf(podEphemeralStorageMessageFmt, podEphemeralStorageLimit.String())
if m.evictPod(pod, 0, message, nil, nil) {
metrics.Evictions.WithLabelValues(signalEphemeralPodFsLimit).Inc()
return true
}
@@ -545,7 +558,7 @@ func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.P
if ephemeralStorageThreshold, ok := thresholdsMap[containerStat.Name]; ok {
if ephemeralStorageThreshold.Cmp(*containerUsed) < 0 {
if m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessageFmt, containerStat.Name, ephemeralStorageThreshold.String()), nil) {
if m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessageFmt, containerStat.Name, ephemeralStorageThreshold.String()), nil, nil) {
metrics.Evictions.WithLabelValues(signalEphemeralContainerFsLimit).Inc()
return true
}
@@ -556,7 +569,7 @@ func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.P
return false
}
func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string, annotations map[string]string) bool {
func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string, annotations map[string]string, condition *v1.PodCondition) bool {
// If the pod is marked as critical and static, and support for critical pod annotations is enabled,
// do not evict such pods. Static pods are not re-admitted after evictions.
// https://github.com/kubernetes/kubernetes/issues/40573 has more details.
@@ -572,6 +585,9 @@ func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg
status.Phase = v1.PodFailed
status.Reason = Reason
status.Message = evictMsg
if condition != nil {
podutil.UpdatePodCondition(status, condition)
}
})
if err != nil {
klog.ErrorS(err, "Eviction manager: pod failed to evict", "pod", klog.KObj(pod))

View File

@@ -23,6 +23,8 @@ import (
"time"
gomock "github.com/golang/mock/gomock"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
@@ -185,6 +187,206 @@ type podToMake struct {
perLocalVolumeInodesUsed string
}
func TestMemoryPressure_VerifyPodStatus(t *testing.T) {
testCases := map[string]struct {
wantPodStatus v1.PodStatus
}{
"eviction due to memory pressure": {
wantPodStatus: v1.PodStatus{
Phase: v1.PodFailed,
Reason: "Evicted",
Message: "The node was low on resource: memory. ",
},
},
}
for name, tc := range testCases {
for _, enablePodDisruptionConditions := range []bool{false, true} {
t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)()
podMaker := makePodWithMemoryStats
summaryStatsMaker := makeMemoryStats
podsToMake := []podToMake{
{name: "below-requests", requests: newResourceList("", "1Gi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "900Mi"},
{name: "above-requests", requests: newResourceList("", "100Mi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "700Mi"},
}
pods := []*v1.Pod{}
podStats := map[*v1.Pod]statsapi.PodStats{}
for _, podToMake := range podsToMake {
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
pods = append(pods, pod)
podStats[pod] = podStat
}
activePodsFunc := func() []*v1.Pod {
return pods
}
fakeClock := testingclock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
},
}
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1500Mi", podStats)}
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
nodeRef: nodeRef,
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
}
// synchronize to detect the memory pressure
manager.synchronize(diskInfoProvider, activePodsFunc)
// verify memory pressure is detected
if !manager.IsUnderMemoryPressure() {
t.Fatalf("Manager should have detected memory pressure")
}
// verify a pod is selected for eviction
if podKiller.pod == nil {
t.Fatalf("Manager should have selected a pod for eviction")
}
wantPodStatus := tc.wantPodStatus.DeepCopy()
if enablePodDisruptionConditions {
wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
Type: "DisruptionTarget",
Status: "True",
Reason: "TerminationByKubelet",
Message: "The node was low on resource: memory. ",
})
}
// verify the pod status after applying the status update function
podKiller.statusFn(&podKiller.pod.Status)
if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
}
})
}
}
}
func TestDiskPressureNodeFs_VerifyPodStatus(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
testCases := map[string]struct {
wantPodStatus v1.PodStatus
}{
"eviction due to disk pressure": {
wantPodStatus: v1.PodStatus{
Phase: v1.PodFailed,
Reason: "Evicted",
Message: "The node was low on resource: ephemeral-storage. ",
},
},
}
for name, tc := range testCases {
for _, enablePodDisruptionConditions := range []bool{false, true} {
t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)()
podMaker := makePodWithDiskStats
summaryStatsMaker := makeDiskStats
podsToMake := []podToMake{
{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
{name: "above-requests", requests: newResourceList("", "", "100Mi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "700Mi"},
}
pods := []*v1.Pod{}
podStats := map[*v1.Pod]statsapi.PodStats{}
for _, podToMake := range podsToMake {
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
pods = append(pods, pod)
podStats[pod] = podStat
}
activePodsFunc := func() []*v1.Pod {
return pods
}
fakeClock := testingclock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
},
}
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1.5Gi", "200Gi", podStats)}
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
nodeRef: nodeRef,
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
}
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc)
// verify menager detected disk pressure
if !manager.IsUnderDiskPressure() {
t.Fatalf("Manager should report disk pressure")
}
// verify a pod is selected for eviction
if podKiller.pod == nil {
t.Fatalf("Manager should have selected a pod for eviction")
}
wantPodStatus := tc.wantPodStatus.DeepCopy()
if enablePodDisruptionConditions {
wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
Type: "DisruptionTarget",
Status: "True",
Reason: "TerminationByKubelet",
Message: "The node was low on resource: ephemeral-storage. ",
})
}
// verify the pod status after applying the status update function
podKiller.statusFn(&podKiller.pod.Status)
if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
}
})
}
}
}
// TestMemoryPressure
func TestMemoryPressure(t *testing.T) {
podMaker := makePodWithMemoryStats