Add pod disruption conditions for kubelet-initiated failures
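
When the PodDisruptionConditions feature gate is enabled, the eviction path below still marks the evicted pod as Failed/Evicted, and additionally appends a DisruptionTarget condition to its status. As a rough, illustrative sketch (not code from this change), the resulting pod status looks approximately like this, using the literal values asserted by the tests in this commit:

    package main

    import (
        "fmt"

        v1 "k8s.io/api/core/v1"
    )

    func main() {
        // Approximate status of an evicted pod with PodDisruptionConditions enabled.
        // "DisruptionTarget" and "TerminationByKubelet" are the string values of the
        // alpha constants v1.AlphaNoCompatGuaranteeDisruptionTarget and
        // v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet used in the diff.
        status := v1.PodStatus{
            Phase:   v1.PodFailed,
            Reason:  "Evicted",
            Message: "The node was low on resource: memory. ",
            Conditions: []v1.PodCondition{{
                Type:    "DisruptionTarget",
                Status:  v1.ConditionTrue,
                Reason:  "TerminationByKubelet",
                Message: "The node was low on resource: memory. ",
            }},
        }
        fmt.Printf("%+v\n", status)
    }

With the gate disabled, no condition is attached and the evicted pod's status is unchanged from the previous behavior.
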
@@ -27,11 +27,14 @@ import (
     v1 "k8s.io/api/core/v1"
     "k8s.io/apimachinery/pkg/api/resource"
+    utilfeature "k8s.io/apiserver/pkg/util/feature"
     "k8s.io/client-go/tools/record"
     v1helper "k8s.io/component-helpers/scheduling/corev1"
     statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
+    podutil "k8s.io/kubernetes/pkg/api/v1/pod"
     apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
     v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
+    "k8s.io/kubernetes/pkg/features"
     evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
     "k8s.io/kubernetes/pkg/kubelet/lifecycle"
     "k8s.io/kubernetes/pkg/kubelet/metrics"
@@ -386,7 +389,16 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
             gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
         }
         message, annotations := evictionMessage(resourceToReclaim, pod, statsFunc)
-        if m.evictPod(pod, gracePeriodOverride, message, annotations) {
+        var condition *v1.PodCondition
+        if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
+            condition = &v1.PodCondition{
+                Type:    v1.AlphaNoCompatGuaranteeDisruptionTarget,
+                Status:  v1.ConditionTrue,
+                Reason:  v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet,
+                Message: message,
+            }
+        }
+        if m.evictPod(pod, gracePeriodOverride, message, annotations, condition) {
             metrics.Evictions.WithLabelValues(string(thresholdToReclaim.Signal)).Inc()
             return []*v1.Pod{pod}
         }
@@ -492,7 +504,7 @@ func (m *managerImpl) emptyDirLimitEviction(podStats statsapi.PodStats, pod *v1.
             used := podVolumeUsed[pod.Spec.Volumes[i].Name]
             if used != nil && size != nil && size.Sign() == 1 && used.Cmp(*size) > 0 {
                 // the emptyDir usage exceeds the size limit, evict the pod
-                if m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessageFmt, pod.Spec.Volumes[i].Name, size.String()), nil) {
+                if m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessageFmt, pod.Spec.Volumes[i].Name, size.String()), nil, nil) {
                     metrics.Evictions.WithLabelValues(signalEmptyDirFsLimit).Inc()
                     return true
                 }
@@ -519,7 +531,8 @@ func (m *managerImpl) podEphemeralStorageLimitEviction(podStats statsapi.PodStat
     podEphemeralStorageLimit := podLimits[v1.ResourceEphemeralStorage]
     if podEphemeralStorageTotalUsage.Cmp(podEphemeralStorageLimit) > 0 {
         // the total usage of pod exceeds the total size limit of containers, evict the pod
-        if m.evictPod(pod, 0, fmt.Sprintf(podEphemeralStorageMessageFmt, podEphemeralStorageLimit.String()), nil) {
+        message := fmt.Sprintf(podEphemeralStorageMessageFmt, podEphemeralStorageLimit.String())
+        if m.evictPod(pod, 0, message, nil, nil) {
             metrics.Evictions.WithLabelValues(signalEphemeralPodFsLimit).Inc()
             return true
         }
@@ -545,7 +558,7 @@ func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.P
 
         if ephemeralStorageThreshold, ok := thresholdsMap[containerStat.Name]; ok {
             if ephemeralStorageThreshold.Cmp(*containerUsed) < 0 {
-                if m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessageFmt, containerStat.Name, ephemeralStorageThreshold.String()), nil) {
+                if m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessageFmt, containerStat.Name, ephemeralStorageThreshold.String()), nil, nil) {
                     metrics.Evictions.WithLabelValues(signalEphemeralContainerFsLimit).Inc()
                     return true
                 }
@@ -556,7 +569,7 @@ func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.P
     return false
 }
 
-func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string, annotations map[string]string) bool {
+func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string, annotations map[string]string, condition *v1.PodCondition) bool {
     // If the pod is marked as critical and static, and support for critical pod annotations is enabled,
     // do not evict such pods. Static pods are not re-admitted after evictions.
     // https://github.com/kubernetes/kubernetes/issues/40573 has more details.
@@ -572,6 +585,9 @@ func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg
         status.Phase = v1.PodFailed
         status.Reason = Reason
         status.Message = evictMsg
+        if condition != nil {
+            podutil.UpdatePodCondition(status, condition)
+        }
     })
     if err != nil {
         klog.ErrorS(err, "Eviction manager: pod failed to evict", "pod", klog.KObj(pod))

@@ -23,6 +23,8 @@ import (
     "time"
 
     gomock "github.com/golang/mock/gomock"
+    "github.com/google/go-cmp/cmp"
+    "github.com/google/go-cmp/cmp/cmpopts"
     v1 "k8s.io/api/core/v1"
     "k8s.io/apimachinery/pkg/api/resource"
     "k8s.io/apimachinery/pkg/types"
@@ -185,6 +187,206 @@ type podToMake struct {
     perLocalVolumeInodesUsed string
 }
 
+func TestMemoryPressure_VerifyPodStatus(t *testing.T) {
+    testCases := map[string]struct {
+        wantPodStatus v1.PodStatus
+    }{
+        "eviction due to memory pressure": {
+            wantPodStatus: v1.PodStatus{
+                Phase:   v1.PodFailed,
+                Reason:  "Evicted",
+                Message: "The node was low on resource: memory. ",
+            },
+        },
+    }
+    for name, tc := range testCases {
+        for _, enablePodDisruptionConditions := range []bool{false, true} {
+            t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
+                defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)()
+
+                podMaker := makePodWithMemoryStats
+                summaryStatsMaker := makeMemoryStats
+                podsToMake := []podToMake{
+                    {name: "below-requests", requests: newResourceList("", "1Gi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "900Mi"},
+                    {name: "above-requests", requests: newResourceList("", "100Mi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "700Mi"},
+                }
+                pods := []*v1.Pod{}
+                podStats := map[*v1.Pod]statsapi.PodStats{}
+                for _, podToMake := range podsToMake {
+                    pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
+                    pods = append(pods, pod)
+                    podStats[pod] = podStat
+                }
+                activePodsFunc := func() []*v1.Pod {
+                    return pods
+                }
+
+                fakeClock := testingclock.NewFakeClock(time.Now())
+                podKiller := &mockPodKiller{}
+                diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
+                diskGC := &mockDiskGC{err: nil}
+                nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
+
+                config := Config{
+                    PressureTransitionPeriod: time.Minute * 5,
+                    Thresholds: []evictionapi.Threshold{
+                        {
+                            Signal:   evictionapi.SignalMemoryAvailable,
+                            Operator: evictionapi.OpLessThan,
+                            Value: evictionapi.ThresholdValue{
+                                Quantity: quantityMustParse("2Gi"),
+                            },
+                        },
+                    },
+                }
+                summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1500Mi", podStats)}
+                manager := &managerImpl{
+                    clock:                        fakeClock,
+                    killPodFunc:                  podKiller.killPodNow,
+                    imageGC:                      diskGC,
+                    containerGC:                  diskGC,
+                    config:                       config,
+                    recorder:                     &record.FakeRecorder{},
+                    summaryProvider:              summaryProvider,
+                    nodeRef:                      nodeRef,
+                    nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
+                    thresholdsFirstObservedAt:    thresholdsObservedAt{},
+                }
+
+                // synchronize to detect the memory pressure
+                manager.synchronize(diskInfoProvider, activePodsFunc)
+
+                // verify memory pressure is detected
+                if !manager.IsUnderMemoryPressure() {
+                    t.Fatalf("Manager should have detected memory pressure")
+                }
+
+                // verify a pod is selected for eviction
+                if podKiller.pod == nil {
+                    t.Fatalf("Manager should have selected a pod for eviction")
+                }
+
+                wantPodStatus := tc.wantPodStatus.DeepCopy()
+                if enablePodDisruptionConditions {
+                    wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
+                        Type:    "DisruptionTarget",
+                        Status:  "True",
+                        Reason:  "TerminationByKubelet",
+                        Message: "The node was low on resource: memory. ",
+                    })
+                }
+
+                // verify the pod status after applying the status update function
+                podKiller.statusFn(&podKiller.pod.Status)
+                if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
+                    t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
+                }
+            })
+        }
+    }
+}
+
+func TestDiskPressureNodeFs_VerifyPodStatus(t *testing.T) {
+    defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
+
+    testCases := map[string]struct {
+        wantPodStatus v1.PodStatus
+    }{
+        "eviction due to disk pressure": {
+            wantPodStatus: v1.PodStatus{
+                Phase:   v1.PodFailed,
+                Reason:  "Evicted",
+                Message: "The node was low on resource: ephemeral-storage. ",
+            },
+        },
+    }
+    for name, tc := range testCases {
+        for _, enablePodDisruptionConditions := range []bool{false, true} {
+            t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
+                defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)()
+
+                podMaker := makePodWithDiskStats
+                summaryStatsMaker := makeDiskStats
+                podsToMake := []podToMake{
+                    {name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
+                    {name: "above-requests", requests: newResourceList("", "", "100Mi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "700Mi"},
+                }
+                pods := []*v1.Pod{}
+                podStats := map[*v1.Pod]statsapi.PodStats{}
+                for _, podToMake := range podsToMake {
+                    pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
+                    pods = append(pods, pod)
+                    podStats[pod] = podStat
+                }
+                activePodsFunc := func() []*v1.Pod {
+                    return pods
+                }
+
+                fakeClock := testingclock.NewFakeClock(time.Now())
+                podKiller := &mockPodKiller{}
+                diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
+                diskGC := &mockDiskGC{err: nil}
+                nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
+
+                config := Config{
+                    PressureTransitionPeriod: time.Minute * 5,
+                    Thresholds: []evictionapi.Threshold{
+                        {
+                            Signal:   evictionapi.SignalNodeFsAvailable,
+                            Operator: evictionapi.OpLessThan,
+                            Value: evictionapi.ThresholdValue{
+                                Quantity: quantityMustParse("2Gi"),
+                            },
+                        },
+                    },
+                }
+                summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1.5Gi", "200Gi", podStats)}
+                manager := &managerImpl{
+                    clock:                        fakeClock,
+                    killPodFunc:                  podKiller.killPodNow,
+                    imageGC:                      diskGC,
+                    containerGC:                  diskGC,
+                    config:                       config,
+                    recorder:                     &record.FakeRecorder{},
+                    summaryProvider:              summaryProvider,
+                    nodeRef:                      nodeRef,
+                    nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
+                    thresholdsFirstObservedAt:    thresholdsObservedAt{},
+                }
+
+                // synchronize
+                manager.synchronize(diskInfoProvider, activePodsFunc)
+
+                // verify manager detected disk pressure
+                if !manager.IsUnderDiskPressure() {
+                    t.Fatalf("Manager should report disk pressure")
+                }
+
+                // verify a pod is selected for eviction
+                if podKiller.pod == nil {
+                    t.Fatalf("Manager should have selected a pod for eviction")
+                }
+
+                wantPodStatus := tc.wantPodStatus.DeepCopy()
+                if enablePodDisruptionConditions {
+                    wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
+                        Type:    "DisruptionTarget",
+                        Status:  "True",
+                        Reason:  "TerminationByKubelet",
+                        Message: "The node was low on resource: ephemeral-storage. ",
+                    })
+                }
+
+                // verify the pod status after applying the status update function
+                podKiller.statusFn(&podKiller.pod.Status)
+                if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
+                    t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
+                }
+            })
+        }
+    }
+}
+
 // TestMemoryPressure
 func TestMemoryPressure(t *testing.T) {
     podMaker := makePodWithMemoryStats
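
As a usage sketch (not part of this commit), a consumer could distinguish kubelet-initiated evictions from other pod failures by checking for the new condition on a failed pod. The helper below is hypothetical; the string literals match the ones asserted in the tests above:

    package main

    import (
        "fmt"

        v1 "k8s.io/api/core/v1"
    )

    // isTerminationByKubelet is a hypothetical helper that reports whether a pod
    // carries the DisruptionTarget condition the eviction manager now attaches.
    func isTerminationByKubelet(pod *v1.Pod) bool {
        for _, c := range pod.Status.Conditions {
            if c.Type == "DisruptionTarget" && c.Status == v1.ConditionTrue && c.Reason == "TerminationByKubelet" {
                return true
            }
        }
        return false
    }

    func main() {
        // A pod shaped like the one the tests above expect after eviction.
        pod := &v1.Pod{Status: v1.PodStatus{
            Phase:  v1.PodFailed,
            Reason: "Evicted",
            Conditions: []v1.PodCondition{{
                Type:    "DisruptionTarget",
                Status:  v1.ConditionTrue,
                Reason:  "TerminationByKubelet",
                Message: "The node was low on resource: memory. ",
            }},
        }}
        fmt.Println("disrupted by kubelet:", isTerminationByKubelet(pod))
    }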