/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package eviction

import (
	"context"
	"fmt"
	"testing"
	"time"

	gomock "github.com/golang/mock/gomock"
	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/apimachinery/pkg/types"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/tools/record"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
	kubeapi "k8s.io/kubernetes/pkg/apis/core"
	"k8s.io/kubernetes/pkg/apis/scheduling"
	"k8s.io/kubernetes/pkg/features"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
	testingclock "k8s.io/utils/clock/testing"
	"k8s.io/utils/ptr"
)

const (
	lowPriority     = -1
	defaultPriority = 0
	highPriority    = 1
)

// mockPodKiller is used to test which pod was killed and with what arguments.
type mockPodKiller struct {
	pod                 *v1.Pod
	evict               bool
	statusFn            func(*v1.PodStatus)
	gracePeriodOverride *int64
}

// killPodNow records the pod that was killed, along with the eviction flag,
// grace period override, and status update function it was passed.
func (m *mockPodKiller) killPodNow(pod *v1.Pod, evict bool, gracePeriodOverride *int64, statusFn func(*v1.PodStatus)) error {
	m.pod = pod
	m.statusFn = statusFn
	m.evict = evict
	m.gracePeriodOverride = gracePeriodOverride
	return nil
}

// mockDiskInfoProvider is a stub DiskInfoProvider for use in tests.
type mockDiskInfoProvider struct {
	dedicatedImageFs *bool
}

// HasDedicatedImageFs returns the mocked value.
func (m *mockDiskInfoProvider) HasDedicatedImageFs(_ context.Context) (bool, error) {
	return ptr.Deref(m.dedicatedImageFs, false), nil
}

// mockDiskGC is used to simulate invoking image and container garbage collection.
type mockDiskGC struct {
	err                  error
	imageGCInvoked       bool
	containerGCInvoked   bool
	readAndWriteSeparate bool
	fakeSummaryProvider  *fakeSummaryProvider
	summaryAfterGC       *statsapi.Summary
}

// DeleteUnusedImages records the invocation, optionally installs the post-GC
// summary in the fake summary provider, and returns the mocked error.
func (m *mockDiskGC) DeleteUnusedImages(_ context.Context) error {
	m.imageGCInvoked = true
	if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
		m.fakeSummaryProvider.result = m.summaryAfterGC
	}
	return m.err
}

// DeleteAllUnusedContainers records the invocation, optionally installs the
// post-GC summary in the fake summary provider, and returns the mocked error.
func (m *mockDiskGC) DeleteAllUnusedContainers(_ context.Context) error {
	m.containerGCInvoked = true
	if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
		m.fakeSummaryProvider.result = m.summaryAfterGC
	}
	return m.err
}
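
// IsContainerFsSeparateFromImageFs returns the mocked value, indicating
// whether the writeable (container) filesystem is separate from the
// read-only (image) filesystem.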
func (m *mockDiskGC) IsContainerFsSeparateFromImageFs(_ context.Context) bool {
	return m.readAndWriteSeparate
}
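
// makePodWithMemoryStats returns a pod with a single container that carries
// the given requests and limits, along with pod stats reporting the given
// memory working set.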
func makePodWithMemoryStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) {
	pod := newPod(name, priority, []v1.Container{
		newContainer(name, requests, limits),
	}, nil)
	podStats := newPodMemoryStats(pod, resource.MustParse(memoryWorkingSet))
	return pod, podStats
}
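
// makePodWithPIDStats returns a pod with a single container, along with pod
// stats reporting the given number of running processes.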
func makePodWithPIDStats(name string, priority int32, processCount uint64) (*v1.Pod, statsapi.PodStats) {
	pod := newPod(name, priority, []v1.Container{
		newContainer(name, nil, nil),
	}, nil)
	podStats := newPodProcessStats(pod, processCount)
	return pod, podStats
}
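
// makePodWithDiskStats returns a pod with a single container that carries
// the given requests and limits, along with pod stats reporting the given
// root filesystem, logs, and per-local-volume usage.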
func makePodWithDiskStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootFsUsed, logsUsed, perLocalVolumeUsed string) (*v1.Pod, statsapi.PodStats) {
	pod := newPod(name, priority, []v1.Container{
		newContainer(name, requests, limits),
	}, nil)
	podStats := newPodDiskStats(pod, parseQuantity(rootFsUsed), parseQuantity(logsUsed), parseQuantity(perLocalVolumeUsed))
	return pod, podStats
}
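
// makePodWithLocalStorageCapacityIsolationOpen returns a pod with an
// emptyDir volume sized to the pod's memory request, along with pod stats
// whose reported usage is keyed off the pod name, so that individual test
// cases can exceed the emptyDir size limit or the container or pod
// ephemeral-storage limits.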
func makePodWithLocalStorageCapacityIsolationOpen(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) {
	vol := newVolume("local-volume", v1.VolumeSource{
		EmptyDir: &v1.EmptyDirVolumeSource{
			SizeLimit: resource.NewQuantity(requests.Memory().Value(), resource.BinarySI),
		},
	})
	var vols []v1.Volume
	vols = append(vols, vol)
	pod := newPod(name, priority, []v1.Container{
		newContainer(name, requests, limits),
	}, vols)

	var podStats statsapi.PodStats
	switch name {
	case "empty-dir":
		podStats = newPodMemoryStats(pod, *resource.NewQuantity(requests.Memory().Value()*2, resource.BinarySI))
	case "container-ephemeral-storage-limit":
		podStats = newPodMemoryStats(pod, *resource.NewQuantity(limits.StorageEphemeral().Value(), resource.BinarySI))
	case "pod-ephemeral-storage-limit":
		podStats = newPodMemoryStats(pod, *resource.NewQuantity(limits.StorageEphemeral().Value()*2, resource.BinarySI))
	default:
		podStats = newPodMemoryStats(pod, resource.MustParse(memoryWorkingSet))
	}
	return pod, podStats
}
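
// makePIDStats fabricates a stats summary whose node-level rlimit stats
// report the given total and running process counts, together with the
// supplied per-pod stats.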
func makePIDStats(nodeAvailablePIDs string, numberOfRunningProcesses string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary {
	val := resource.MustParse(nodeAvailablePIDs)
	availablePIDs := val.Value()

	parsed := resource.MustParse(numberOfRunningProcesses)
	runningProcesses := parsed.Value()
	result := &statsapi.Summary{
		Node: statsapi.NodeStats{
			Rlimit: &statsapi.RlimitStats{
				MaxPID:                &availablePIDs,
				NumOfRunningProcesses: &runningProcesses,
			},
		},
		Pods: []statsapi.PodStats{},
	}
	for _, podStat := range podStats {
		result.Pods = append(result.Pods, podStat)
	}
	return result
}
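
// makeMemoryStats fabricates a stats summary in which both the node and the
// "pods" system container report the given available memory, together with
// the supplied per-pod stats.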
func makeMemoryStats(nodeAvailableBytes string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary {
	val := resource.MustParse(nodeAvailableBytes)
	availableBytes := uint64(val.Value())
	workingSetBytes := uint64(val.Value())
	result := &statsapi.Summary{
		Node: statsapi.NodeStats{
			Memory: &statsapi.MemoryStats{
				AvailableBytes:  &availableBytes,
				WorkingSetBytes: &workingSetBytes,
			},
			SystemContainers: []statsapi.ContainerStats{
				{
					Name: statsapi.SystemContainerPods,
					Memory: &statsapi.MemoryStats{
						AvailableBytes:  &availableBytes,
						WorkingSetBytes: &workingSetBytes,
					},
				},
			},
		},
		Pods: []statsapi.PodStats{},
	}
	for _, podStat := range podStats {
		result.Pods = append(result.Pods, podStat)
	}
	return result
}
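
// diskStats bundles the available-bytes figures used to fabricate a stats
// summary for the node's root, image, and container filesystems.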
type diskStats struct {
	rootFsAvailableBytes  string
	imageFsAvailableBytes string
	// containerFsAvailableBytes is optional; if not specified, we assume
	// imagefs and containerfs are the same.
	containerFsAvailableBytes string
	podStats                  map[*v1.Pod]statsapi.PodStats
}
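
// makeDiskStats fabricates a stats summary for the node's root, image, and
// container filesystems from the given diskStats, fixing each capacity at
// twice the available bytes.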
func makeDiskStats(diskStats diskStats) *statsapi.Summary {
	rootFsVal := resource.MustParse(diskStats.rootFsAvailableBytes)
	rootFsBytes := uint64(rootFsVal.Value())
	rootFsCapacityBytes := uint64(rootFsVal.Value() * 2)
	imageFsVal := resource.MustParse(diskStats.imageFsAvailableBytes)
	imageFsBytes := uint64(imageFsVal.Value())
	imageFsCapacityBytes := uint64(imageFsVal.Value() * 2)
	if diskStats.containerFsAvailableBytes == "" {
		diskStats.containerFsAvailableBytes = diskStats.imageFsAvailableBytes
	}
	containerFsVal := resource.MustParse(diskStats.containerFsAvailableBytes)
	containerFsBytes := uint64(containerFsVal.Value())
	containerFsCapacityBytes := uint64(containerFsVal.Value() * 2)
	result := &statsapi.Summary{
		Node: statsapi.NodeStats{
			Fs: &statsapi.FsStats{
				AvailableBytes: &rootFsBytes,
				CapacityBytes:  &rootFsCapacityBytes,
			},
			Runtime: &statsapi.RuntimeStats{
				ImageFs: &statsapi.FsStats{
					AvailableBytes: &imageFsBytes,
					CapacityBytes:  &imageFsCapacityBytes,
				},
				ContainerFs: &statsapi.FsStats{
					AvailableBytes: &containerFsBytes,
					CapacityBytes:  &containerFsCapacityBytes,
				},
			},
		},
		Pods: []statsapi.PodStats{},
	}
	for _, podStat := range diskStats.podStats {
		result.Pods = append(result.Pods, podStat)
	}
	return result
}
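
// podToMake describes a pod fixture: its name, priority, resource requests
// and limits, and the usage figures that its fabricated stats will report.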
type podToMake struct {
	name                     string
	priority                 int32
	requests                 v1.ResourceList
	limits                   v1.ResourceList
	memoryWorkingSet         string
	pidUsage                 uint64
	rootFsUsed               string
	logsFsUsed               string
	logsFsInodesUsed         string
	rootFsInodesUsed         string
	perLocalVolumeUsed       string
	perLocalVolumeInodesUsed string
}
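
// TestMemoryPressure_VerifyPodStatus verifies the status applied to a pod
// evicted under memory pressure, with and without the
// PodDisruptionConditions feature gate enabled.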
func TestMemoryPressure_VerifyPodStatus(t *testing.T) {
	testCases := map[string]struct {
		wantPodStatus v1.PodStatus
	}{
		"eviction due to memory pressure; no image fs": {
			wantPodStatus: v1.PodStatus{
				Phase:   v1.PodFailed,
				Reason:  "Evicted",
				Message: "The node was low on resource: memory. Threshold quantity: 2Gi, available: 1500Mi. ",
			},
		},
		"eviction due to memory pressure; image fs": {
			wantPodStatus: v1.PodStatus{
				Phase:   v1.PodFailed,
				Reason:  "Evicted",
				Message: "The node was low on resource: memory. Threshold quantity: 2Gi, available: 1500Mi. ",
			},
		},
	}
	for name, tc := range testCases {
		for _, enablePodDisruptionConditions := range []bool{false, true} {
			t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
				defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)()

				podMaker := makePodWithMemoryStats
				summaryStatsMaker := makeMemoryStats
				podsToMake := []podToMake{
					{name: "below-requests", requests: newResourceList("", "1Gi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "900Mi"},
					{name: "above-requests", requests: newResourceList("", "100Mi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "700Mi"},
				}
				pods := []*v1.Pod{}
				podStats := map[*v1.Pod]statsapi.PodStats{}
				for _, podToMake := range podsToMake {
					pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
					pods = append(pods, pod)
					podStats[pod] = podStat
				}
				activePodsFunc := func() []*v1.Pod {
					return pods
				}

				fakeClock := testingclock.NewFakeClock(time.Now())
				podKiller := &mockPodKiller{}
				diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
				diskGC := &mockDiskGC{err: nil}
				nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

				config := Config{
					PressureTransitionPeriod: time.Minute * 5,
					Thresholds: []evictionapi.Threshold{
						{
							Signal:   evictionapi.SignalMemoryAvailable,
							Operator: evictionapi.OpLessThan,
							Value: evictionapi.ThresholdValue{
								Quantity: quantityMustParse("2Gi"),
							},
						},
					},
				}
				summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1500Mi", podStats)}
				manager := &managerImpl{
					clock:                        fakeClock,
					killPodFunc:                  podKiller.killPodNow,
					imageGC:                      diskGC,
					containerGC:                  diskGC,
					config:                       config,
					recorder:                     &record.FakeRecorder{},
					summaryProvider:              summaryProvider,
					nodeRef:                      nodeRef,
					nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
					thresholdsFirstObservedAt:    thresholdsObservedAt{},
				}

				// synchronize to detect the memory pressure
				_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
				if err != nil {
					t.Fatalf("Manager expects no error but got %v", err)
				}

				// verify memory pressure is detected
				if !manager.IsUnderMemoryPressure() {
					t.Fatalf("Manager should have detected memory pressure")
				}

				// verify a pod is selected for eviction
				if podKiller.pod == nil {
					t.Fatalf("Manager should have selected a pod for eviction")
				}

				wantPodStatus := tc.wantPodStatus.DeepCopy()
				if enablePodDisruptionConditions {
					wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
						Type:    "DisruptionTarget",
						Status:  "True",
						Reason:  "TerminationByKubelet",
						Message: "The node was low on resource: memory. Threshold quantity: 2Gi, available: 1500Mi. ",
					})
				}

				// verify the pod status after applying the status update function
				podKiller.statusFn(&podKiller.pod.Status)
				if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
					t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
				}
			})
		}
	}
}
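
// TestPIDPressure_VerifyPodStatus verifies the status applied to a pod
// evicted under PID pressure, with and without the PodDisruptionConditions
// feature gate enabled.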
func TestPIDPressure_VerifyPodStatus(t *testing.T) {
	testCases := map[string]struct {
		wantPodStatus v1.PodStatus
	}{
		"eviction due to pid pressure": {
			wantPodStatus: v1.PodStatus{
				Phase:   v1.PodFailed,
				Reason:  "Evicted",
				Message: "The node was low on resource: pids. Threshold quantity: 1200, available: 500. ",
			},
		},
	}
	for name, tc := range testCases {
		for _, enablePodDisruptionConditions := range []bool{true, false} {
			t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
				defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)()

				podMaker := makePodWithPIDStats
				summaryStatsMaker := makePIDStats
				podsToMake := []podToMake{
					{name: "pod1", priority: lowPriority, pidUsage: 500},
					{name: "pod2", priority: defaultPriority, pidUsage: 500},
				}
				pods := []*v1.Pod{}
				podStats := map[*v1.Pod]statsapi.PodStats{}
				for _, podToMake := range podsToMake {
					// The per-pod process count is fixed here; the node-level
					// Rlimit stats are what drive the PID pressure signal in
					// this test.
					pod, podStat := podMaker(podToMake.name, podToMake.priority, 2)
					pods = append(pods, pod)
					podStats[pod] = podStat
				}
				activePodsFunc := func() []*v1.Pod {
					return pods
				}

				fakeClock := testingclock.NewFakeClock(time.Now())
				podKiller := &mockPodKiller{}
				diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
				diskGC := &mockDiskGC{err: nil}
				nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

				config := Config{
					PressureTransitionPeriod: time.Minute * 5,
					Thresholds: []evictionapi.Threshold{
						{
							Signal:   evictionapi.SignalPIDAvailable,
							Operator: evictionapi.OpLessThan,
							Value: evictionapi.ThresholdValue{
								Quantity: quantityMustParse("1200"),
							},
						},
					},
				}
				summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1500", "1000", podStats)}
				manager := &managerImpl{
					clock:                        fakeClock,
					killPodFunc:                  podKiller.killPodNow,
					imageGC:                      diskGC,
					containerGC:                  diskGC,
					config:                       config,
					recorder:                     &record.FakeRecorder{},
					summaryProvider:              summaryProvider,
					nodeRef:                      nodeRef,
					nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
					thresholdsFirstObservedAt:    thresholdsObservedAt{},
				}

				// synchronize to detect the PID pressure
				_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
				if err != nil {
					t.Fatalf("Manager expects no error but got %v", err)
				}

				// verify PID pressure is detected
				if !manager.IsUnderPIDPressure() {
					t.Fatalf("Manager should have detected PID pressure")
				}

				// verify a pod is selected for eviction
				if podKiller.pod == nil {
					t.Fatalf("Manager should have selected a pod for eviction")
				}

				wantPodStatus := tc.wantPodStatus.DeepCopy()
				if enablePodDisruptionConditions {
					wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
						Type:    "DisruptionTarget",
						Status:  "True",
						Reason:  "TerminationByKubelet",
						Message: "The node was low on resource: pids. Threshold quantity: 1200, available: 500. ",
					})
				}

				// verify the pod status after applying the status update function
				podKiller.statusFn(&podKiller.pod.Status)
				if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
					t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
				}
			})
		}
	}
}
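
// TestDiskPressureNodeFs_VerifyPodStatus verifies the status applied to a
// pod evicted under disk pressure for node, image, and container filesystem
// configurations, and that a split filesystem with the KubeletSeparateDiskGC
// feature gate disabled surfaces an error.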
func TestDiskPressureNodeFs_VerifyPodStatus(t *testing.T) {
	testCases := map[string]struct {
		nodeFsStats                   string
		imageFsStats                  string
		containerFsStats              string
		evictionMessage               string
		kubeletSeparateDiskFeature    bool
		writeableSeparateFromReadOnly bool
		thresholdToMonitor            evictionapi.Threshold
		podToMakes                    []podToMake
		dedicatedImageFs              *bool
		expectErr                     string
	}{
		"eviction due to disk pressure; no image fs": {
			dedicatedImageFs: ptr.To(false),
			nodeFsStats:      "1.5Gi",
			imageFsStats:     "10Gi",
			containerFsStats: "10Gi",
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalNodeFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("2Gi"),
				},
			},
			evictionMessage: "The node was low on resource: ephemeral-storage. Threshold quantity: 2Gi, available: 1536Mi. ",
			podToMakes: []podToMake{
				{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
				{name: "above-requests", requests: newResourceList("", "", "100Mi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "700Mi"},
			},
		},
		"eviction due to image disk pressure; image fs": {
			dedicatedImageFs: ptr.To(true),
			nodeFsStats:      "1Gi",
			imageFsStats:     "10Gi",
			containerFsStats: "10Gi",
			evictionMessage:  "The node was low on resource: ephemeral-storage. Threshold quantity: 50Gi, available: 10Gi. ",
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalImageFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("50Gi"),
				},
			},
			podToMakes: []podToMake{
				{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
				{name: "above-requests", requests: newResourceList("", "", "50Gi"), limits: newResourceList("", "", "50Gi"), rootFsUsed: "80Gi"},
			},
		},
		"eviction due to container disk pressure; feature off; error; container fs": {
			dedicatedImageFs:              ptr.To(true),
			kubeletSeparateDiskFeature:    false,
			writeableSeparateFromReadOnly: true,
			expectErr:                     "KubeletSeparateDiskGC is turned off but we still have a split filesystem",
			nodeFsStats:                   "1Gi",
			imageFsStats:                  "100Gi",
			containerFsStats:              "10Gi",
			evictionMessage:               "The node was low on resource: ephemeral-storage. Threshold quantity: 50Gi, available: 10Gi. ",
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalContainerFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("50Gi"),
				},
			},
			podToMakes: []podToMake{
				{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
				{name: "above-requests", requests: newResourceList("", "", "50Gi"), limits: newResourceList("", "", "50Gi"), rootFsUsed: "80Gi"},
			},
		},
		"eviction due to container disk pressure; container fs": {
			dedicatedImageFs:              ptr.To(true),
			kubeletSeparateDiskFeature:    true,
			writeableSeparateFromReadOnly: true,
			nodeFsStats:                   "10Gi",
			imageFsStats:                  "100Gi",
			containerFsStats:              "10Gi",
			evictionMessage:               "The node was low on resource: ephemeral-storage. Threshold quantity: 50Gi, available: 10Gi. ",
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalNodeFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("50Gi"),
				},
			},
			podToMakes: []podToMake{
				{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
				{name: "above-requests", requests: newResourceList("", "", "50Gi"), limits: newResourceList("", "", "50Gi"), rootFsUsed: "80Gi"},
			},
		},
	}
	for name, tc := range testCases {
		for _, enablePodDisruptionConditions := range []bool{false, true} {
			t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
				defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, tc.kubeletSeparateDiskFeature)()
				defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)()

				podMaker := makePodWithDiskStats
				summaryStatsMaker := makeDiskStats
				podsToMake := tc.podToMakes
				wantPodStatus := v1.PodStatus{
					Phase:   v1.PodFailed,
					Reason:  "Evicted",
					Message: tc.evictionMessage,
				}
				pods := []*v1.Pod{}
				podStats := map[*v1.Pod]statsapi.PodStats{}
				for _, podToMake := range podsToMake {
					pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
					pods = append(pods, pod)
					podStats[pod] = podStat
				}
				activePodsFunc := func() []*v1.Pod {
					return pods
				}

				fakeClock := testingclock.NewFakeClock(time.Now())
				podKiller := &mockPodKiller{}
				diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
				diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
				nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

				config := Config{
					PressureTransitionPeriod: time.Minute * 5,
					Thresholds:               []evictionapi.Threshold{tc.thresholdToMonitor},
				}
				diskStat := diskStats{
					rootFsAvailableBytes:      tc.nodeFsStats,
					imageFsAvailableBytes:     tc.imageFsStats,
					containerFsAvailableBytes: tc.containerFsStats,
					podStats:                  podStats,
				}
				summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStat)}
				manager := &managerImpl{
					clock:                        fakeClock,
					killPodFunc:                  podKiller.killPodNow,
					imageGC:                      diskGC,
					containerGC:                  diskGC,
					config:                       config,
					recorder:                     &record.FakeRecorder{},
					summaryProvider:              summaryProvider,
					nodeRef:                      nodeRef,
					nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
					thresholdsFirstObservedAt:    thresholdsObservedAt{},
				}

				// synchronize
				_, synchErr := manager.synchronize(diskInfoProvider, activePodsFunc)

				if synchErr == nil && tc.expectErr != "" {
					t.Fatalf("Manager should report error but did not")
				} else if tc.expectErr != "" && synchErr != nil {
					if diff := cmp.Diff(tc.expectErr, synchErr.Error()); diff != "" {
						t.Errorf("Unexpected error (-want,+got):\n%s", diff)
					}
				} else {
					// verify manager detected disk pressure
					if !manager.IsUnderDiskPressure() {
						t.Fatalf("Manager should report disk pressure")
					}

					// verify a pod is selected for eviction
					if podKiller.pod == nil {
						t.Fatalf("Manager should have selected a pod for eviction")
					}

					if enablePodDisruptionConditions {
						wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
							Type:    "DisruptionTarget",
							Status:  "True",
							Reason:  "TerminationByKubelet",
							Message: tc.evictionMessage,
						})
					}

					// verify the pod status after applying the status update function
					podKiller.statusFn(&podKiller.pod.Status)
					if diff := cmp.Diff(wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
						t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
					}
				}
			})
		}
	}
}

// TestMemoryPressure verifies the eviction manager's end-to-end handling of
// memory pressure: soft and hard thresholds, grace periods, pod selection,
// admission decisions under pressure, and the pressure transition period.
func TestMemoryPressure(t *testing.T) {
	podMaker := makePodWithMemoryStats
	summaryStatsMaker := makeMemoryStats
	podsToMake := []podToMake{
		{name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
		{name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
		{name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"},
	}
	pods := []*v1.Pod{}
	podStats := map[*v1.Pod]statsapi.PodStats{}
	for _, podToMake := range podsToMake {
		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
		pods = append(pods, pod)
		podStats[pod] = podStat
	}
	podToEvict := pods[4]
	activePodsFunc := func() []*v1.Pod {
		return pods
	}

	fakeClock := testingclock.NewFakeClock(time.Now())
	podKiller := &mockPodKiller{}
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal:   evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
			},
			{
				Signal:   evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("2Gi"),
				},
				GracePeriod: time.Minute * 2,
			},
		},
	}
	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
	manager := &managerImpl{
		clock:                        fakeClock,
		killPodFunc:                  podKiller.killPodNow,
		imageGC:                      diskGC,
		containerGC:                  diskGC,
		config:                       config,
		recorder:                     &record.FakeRecorder{},
		summaryProvider:              summaryProvider,
		nodeRef:                      nodeRef,
		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
		thresholdsFirstObservedAt:    thresholdsObservedAt{},
	}

	// create a best effort pod to test admission
	bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi")
	burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi")

	// synchronize
	_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager expects no error but got %v", err)
	}

	// we should not have memory pressure
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

	// try to admit our pods (they should succeed)
	expected := []bool{true, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

	// induce soft threshold
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager expects no error but got %v", err)
	}

	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure since soft threshold was met")
	}

	// verify no pod was yet killed because there has not yet been enough time passed.
	if podKiller.pod != nil {
		t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
	}

	// step forward in time past the grace period
	fakeClock.Step(3 * time.Minute)
	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager expects no error but got %v", err)
	}

	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure since soft threshold was met")
	}

	// verify the right pod was killed with the right grace period.
	if podKiller.pod != podToEvict {
		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
	}
	if podKiller.gracePeriodOverride == nil {
		t.Errorf("Manager chose to kill pod but should have had a grace period override.")
	}
	observedGracePeriod := *podKiller.gracePeriodOverride
	if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
		t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
	}
	// reset state
	podKiller.pod = nil
	podKiller.gracePeriodOverride = nil

	// remove memory pressure
	fakeClock.Step(20 * time.Minute)
	summaryProvider.result = summaryStatsMaker("3Gi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager expects no error but got %v", err)
	}

	// we should not have memory pressure
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

	// induce memory pressure!
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager expects no error but got %v", err)
	}

	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}

	// check the right pod was killed
	if podKiller.pod != podToEvict {
		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
	}
	observedGracePeriod = *podKiller.gracePeriodOverride
	if observedGracePeriod != int64(0) {
		t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
	}

	// the best-effort pod should not admit, burstable should
	expected = []bool{false, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

	// reduce memory pressure
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
	podKiller.pod = nil // reset state
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager expects no error but got %v", err)
	}

	// we should have memory pressure (because transition period not yet met)
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}

	// no pod should have been killed
	if podKiller.pod != nil {
		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
	}

	// the best-effort pod should not admit, burstable should
	expected = []bool{false, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

	// move the clock past transition period to ensure that we stop reporting pressure
	fakeClock.Step(5 * time.Minute)
	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
	podKiller.pod = nil // reset state
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager expects no error but got %v", err)
	}

	// we should not have memory pressure (because transition period met)
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

	// no pod should have been killed
	if podKiller.pod != nil {
		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
	}

	// all pods should admit now
	expected = []bool{true, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}
}
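
// makeContainersByQOS returns a single-container slice whose requests and
// limits produce the requested QOS class.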
func makeContainersByQOS(class v1.PodQOSClass) []v1.Container {
	res := newResourceList("100m", "1Gi", "")
	switch class {
	case v1.PodQOSGuaranteed:
		return []v1.Container{newContainer("guaranteed-container", res, res)}
	case v1.PodQOSBurstable:
		return []v1.Container{newContainer("burstable-container", res, nil)}
	case v1.PodQOSBestEffort:
		fallthrough
	default:
		return []v1.Container{newContainer("best-effort-container", nil, nil)}
	}
}
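
// TestPIDPressure drives the manager through a full PID pressure cycle: no
// pressure, a soft threshold with grace period, a hard threshold, recovery,
// and the pressure transition period, verifying eviction and admission
// decisions at each step.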
func TestPIDPressure(t *testing.T) {
	testCases := []struct {
		name                               string
		podsToMake                         []podToMake
		evictPodIndex                      int
		noPressurePIDUsage                 string
		pressurePIDUsageWithGracePeriod    string
		pressurePIDUsageWithoutGracePeriod string
		totalPID                           string
	}{
		{
			name: "eviction due to pid pressure",
			podsToMake: []podToMake{
				{name: "high-priority-high-usage", priority: highPriority, pidUsage: 900},
				{name: "default-priority-low-usage", priority: defaultPriority, pidUsage: 100},
				{name: "default-priority-medium-usage", priority: defaultPriority, pidUsage: 400},
				{name: "low-priority-high-usage", priority: lowPriority, pidUsage: 600},
				{name: "low-priority-low-usage", priority: lowPriority, pidUsage: 50},
			},
			evictPodIndex:                      3, // we expect the low-priority-high-usage pod to be evicted
			noPressurePIDUsage:                 "300",
			pressurePIDUsageWithGracePeriod:    "700",
			pressurePIDUsageWithoutGracePeriod: "1200",
			totalPID:                           "2000",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			podMaker := makePodWithPIDStats
			summaryStatsMaker := makePIDStats
			pods := []*v1.Pod{}
			podStats := map[*v1.Pod]statsapi.PodStats{}
			for _, podToMake := range tc.podsToMake {
				pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.pidUsage)
				pods = append(pods, pod)
				podStats[pod] = podStat
			}
			podToEvict := pods[tc.evictPodIndex]
			activePodsFunc := func() []*v1.Pod { return pods }

			fakeClock := testingclock.NewFakeClock(time.Now())
			podKiller := &mockPodKiller{}
			diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
			diskGC := &mockDiskGC{err: nil}
			nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

			config := Config{
				MaxPodGracePeriodSeconds: 5,
				PressureTransitionPeriod: time.Minute * 5,
				Thresholds: []evictionapi.Threshold{
					{
						Signal:   evictionapi.SignalPIDAvailable,
						Operator: evictionapi.OpLessThan,
						Value: evictionapi.ThresholdValue{
							Quantity: quantityMustParse("1200"),
						},
					},
					{
						Signal:   evictionapi.SignalPIDAvailable,
						Operator: evictionapi.OpLessThan,
						Value: evictionapi.ThresholdValue{
							Quantity: quantityMustParse("1500"),
						},
						GracePeriod: time.Minute * 2,
					},
				},
			}

			summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats)}
			manager := &managerImpl{
				clock:                        fakeClock,
				killPodFunc:                  podKiller.killPodNow,
				imageGC:                      diskGC,
				containerGC:                  diskGC,
				config:                       config,
				recorder:                     &record.FakeRecorder{},
				summaryProvider:              summaryProvider,
				nodeRef:                      nodeRef,
				nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
				thresholdsFirstObservedAt:    thresholdsObservedAt{},
			}

			// create a pod to test admission
			podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, 50)

			// synchronize
			_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager expects no error but got %v", err)
			}

			// we should not have PID pressure
			if manager.IsUnderPIDPressure() {
				t.Fatalf("Manager should not report PID pressure")
			}

			// try to admit our pod (should succeed)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
			}

			// induce soft threshold for PID pressure
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.pressurePIDUsageWithGracePeriod, podStats)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager expects no error but got %v", err)
			}

			// now, we should have PID pressure
			if !manager.IsUnderPIDPressure() {
				t.Errorf("Manager should report PID pressure since soft threshold was met")
			}

			// verify no pod was yet killed because there has not yet been enough time passed
			if podKiller.pod != nil {
				t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
			}

			// step forward in time past the grace period
			fakeClock.Step(3 * time.Minute)
			// no change in PID stats to simulate continued pressure
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager expects no error but got %v", err)
			}

			// verify PID pressure is still reported
			if !manager.IsUnderPIDPressure() {
				t.Errorf("Manager should still report PID pressure")
			}

			// verify the right pod was killed with the right grace period.
			if podKiller.pod != podToEvict {
				t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
			}
			if podKiller.gracePeriodOverride == nil {
				t.Errorf("Manager chose to kill pod but should have had a grace period override.")
			}
			observedGracePeriod := *podKiller.gracePeriodOverride
			if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
				t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
			}

			// reset state
			podKiller.pod = nil
			podKiller.gracePeriodOverride = nil

			// remove PID pressure by simulating increased PID availability
			fakeClock.Step(20 * time.Minute)
			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats) // simulate increased PID availability
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager expects no error but got %v", err)
			}

			// verify PID pressure is resolved
			if manager.IsUnderPIDPressure() {
				t.Errorf("Manager should not report PID pressure")
			}

			// re-induce PID pressure
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.pressurePIDUsageWithoutGracePeriod, podStats)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager expects no error but got %v", err)
			}

			// verify PID pressure is reported again
			if !manager.IsUnderPIDPressure() {
				t.Errorf("Manager should report PID pressure")
			}

			// verify the right pod was killed with the right grace period.
			if podKiller.pod != podToEvict {
				t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
			}
			if podKiller.gracePeriodOverride == nil {
				t.Errorf("Manager chose to kill pod but should have had a grace period override.")
			}
			observedGracePeriod = *podKiller.gracePeriodOverride
			if observedGracePeriod != int64(0) {
				t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
			}

			// try to admit our pod (should fail)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
			}

			// reduce PID pressure
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats)
			podKiller.pod = nil // reset state
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager expects no error but got %v", err)
			}

			// we should have PID pressure (because transition period not yet met)
			if !manager.IsUnderPIDPressure() {
				t.Errorf("Manager should report PID pressure")
			}

			// no pod should have been killed
			if podKiller.pod != nil {
				t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			// try to admit our pod (should fail)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
			}

			// move the clock past the transition period
			fakeClock.Step(5 * time.Minute)
			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager expects no error but got %v", err)
			}

			// we should not have PID pressure (because transition period met)
			if manager.IsUnderPIDPressure() {
				t.Errorf("Manager should not report PID pressure")
			}

			// no pod should have been killed
			if podKiller.pod != nil {
				t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			// try to admit our pod (should succeed)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
			}
		})
	}
}
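
// TestAdmitUnderNodeConditions verifies admission decisions under node
// conditions: with memory pressure only best-effort pods are rejected, and
// with both memory and disk pressure all pods are rejected.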
func TestAdmitUnderNodeConditions(t *testing.T) {
	manager := &managerImpl{}
	pods := []*v1.Pod{
		newPod("guaranteed-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, makeContainersByQOS(v1.PodQOSGuaranteed), nil),
		newPod("burstable-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, makeContainersByQOS(v1.PodQOSBurstable), nil),
		newPod("best-effort-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, makeContainersByQOS(v1.PodQOSBestEffort), nil),
	}

	expected := []bool{true, true, true}
	for i, pod := range pods {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

	manager.nodeConditions = []v1.NodeConditionType{v1.NodeMemoryPressure}
	expected = []bool{true, true, false}
	for i, pod := range pods {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

	manager.nodeConditions = []v1.NodeConditionType{v1.NodeMemoryPressure, v1.NodeDiskPressure}
	expected = []bool{false, false, false}
	for i, pod := range pods {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}
}

// parseQuantity parses the specified value, or returns a zero-valued
// quantity if the value is empty.
func parseQuantity(value string) resource.Quantity {
	if len(value) == 0 {
		return resource.MustParse("0")
	}
	return resource.MustParse(value)
}
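
// TestDiskPressureNodeFs drives the manager through a full disk pressure
// cycle against the node, image, or container filesystem: a soft threshold
// with grace period, a hard threshold, recovery, and the pressure
// transition period, verifying eviction and admission decisions at each
// step.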
func TestDiskPressureNodeFs(t *testing.T) {
|
|
|
|
testCases := map[string]struct {
|
|
nodeFsStats string
|
|
imageFsStats string
|
|
containerFsStats string
|
|
kubeletSeparateDiskFeature bool
|
|
writeableSeparateFromReadOnly bool
|
|
thresholdToMonitor []evictionapi.Threshold
|
|
podToMakes []podToMake
|
|
dedicatedImageFs *bool
|
|
expectErr string
|
|
inducePressureOnWhichFs string
|
|
softDiskPressure string
|
|
hardDiskPressure string
|
|
}{
|
|
"eviction due to disk pressure; no image fs": {
|
|
dedicatedImageFs: ptr.To(false),
|
|
nodeFsStats: "16Gi",
|
|
imageFsStats: "16Gi",
|
|
containerFsStats: "16Gi",
|
|
inducePressureOnWhichFs: "nodefs",
|
|
softDiskPressure: "1.5Gi",
|
|
hardDiskPressure: "750Mi",
|
|
thresholdToMonitor: []evictionapi.Threshold{
|
|
{
|
|
Signal: evictionapi.SignalNodeFsAvailable,
|
|
Operator: evictionapi.OpLessThan,
|
|
Value: evictionapi.ThresholdValue{
|
|
Quantity: quantityMustParse("1Gi"),
|
|
},
|
|
},
|
|
{
|
|
Signal: evictionapi.SignalNodeFsAvailable,
|
|
Operator: evictionapi.OpLessThan,
|
|
Value: evictionapi.ThresholdValue{
|
|
Quantity: quantityMustParse("2Gi"),
|
|
},
|
|
GracePeriod: time.Minute * 2,
|
|
},
|
|
},
|
|
podToMakes: []podToMake{
|
|
{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
|
|
{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
|
|
{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
|
|
{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
|
|
{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
|
|
},
|
|
},
|
|
"eviction due to image disk pressure; image fs": {
|
|
dedicatedImageFs: ptr.To(true),
|
|
nodeFsStats: "16Gi",
|
|
imageFsStats: "16Gi",
|
|
containerFsStats: "16Gi",
|
|
softDiskPressure: "1.5Gi",
|
|
hardDiskPressure: "750Mi",
|
|
inducePressureOnWhichFs: "imagefs",
|
|
thresholdToMonitor: []evictionapi.Threshold{
|
|
{
|
|
Signal: evictionapi.SignalImageFsAvailable,
|
|
Operator: evictionapi.OpLessThan,
|
|
Value: evictionapi.ThresholdValue{
|
|
Quantity: quantityMustParse("1Gi"),
|
|
},
|
|
},
|
|
{
|
|
Signal: evictionapi.SignalImageFsAvailable,
|
|
Operator: evictionapi.OpLessThan,
|
|
Value: evictionapi.ThresholdValue{
|
|
Quantity: quantityMustParse("2Gi"),
|
|
},
|
|
GracePeriod: time.Minute * 2,
|
|
},
|
|
},
|
|
podToMakes: []podToMake{
|
|
{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
|
|
{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
|
|
{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
|
|
{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
|
|
{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
|
|
},
|
|
},
|
|
"eviction due to container disk pressure; container fs": {
|
|
dedicatedImageFs: ptr.To(true),
|
|
kubeletSeparateDiskFeature: true,
|
|
writeableSeparateFromReadOnly: true,
|
|
nodeFsStats: "16Gi",
|
|
imageFsStats: "16Gi",
|
|
containerFsStats: "16Gi",
|
|
softDiskPressure: "1.5Gi",
|
|
hardDiskPressure: "750Mi",
|
|
inducePressureOnWhichFs: "containerfs",
|
|
thresholdToMonitor: []evictionapi.Threshold{
|
|
{
|
|
Signal: evictionapi.SignalNodeFsAvailable,
|
|
Operator: evictionapi.OpLessThan,
|
|
Value: evictionapi.ThresholdValue{
|
|
Quantity: quantityMustParse("1Gi"),
|
|
},
|
|
},
|
|
{
|
|
Signal: evictionapi.SignalNodeFsAvailable,
|
|
Operator: evictionapi.OpLessThan,
|
|
Value: evictionapi.ThresholdValue{
|
|
Quantity: quantityMustParse("2Gi"),
|
|
},
|
|
GracePeriod: time.Minute * 2,
|
|
},
|
|
},
|
|
podToMakes: []podToMake{
|
|
{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
|
|
{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
|
|
{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
|
|
{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
|
|
{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
|
|
},
|
|
},
|
|
}
|
|
|
|
for name, tc := range testCases {
|
|
t.Run(name, func(t *testing.T) {
|
|
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, tc.kubeletSeparateDiskFeature)()
|
|
|
|
podMaker := makePodWithDiskStats
|
|
summaryStatsMaker := makeDiskStats
|
|
podsToMake := tc.podToMakes
|
|
pods := []*v1.Pod{}
|
|
podStats := map[*v1.Pod]statsapi.PodStats{}
|
|
for _, podToMake := range podsToMake {
|
|
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
|
|
pods = append(pods, pod)
|
|
podStats[pod] = podStat
|
|
}
|
|
podToEvict := pods[0]
|
|
activePodsFunc := func() []*v1.Pod {
|
|
return pods
|
|
}
|
|
|
|
fakeClock := testingclock.NewFakeClock(time.Now())
|
|
podKiller := &mockPodKiller{}
|
|
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
|
|
diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
|
|
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
|
|
|
|
config := Config{
|
|
MaxPodGracePeriodSeconds: 5,
|
|
PressureTransitionPeriod: time.Minute * 5,
|
|
Thresholds: tc.thresholdToMonitor,
|
|
}
|
|
|
|
diskStatStart := diskStats{
|
|
rootFsAvailableBytes: tc.nodeFsStats,
|
|
imageFsAvailableBytes: tc.imageFsStats,
|
|
containerFsAvailableBytes: tc.containerFsStats,
|
|
podStats: podStats,
|
|
}
|
|
diskStatConst := diskStatStart
|
|
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStatStart)}
|
|
manager := &managerImpl{
|
|
clock: fakeClock,
|
|
killPodFunc: podKiller.killPodNow,
|
|
imageGC: diskGC,
|
|
containerGC: diskGC,
|
|
config: config,
|
|
recorder: &record.FakeRecorder{},
|
|
summaryProvider: summaryProvider,
|
|
nodeRef: nodeRef,
|
|
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
|
|
thresholdsFirstObservedAt: thresholdsObservedAt{},
|
|
}
|
|
|
|
// create a best effort pod to test admission
|
|
podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi", "0Gi", "0Gi")
|
|
|
|
// synchronize
|
|
_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
|
|
|
|
if err != nil {
|
|
t.Fatalf("Manager expects no error but got %v", err)
|
|
}
|
|
|
|
// we should not have disk pressure
|
|
if manager.IsUnderDiskPressure() {
|
|
t.Fatalf("Manager should not report disk pressure")
|
|
}
|
|
|
|
// try to admit our pod (should succeed)
|
|
if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
|
|
t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
|
|
}
|
|
|
|
// induce soft threshold
|
|
fakeClock.Step(1 * time.Minute)
|
|
|
|
if tc.inducePressureOnWhichFs == "nodefs" {
|
|
diskStatStart.rootFsAvailableBytes = tc.softDiskPressure
|
|
} else if tc.inducePressureOnWhichFs == "imagefs" {
|
|
diskStatStart.imageFsAvailableBytes = tc.softDiskPressure
|
|
} else if tc.inducePressureOnWhichFs == "containerfs" {
|
|
diskStatStart.containerFsAvailableBytes = tc.softDiskPressure
|
|
}
|
|
summaryProvider.result = summaryStatsMaker(diskStatStart)
|
|
_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
|
|
|
|
if err != nil {
|
|
t.Fatalf("Manager expects no error but got %v", err)
|
|
}
|
|
|
|
// we should have disk pressure
|
|
if !manager.IsUnderDiskPressure() {
|
|
t.Fatalf("Manager should report disk pressure since soft threshold was met")
|
|
}
|
|
|
|
// verify no pod was yet killed because there has not yet been enough time passed.
|
|
if podKiller.pod != nil {
|
|
t.Fatalf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
|
|
}
|
|
|
|
// step forward in time pass the grace period
|
|
fakeClock.Step(3 * time.Minute)
|
|
summaryProvider.result = summaryStatsMaker(diskStatStart)
|
|
_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
|
|
|
|
if err != nil {
|
|
t.Fatalf("Manager expects no error but got %v", err)
|
|
}
|
|
|
|
// we should have disk pressure
|
|
if !manager.IsUnderDiskPressure() {
|
|
t.Fatalf("Manager should report disk pressure since soft threshold was met")
|
|
}
|
|
|
|
// verify the right pod was killed with the right grace period.
|
|
if podKiller.pod != podToEvict {
|
|
t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
|
|
}
|
|
if podKiller.gracePeriodOverride == nil {
|
|
t.Fatalf("Manager chose to kill pod but should have had a grace period override.")
|
|
}
|
|
observedGracePeriod := *podKiller.gracePeriodOverride
|
|
if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
|
|
t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
|
|
}
|
|
// reset state
|
|
podKiller.pod = nil
|
|
podKiller.gracePeriodOverride = nil
|
|
|
|
// remove disk pressure
|
|
fakeClock.Step(20 * time.Minute)
|
|
summaryProvider.result = summaryStatsMaker(diskStatConst)
|
|
_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
|
|
|
|
if err != nil {
|
|
t.Fatalf("Manager expects no error but got %v", err)
|
|
}
|
|
|
|
// we should not have disk pressure
|
|
if manager.IsUnderDiskPressure() {
|
|
t.Fatalf("Manager should not report disk pressure")
|
|
}
|
|
|
|
			// induce disk pressure!
			fakeClock.Step(1 * time.Minute)
			switch tc.inducePressureOnWhichFs {
			case "nodefs":
				diskStatStart.rootFsAvailableBytes = tc.hardDiskPressure
			case "imagefs":
				diskStatStart.imageFsAvailableBytes = tc.hardDiskPressure
			case "containerfs":
				diskStatStart.containerFsAvailableBytes = tc.hardDiskPressure
			}
			summaryProvider.result = summaryStatsMaker(diskStatStart)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have disk pressure
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure")
			}

			// check the right pod was killed
			if podKiller.pod != podToEvict {
				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
			}
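			// hard evictions do not wait for graceful termination: the grace period
			// override should be zero.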
			observedGracePeriod = *podKiller.gracePeriodOverride
			if observedGracePeriod != int64(0) {
				t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
			}

			// try to admit our pod (should fail)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
			}

			// reduce disk pressure
			fakeClock.Step(1 * time.Minute)

			summaryProvider.result = summaryStatsMaker(diskStatConst)
			podKiller.pod = nil // reset state
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}
			// we should have disk pressure (because transition period not yet met)
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure")
			}

			// no pod should have been killed
			if podKiller.pod != nil {
				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			// try to admit our pod (should fail)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
			}

			// move the clock past transition period to ensure that we stop reporting pressure
			fakeClock.Step(5 * time.Minute)
			summaryProvider.result = summaryStatsMaker(diskStatConst)
			podKiller.pod = nil // reset state
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure (because transition period met)
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report disk pressure")
			}

			// no pod should have been killed
			if podKiller.pod != nil {
				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			// try to admit our pod (should succeed)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
			}
		})
	}
}

// TestMinReclaim verifies that min-reclaim works as desired.
func TestMinReclaim(t *testing.T) {
	podMaker := makePodWithMemoryStats
	summaryStatsMaker := makeMemoryStats
	podsToMake := []podToMake{
		{name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
		{name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
		{name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"},
	}
	pods := []*v1.Pod{}
	podStats := map[*v1.Pod]statsapi.PodStats{}
	for _, podToMake := range podsToMake {
		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
		pods = append(pods, pod)
		podStats[pod] = podStat
	}
	podToEvict := pods[4]
	activePodsFunc := func() []*v1.Pod {
		return pods
	}

	fakeClock := testingclock.NewFakeClock(time.Now())
	podKiller := &mockPodKiller{}
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal:   evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
				MinReclaim: &evictionapi.ThresholdValue{
					Quantity: quantityMustParse("500Mi"),
				},
			},
		},
	}
	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
	manager := &managerImpl{
		clock:                        fakeClock,
		killPodFunc:                  podKiller.killPodNow,
		imageGC:                      diskGC,
		containerGC:                  diskGC,
		config:                       config,
		recorder:                     &record.FakeRecorder{},
		summaryProvider:              summaryProvider,
		nodeRef:                      nodeRef,
		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
		thresholdsFirstObservedAt:    thresholdsObservedAt{},
	}

	// synchronize
	_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Errorf("Manager should not report any errors, but got: %v", err)
	}
	// we should not have memory pressure
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

	// induce memory pressure!
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}

	// check the right pod was killed
	if podKiller.pod != podToEvict {
		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
	}
	observedGracePeriod := *podKiller.gracePeriodOverride
	if observedGracePeriod != int64(0) {
		t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
	}

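	// available memory (1.2Gi) is back above the 1Gi threshold, but minReclaim
	// (500Mi) requires reclaiming until 1.5Gi is available, so eviction continues.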
	// reduce memory pressure, but not below the min-reclaim amount
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("1.2Gi", podStats)
	podKiller.pod = nil // reset state
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should have memory pressure (because transition period not yet met)
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}

	// check the right pod was killed
	if podKiller.pod != podToEvict {
		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
	}
	observedGracePeriod = *podKiller.gracePeriodOverride
	if observedGracePeriod != int64(0) {
		t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
	}

	// reduce memory pressure and ensure the min-reclaim amount
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
	podKiller.pod = nil // reset state
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should have memory pressure (because transition period not yet met)
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}

	// no pod should have been killed
	if podKiller.pod != nil {
		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
	}

	// move the clock past transition period to ensure that we stop reporting pressure
	fakeClock.Step(5 * time.Minute)
	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
	podKiller.pod = nil // reset state
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should not have memory pressure (because transition period met)
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

	// no pod should have been killed
	if podKiller.pod != nil {
		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
	}
}

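// TestNodeReclaimFuncs verifies that the manager attempts node-level reclaim
// (image and container garbage collection) before evicting pods, and that pods
// are evicted only when reclaim cannot satisfy the threshold and its min-reclaim.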
func TestNodeReclaimFuncs(t *testing.T) {
	testCases := map[string]struct {
		nodeFsStats                   string
		imageFsStats                  string
		containerFsStats              string
		kubeletSeparateDiskFeature    bool
		writeableSeparateFromReadOnly bool
		expectContainerGcCall         bool
		expectImageGcCall             bool
		thresholdToMonitor            evictionapi.Threshold
		podToMakes                    []podToMake
		dedicatedImageFs              *bool
		expectErr                     string
		inducePressureOnWhichFs       string
		softDiskPressure              string
		hardDiskPressure              string
	}{
		"eviction due to disk pressure; no image fs": {
			dedicatedImageFs:        ptr.To(false),
			nodeFsStats:             "16Gi",
			imageFsStats:            "16Gi",
			containerFsStats:        "16Gi",
			inducePressureOnWhichFs: "nodefs",
			softDiskPressure:        "1.5Gi",
			hardDiskPressure:        "750Mi",
			expectContainerGcCall:   true,
			expectImageGcCall:       true,
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalNodeFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
				MinReclaim: &evictionapi.ThresholdValue{
					Quantity: quantityMustParse("500Mi"),
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
			},
		},
		"eviction due to image disk pressure; image fs": {
			dedicatedImageFs:        ptr.To(true),
			nodeFsStats:             "16Gi",
			imageFsStats:            "16Gi",
			containerFsStats:        "16Gi",
			softDiskPressure:        "1.5Gi",
			hardDiskPressure:        "750Mi",
			inducePressureOnWhichFs: "imagefs",
			expectContainerGcCall:   true,
			expectImageGcCall:       true,
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalImageFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
				MinReclaim: &evictionapi.ThresholdValue{
					Quantity: quantityMustParse("500Mi"),
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
			},
		},
		"eviction due to container disk pressure; container fs": {
			dedicatedImageFs:              ptr.To(true),
			kubeletSeparateDiskFeature:    true,
			writeableSeparateFromReadOnly: true,
			nodeFsStats:                   "16Gi",
			imageFsStats:                  "16Gi",
			containerFsStats:              "16Gi",
			softDiskPressure:              "1.5Gi",
			hardDiskPressure:              "750Mi",
			inducePressureOnWhichFs:       "nodefs",
			expectContainerGcCall:         true,
			expectImageGcCall:             false,
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalNodeFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
				MinReclaim: &evictionapi.ThresholdValue{
					Quantity: quantityMustParse("500Mi"),
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
			},
		},
		"eviction due to image disk pressure; container fs": {
			dedicatedImageFs:              ptr.To(true),
			kubeletSeparateDiskFeature:    true,
			writeableSeparateFromReadOnly: true,
			nodeFsStats:                   "16Gi",
			imageFsStats:                  "16Gi",
			containerFsStats:              "16Gi",
			softDiskPressure:              "1.5Gi",
			hardDiskPressure:              "750Mi",
			inducePressureOnWhichFs:       "imagefs",
			expectContainerGcCall:         false,
			expectImageGcCall:             true,
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalImageFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
				MinReclaim: &evictionapi.ThresholdValue{
					Quantity: quantityMustParse("500Mi"),
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
			},
		},
	}

	for name, tc := range testCases {
		t.Run(name, func(t *testing.T) {
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, tc.kubeletSeparateDiskFeature)()

			podMaker := makePodWithDiskStats
			summaryStatsMaker := makeDiskStats
			podsToMake := tc.podToMakes
			pods := []*v1.Pod{}
			podStats := map[*v1.Pod]statsapi.PodStats{}
			for _, podToMake := range podsToMake {
				pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
				pods = append(pods, pod)
				podStats[pod] = podStat
			}
			podToEvict := pods[0]
			activePodsFunc := func() []*v1.Pod {
				return pods
			}

			fakeClock := testingclock.NewFakeClock(time.Now())
			podKiller := &mockPodKiller{}
			diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
			nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

			config := Config{
				MaxPodGracePeriodSeconds: 5,
				PressureTransitionPeriod: time.Minute * 5,
				Thresholds:               []evictionapi.Threshold{tc.thresholdToMonitor},
			}
			diskStatStart := diskStats{
				rootFsAvailableBytes:      tc.nodeFsStats,
				imageFsAvailableBytes:     tc.imageFsStats,
				containerFsAvailableBytes: tc.containerFsStats,
				podStats:                  podStats,
			}
			// This is a constant that we use to test that disk pressure is over. Don't change!
			diskStatConst := diskStatStart
			summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStatStart)}
			diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
			manager := &managerImpl{
				clock:                        fakeClock,
				killPodFunc:                  podKiller.killPodNow,
				imageGC:                      diskGC,
				containerGC:                  diskGC,
				config:                       config,
				recorder:                     &record.FakeRecorder{},
				summaryProvider:              summaryProvider,
				nodeRef:                      nodeRef,
				nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
				thresholdsFirstObservedAt:    thresholdsObservedAt{},
			}

			// synchronize
			_, err := manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure
			if manager.IsUnderDiskPressure() {
				t.Errorf("Manager should not report disk pressure")
			}

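			// the manager should attempt node-level reclaim first; because GC restores
			// usage to the baseline below, no pod should need to be evicted this round.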
			// induce hard threshold
			fakeClock.Step(1 * time.Minute)

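			// setDiskStatsBasedOnFs returns a copy of diskStat with the available
			// bytes on the selected filesystem lowered to diskPressure.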
			setDiskStatsBasedOnFs := func(whichFs string, diskPressure string, diskStat diskStats) diskStats {
				switch whichFs {
				case "nodefs":
					diskStat.rootFsAvailableBytes = diskPressure
				case "imagefs":
					diskStat.imageFsAvailableBytes = diskPressure
				case "containerfs":
					diskStat.containerFsAvailableBytes = diskPressure
				}
				return diskStat
			}
			newDiskAfterHardEviction := setDiskStatsBasedOnFs(tc.inducePressureOnWhichFs, tc.hardDiskPressure, diskStatStart)
			summaryProvider.result = summaryStatsMaker(newDiskAfterHardEviction)
			// make GC successfully return disk usage to previous levels
			diskGC.summaryAfterGC = summaryStatsMaker(diskStatConst)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have disk pressure
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure since hard threshold was met")
			}

			// verify image, container or both gc were called.
			// split filesystem can have container gc called without image.
			// same filesystem should have both.
			if diskGC.imageGCInvoked != tc.expectImageGcCall || diskGC.containerGCInvoked != tc.expectContainerGcCall {
				t.Fatalf("Manager gc behavior was incorrect: image gc invoked: %v (expected %v), container gc invoked: %v (expected %v)", diskGC.imageGCInvoked, tc.expectImageGcCall, diskGC.containerGCInvoked, tc.expectContainerGcCall)
			}

			// verify no pod was killed because image gc was sufficient
			if podKiller.pod != nil {
				t.Fatalf("Manager should not have killed a pod, but killed: %v", podKiller.pod.Name)
			}

			// reset state
			diskGC.imageGCInvoked = false
			diskGC.containerGCInvoked = false

			// remove disk pressure
			fakeClock.Step(20 * time.Minute)
			summaryProvider.result = summaryStatsMaker(diskStatConst)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report disk pressure")
			}

			// synchronize
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report disk pressure")
			}

			// induce hard threshold
			fakeClock.Step(1 * time.Minute)
			newDiskAfterHardEviction = setDiskStatsBasedOnFs(tc.inducePressureOnWhichFs, tc.hardDiskPressure, diskStatStart)
			summaryProvider.result = summaryStatsMaker(newDiskAfterHardEviction)
			// make GC return disk usage below the threshold, but not satisfying minReclaim
			gcBelowThreshold := setDiskStatsBasedOnFs(tc.inducePressureOnWhichFs, "1.1G", newDiskAfterHardEviction)
			diskGC.summaryAfterGC = summaryStatsMaker(gcBelowThreshold)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have disk pressure
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure since hard threshold was met")
			}

			// verify image, container or both gc were called.
			// split filesystem can have container gc called without image.
			// same filesystem should have both.
			if diskGC.imageGCInvoked != tc.expectImageGcCall || diskGC.containerGCInvoked != tc.expectContainerGcCall {
				t.Fatalf("Manager gc behavior was incorrect: image gc invoked: %v (expected %v), container gc invoked: %v (expected %v)", diskGC.imageGCInvoked, tc.expectImageGcCall, diskGC.containerGCInvoked, tc.expectContainerGcCall)
			}

			// verify a pod was killed because image gc was not enough to satisfy minReclaim
			if podKiller.pod == nil {
				t.Fatalf("Manager should have killed a pod, but didn't")
			}

			// reset state
			diskGC.imageGCInvoked = false
			diskGC.containerGCInvoked = false
			podKiller.pod = nil

			// remove disk pressure
			fakeClock.Step(20 * time.Minute)
			summaryProvider.result = summaryStatsMaker(diskStatConst)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report disk pressure")
			}

			// induce disk pressure!
			fakeClock.Step(1 * time.Minute)
			hardDiskPressureStats := setDiskStatsBasedOnFs(tc.inducePressureOnWhichFs, tc.hardDiskPressure, diskStatStart)
			summaryProvider.result = summaryStatsMaker(hardDiskPressureStats)
			// Don't reclaim any disk
			diskGC.summaryAfterGC = summaryStatsMaker(hardDiskPressureStats)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have disk pressure
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure")
			}

			// verify image, container or both gc were called.
			// split filesystem can have container gc called without image.
			// same filesystem should have both.
			if diskGC.imageGCInvoked != tc.expectImageGcCall || diskGC.containerGCInvoked != tc.expectContainerGcCall {
				t.Fatalf("Manager gc behavior was incorrect: image gc invoked: %v (expected %v), container gc invoked: %v (expected %v)", diskGC.imageGCInvoked, tc.expectImageGcCall, diskGC.containerGCInvoked, tc.expectContainerGcCall)
			}

			// check the right pod was killed
			if podKiller.pod != podToEvict {
				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
			}
			observedGracePeriod := *podKiller.gracePeriodOverride
			if observedGracePeriod != int64(0) {
				t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
			}

			// reduce disk pressure
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = summaryStatsMaker(diskStatConst)
			diskGC.imageGCInvoked = false     // reset state
			diskGC.containerGCInvoked = false // reset state
			podKiller.pod = nil               // reset state
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have disk pressure (because transition period not yet met)
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure")
			}

			if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
				t.Errorf("Manager chose to perform image or container gc when it was not needed")
			}

			// no pod should have been killed
			if podKiller.pod != nil {
				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			// move the clock past transition period to ensure that we stop reporting pressure
			fakeClock.Step(5 * time.Minute)
			summaryProvider.result = summaryStatsMaker(diskStatConst)
			diskGC.imageGCInvoked = false     // reset state
			diskGC.containerGCInvoked = false // reset state
			podKiller.pod = nil               // reset state
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure (because transition period met)
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report disk pressure")
			}

			if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
				t.Errorf("Manager chose to perform image or container gc when it was not needed")
			}

			// no pod should have been killed
			if podKiller.pod != nil {
				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}
		})
	}
}

func TestInodePressureFsInodes(t *testing.T) {
	podMaker := func(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootInodes, logInodes, volumeInodes string) (*v1.Pod, statsapi.PodStats) {
		pod := newPod(name, priority, []v1.Container{
			newContainer(name, requests, limits),
		}, nil)
		podStats := newPodInodeStats(pod, parseQuantity(rootInodes), parseQuantity(logInodes), parseQuantity(volumeInodes))
		return pod, podStats
	}
	summaryStatsMaker := func(rootFsInodesFree, rootFsInodes, imageFsInodesFree, imageFsInodes, containerFsInodesFree, containerFsInodes string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary {
		rootFsInodesFreeVal := resource.MustParse(rootFsInodesFree)
		internalRootFsInodesFree := uint64(rootFsInodesFreeVal.Value())
		rootFsInodesVal := resource.MustParse(rootFsInodes)
		internalRootFsInodes := uint64(rootFsInodesVal.Value())

		imageFsInodesFreeVal := resource.MustParse(imageFsInodesFree)
		internalImageFsInodesFree := uint64(imageFsInodesFreeVal.Value())
		imageFsInodesVal := resource.MustParse(imageFsInodes)
		internalImageFsInodes := uint64(imageFsInodesVal.Value())

		containerFsInodesFreeVal := resource.MustParse(containerFsInodesFree)
		internalContainerFsInodesFree := uint64(containerFsInodesFreeVal.Value())
		containerFsInodesVal := resource.MustParse(containerFsInodes)
		internalContainerFsInodes := uint64(containerFsInodesVal.Value())

		result := &statsapi.Summary{
			Node: statsapi.NodeStats{
				Fs: &statsapi.FsStats{
					InodesFree: &internalRootFsInodesFree,
					Inodes:     &internalRootFsInodes,
				},
				Runtime: &statsapi.RuntimeStats{
					ImageFs: &statsapi.FsStats{
						InodesFree: &internalImageFsInodesFree,
						Inodes:     &internalImageFsInodes,
					},
					ContainerFs: &statsapi.FsStats{
						InodesFree: &internalContainerFsInodesFree,
						Inodes:     &internalContainerFsInodes,
					},
				},
			},
			Pods: []statsapi.PodStats{},
		}
		for _, podStat := range podStats {
			result.Pods = append(result.Pods, podStat)
		}
		return result
	}

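	// setINodesFreeBasedOnFs mutates the given summary in place, lowering
	// InodesFree on the selected filesystem.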
	setINodesFreeBasedOnFs := func(whichFs string, inodesFree string, diskStat *statsapi.Summary) *statsapi.Summary {
		inodesFreeVal := resource.MustParse(inodesFree)
		internalFsInodesFree := uint64(inodesFreeVal.Value())

		switch whichFs {
		case "nodefs":
			diskStat.Node.Fs.InodesFree = &internalFsInodesFree
		case "imagefs":
			diskStat.Node.Runtime.ImageFs.InodesFree = &internalFsInodesFree
		case "containerfs":
			diskStat.Node.Runtime.ContainerFs.InodesFree = &internalFsInodesFree
		}
		return diskStat
	}

	testCases := map[string]struct {
		nodeFsInodesFree              string
		nodeFsInodes                  string
		imageFsInodesFree             string
		imageFsInodes                 string
		containerFsInodesFree         string
		containerFsInodes             string
		kubeletSeparateDiskFeature    bool
		writeableSeparateFromReadOnly bool
		thresholdToMonitor            []evictionapi.Threshold
		podToMakes                    []podToMake
		dedicatedImageFs              *bool
		expectErr                     string
		inducePressureOnWhichFs       string
		softINodePressure             string
		hardINodePressure             string
	}{
		"eviction due to disk pressure; no image fs": {
			dedicatedImageFs:        ptr.To(false),
			nodeFsInodesFree:        "3Mi",
			nodeFsInodes:            "4Mi",
			imageFsInodesFree:       "3Mi",
			imageFsInodes:           "4Mi",
			containerFsInodesFree:   "3Mi",
			containerFsInodes:       "4Mi",
			inducePressureOnWhichFs: "nodefs",
			softINodePressure:       "1.5Mi",
			hardINodePressure:       "0.5Mi",
			thresholdToMonitor: []evictionapi.Threshold{
				{
					Signal:   evictionapi.SignalNodeFsInodesFree,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("1Mi"),
					},
				},
				{
					Signal:   evictionapi.SignalNodeFsInodesFree,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("2Mi"),
					},
					GracePeriod: time.Minute * 2,
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsInodesUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "100Mi"},
			},
		},
		"eviction due to image disk pressure; image fs": {
			dedicatedImageFs:        ptr.To(true),
			nodeFsInodesFree:        "3Mi",
			nodeFsInodes:            "4Mi",
			imageFsInodesFree:       "3Mi",
			imageFsInodes:           "4Mi",
			containerFsInodesFree:   "3Mi",
			containerFsInodes:       "4Mi",
			softINodePressure:       "1.5Mi",
			hardINodePressure:       "0.5Mi",
			inducePressureOnWhichFs: "imagefs",
			thresholdToMonitor: []evictionapi.Threshold{
				{
					Signal:   evictionapi.SignalImageFsInodesFree,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("1Mi"),
					},
				},
				{
					Signal:   evictionapi.SignalImageFsInodesFree,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("2Mi"),
					},
					GracePeriod: time.Minute * 2,
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsInodesUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "100Mi"},
			},
		},
		"eviction due to container disk pressure; container fs": {
			dedicatedImageFs:              ptr.To(true),
			kubeletSeparateDiskFeature:    true,
			writeableSeparateFromReadOnly: true,
			nodeFsInodesFree:              "3Mi",
			nodeFsInodes:                  "4Mi",
			imageFsInodesFree:             "3Mi",
			imageFsInodes:                 "4Mi",
			containerFsInodesFree:         "3Mi",
			containerFsInodes:             "4Mi",
			softINodePressure:             "1.5Mi",
			hardINodePressure:             "0.5Mi",
			inducePressureOnWhichFs:       "nodefs",
			thresholdToMonitor: []evictionapi.Threshold{
				{
					Signal:   evictionapi.SignalNodeFsInodesFree,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("1Mi"),
					},
				},
				{
					Signal:   evictionapi.SignalNodeFsInodesFree,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("2Mi"),
					},
					GracePeriod: time.Minute * 2,
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsInodesUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "100Mi"},
			},
		},
	}

	for name, tc := range testCases {
		t.Run(name, func(t *testing.T) {
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, tc.kubeletSeparateDiskFeature)()

			podsToMake := tc.podToMakes
			pods := []*v1.Pod{}
			podStats := map[*v1.Pod]statsapi.PodStats{}
			for _, podToMake := range podsToMake {
				pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsInodesUsed, podToMake.logsFsInodesUsed, podToMake.perLocalVolumeInodesUsed)
				pods = append(pods, pod)
				podStats[pod] = podStat
			}
			podToEvict := pods[0]
			activePodsFunc := func() []*v1.Pod {
				return pods
			}

			fakeClock := testingclock.NewFakeClock(time.Now())
			podKiller := &mockPodKiller{}
			diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
			diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
			nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

			config := Config{
				MaxPodGracePeriodSeconds: 5,
				PressureTransitionPeriod: time.Minute * 5,
				Thresholds:               tc.thresholdToMonitor,
			}
			startingStatsConst := summaryStatsMaker(tc.nodeFsInodesFree, tc.nodeFsInodes, tc.imageFsInodesFree, tc.imageFsInodes, tc.containerFsInodesFree, tc.containerFsInodes, podStats)
			startingStatsModified := summaryStatsMaker(tc.nodeFsInodesFree, tc.nodeFsInodes, tc.imageFsInodesFree, tc.imageFsInodes, tc.containerFsInodesFree, tc.containerFsInodes, podStats)
			summaryProvider := &fakeSummaryProvider{result: startingStatsModified}
			manager := &managerImpl{
				clock:                        fakeClock,
				killPodFunc:                  podKiller.killPodNow,
				imageGC:                      diskGC,
				containerGC:                  diskGC,
				config:                       config,
				recorder:                     &record.FakeRecorder{},
				summaryProvider:              summaryProvider,
				nodeRef:                      nodeRef,
				nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
				thresholdsFirstObservedAt:    thresholdsObservedAt{},
			}

			// create a best effort pod to test admission
			podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0", "0", "0")

			// synchronize
			_, err := manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report inode pressure")
			}

			// try to admit our pod (should succeed)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
			}

			// induce soft threshold
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = setINodesFreeBasedOnFs(tc.inducePressureOnWhichFs, tc.softINodePressure, startingStatsModified)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have disk pressure
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report inode pressure since soft threshold was met")
			}

			// verify no pod was killed yet because enough time has not yet passed
			if podKiller.pod != nil {
				t.Fatalf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
			}

			// step forward in time past the grace period
			fakeClock.Step(3 * time.Minute)
			summaryProvider.result = setINodesFreeBasedOnFs(tc.inducePressureOnWhichFs, tc.softINodePressure, startingStatsModified)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have disk pressure
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report inode pressure since soft threshold was met")
			}

			// verify the right pod was killed with the right grace period.
			if podKiller.pod != podToEvict {
				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
			}
			if podKiller.gracePeriodOverride == nil {
				t.Fatalf("Manager chose to kill pod but should have had a grace period override.")
			}
			observedGracePeriod := *podKiller.gracePeriodOverride
			if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
				t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
			}
			// reset state
			podKiller.pod = nil
			podKiller.gracePeriodOverride = nil

			// remove inode pressure
			fakeClock.Step(20 * time.Minute)
			summaryProvider.result = startingStatsConst
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report inode pressure")
			}

			// induce inode pressure!
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = setINodesFreeBasedOnFs(tc.inducePressureOnWhichFs, tc.hardINodePressure, startingStatsModified)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have disk pressure
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report inode pressure")
			}

			// check the right pod was killed
			if podKiller.pod != podToEvict {
				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
			}
			observedGracePeriod = *podKiller.gracePeriodOverride
			if observedGracePeriod != int64(0) {
				t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
			}

			// try to admit our pod (should fail)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
			}

			// reduce inode pressure
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = startingStatsConst
			podKiller.pod = nil // reset state
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have disk pressure (because transition period not yet met)
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report inode pressure")
			}

			// no pod should have been killed
			if podKiller.pod != nil {
				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			// try to admit our pod (should fail)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
			}

			// move the clock past transition period to ensure that we stop reporting pressure
			fakeClock.Step(5 * time.Minute)
			summaryProvider.result = startingStatsConst
			podKiller.pod = nil // reset state
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure (because transition period met)
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report inode pressure")
			}

			// no pod should have been killed
			if podKiller.pod != nil {
				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			// try to admit our pod (should succeed)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
			}
		})
	}
}

// TestStaticCriticalPodsAreNotEvicted verifies that static pods with critical
// priority are not evicted by the eviction manager under memory pressure.
func TestStaticCriticalPodsAreNotEvicted(t *testing.T) {
	podMaker := makePodWithMemoryStats
	summaryStatsMaker := makeMemoryStats
	podsToMake := []podToMake{
		{name: "critical", priority: scheduling.SystemCriticalPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "800Mi"},
	}
	pods := []*v1.Pod{}
	podStats := map[*v1.Pod]statsapi.PodStats{}
	for _, podToMake := range podsToMake {
		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
		pods = append(pods, pod)
		podStats[pod] = podStat
	}

	pods[0].Annotations = map[string]string{
		kubelettypes.ConfigSourceAnnotationKey: kubelettypes.FileSource,
	}
	// Mark the pod as critical
	podPriority := scheduling.SystemCriticalPriority
	pods[0].Spec.Priority = &podPriority
	pods[0].Namespace = kubeapi.NamespaceSystem

	podToEvict := pods[0]
	activePodsFunc := func() []*v1.Pod {
		return pods
	}

	fakeClock := testingclock.NewFakeClock(time.Now())
	podKiller := &mockPodKiller{}
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{
		Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
	}

	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal:   evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
			},
			{
				Signal:   evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("2Gi"),
				},
				GracePeriod: time.Minute * 2,
			},
		},
	}
	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
	manager := &managerImpl{
		clock:                        fakeClock,
		killPodFunc:                  podKiller.killPodNow,
		imageGC:                      diskGC,
		containerGC:                  diskGC,
		config:                       config,
		recorder:                     &record.FakeRecorder{},
		summaryProvider:              summaryProvider,
		nodeRef:                      nodeRef,
		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
		thresholdsFirstObservedAt:    thresholdsObservedAt{},
	}

	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
	_, err := manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure since soft threshold was met")
	}

	// verify no pod was killed yet because enough time has not yet passed
	if podKiller.pod != nil {
		t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
	}

	// step forward in time past the grace period
	fakeClock.Step(3 * time.Minute)
	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure since soft threshold was met")
	}

	// verify the critical pod was not evicted even though the grace period expired
	if podKiller.pod == podToEvict {
		t.Errorf("Manager chose to kill critical pod: %v, but should have ignored it", podKiller.pod.Name)
	}
	// reset state
	podKiller.pod = nil
	podKiller.gracePeriodOverride = nil

	// remove memory pressure
	fakeClock.Step(20 * time.Minute)
	summaryProvider.result = summaryStatsMaker("3Gi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should not have memory pressure
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

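	// clear the pod's priority so it is no longer critical; the annotations still
	// mark it as a static pod in the kube-system namespace.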
	pods[0].Annotations = map[string]string{
		kubelettypes.ConfigSourceAnnotationKey: kubelettypes.FileSource,
	}
	pods[0].Spec.Priority = nil
	pods[0].Namespace = kubeapi.NamespaceSystem

	// induce memory pressure!
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}
}

// TestAllocatableMemoryPressure verifies eviction and admission behavior when
// the allocatable memory available signal crosses its threshold.
func TestAllocatableMemoryPressure(t *testing.T) {
	podMaker := makePodWithMemoryStats
	summaryStatsMaker := makeMemoryStats
	podsToMake := []podToMake{
		{name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
		{name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
		{name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"},
	}
	pods := []*v1.Pod{}
	podStats := map[*v1.Pod]statsapi.PodStats{}
	for _, podToMake := range podsToMake {
		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
		pods = append(pods, pod)
		podStats[pod] = podStat
	}
	podToEvict := pods[4]
	activePodsFunc := func() []*v1.Pod {
		return pods
	}

	fakeClock := testingclock.NewFakeClock(time.Now())
	podKiller := &mockPodKiller{}
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal:   evictionapi.SignalAllocatableMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
			},
		},
	}
	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("4Gi", podStats)}
	manager := &managerImpl{
		clock:                        fakeClock,
		killPodFunc:                  podKiller.killPodNow,
		imageGC:                      diskGC,
		containerGC:                  diskGC,
		config:                       config,
		recorder:                     &record.FakeRecorder{},
		summaryProvider:              summaryProvider,
		nodeRef:                      nodeRef,
		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
		thresholdsFirstObservedAt:    thresholdsObservedAt{},
	}

	// create a best-effort and a burstable pod to test admission
	bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi")
	burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi")

	// synchronize
	_, err := manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should not have memory pressure
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

	// try to admit our pods (they should succeed)
	expected := []bool{true, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

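	// pressure is induced by adding a guaranteed pod with a 1Gi working set and
	// dropping available memory below the 1Gi allocatable threshold.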
	// induce memory pressure!
	fakeClock.Step(1 * time.Minute)
	pod, podStat := podMaker("guaranteed-high-2", defaultPriority, newResourceList("100m", "1Gi", ""), newResourceList("100m", "1Gi", ""), "1Gi")
	podStats[pod] = podStat
	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}

	// check the right pod was killed
	if podKiller.pod != podToEvict {
		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
	}
	observedGracePeriod := *podKiller.gracePeriodOverride
	if observedGracePeriod != int64(0) {
		t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
	}
	// reset state
	podKiller.pod = nil
	podKiller.gracePeriodOverride = nil

	// the best-effort pod should not be admitted; the burstable pod should
	expected = []bool{false, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

	// reduce memory pressure
	fakeClock.Step(1 * time.Minute)
	for pod := range podStats {
		if pod.Name == "guaranteed-high-2" {
			delete(podStats, pod)
		}
	}
	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
	podKiller.pod = nil // reset state
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should have memory pressure (because transition period not yet met)
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}

	// no pod should have been killed
	if podKiller.pod != nil {
		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
	}

	// the best-effort pod should not be admitted; the burstable pod should
	expected = []bool{false, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

	// move the clock past transition period to ensure that we stop reporting pressure
	fakeClock.Step(5 * time.Minute)
	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
	podKiller.pod = nil // reset state
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should not have memory pressure (because transition period met)
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

	// no pod should have been killed
	if podKiller.pod != nil {
		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
	}

	// all pods should be admitted now
	expected = []bool{true, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}
}

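// TestUpdateMemcgThreshold verifies that memcg threshold notifiers are refreshed
// at most once per notifierRefreshInterval and that a notifier error does not
// fail synchronize.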
func TestUpdateMemcgThreshold(t *testing.T) {
	activePodsFunc := func() []*v1.Pod {
		return []*v1.Pod{}
	}

	fakeClock := testingclock.NewFakeClock(time.Now())
	podKiller := &mockPodKiller{}
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal:   evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
			},
		},
		PodCgroupRoot: "kubepods",
	}
	summaryProvider := &fakeSummaryProvider{result: makeMemoryStats("2Gi", map[*v1.Pod]statsapi.PodStats{})}

	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

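	// expect exactly two updates across the three synchronize calls below:
	// one on the first run, and one more only after the clock has been
	// stepped past notifierRefreshInterval.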
thresholdNotifier := NewMockThresholdNotifier(mockCtrl)
|
|
thresholdNotifier.EXPECT().UpdateThreshold(summaryProvider.result).Return(nil).Times(2)
|
|
|
|
manager := &managerImpl{
|
|
clock: fakeClock,
|
|
killPodFunc: podKiller.killPodNow,
|
|
imageGC: diskGC,
|
|
containerGC: diskGC,
|
|
config: config,
|
|
recorder: &record.FakeRecorder{},
|
|
summaryProvider: summaryProvider,
|
|
nodeRef: nodeRef,
|
|
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
|
|
thresholdsFirstObservedAt: thresholdsObservedAt{},
|
|
thresholdNotifiers: []ThresholdNotifier{thresholdNotifier},
|
|
}
|
|
|
|
// The UpdateThreshold method should have been called once, since this is the first run.
|
|
_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
|
|
|
|
if err != nil {
|
|
t.Fatalf("Manager should not have an error %v", err)
|
|
}
|
|
|
|
// The UpdateThreshold method should not have been called again, since not enough time has passed
|
|
_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
|
|
|
|
if err != nil {
|
|
t.Fatalf("Manager should not have an error %v", err)
|
|
}
|
|
|
|
// The UpdateThreshold method should be called again since enough time has passed
|
|
fakeClock.Step(2 * notifierRefreshInterval)
|
|
_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
|
|
|
|
if err != nil {
|
|
t.Fatalf("Manager should not have an error %v", err)
|
|
}
|
|
|
|
// new memory threshold notifier that returns an error
|
|
thresholdNotifier = NewMockThresholdNotifier(mockCtrl)
|
|
thresholdNotifier.EXPECT().UpdateThreshold(summaryProvider.result).Return(fmt.Errorf("error updating threshold")).Times(1)
|
|
thresholdNotifier.EXPECT().Description().Return("mock thresholdNotifier").Times(1)
|
|
manager.thresholdNotifiers = []ThresholdNotifier{thresholdNotifier}
|
|
|
|
// The UpdateThreshold method should be called because at least notifierRefreshInterval time has passed.
|
|
// The Description method should be called because UpdateThreshold returned an error
|
|
fakeClock.Step(2 * notifierRefreshInterval)
|
|
_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
|
|
|
|
if err != nil {
|
|
t.Fatalf("Manager should not have an error %v", err)
|
|
}
|
|
}
|
|
|
|
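// TestManagerWithLocalStorageCapacityIsolationOpen verifies that, with local
// storage capacity isolation enabled, a single synchronize pass evicts every
// pod that exceeds its local storage limits.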
func TestManagerWithLocalStorageCapacityIsolationOpen(t *testing.T) {
	podMaker := makePodWithLocalStorageCapacityIsolationOpen
	summaryStatsMaker := makeDiskStats
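	// each fixture is named for the local storage constraint it is meant to
	// exceed: an emptyDir sizeLimit, a container-level ephemeral-storage
	// limit, and a pod-level ephemeral-storage limit.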
	podsToMake := []podToMake{
		{name: "empty-dir", requests: newResourceList("", "900Mi", ""), limits: newResourceList("", "1Gi", "")},
		{name: "container-ephemeral-storage-limit", requests: newResourceList("", "", "900Mi"), limits: newResourceList("", "", "800Mi")},
		{name: "pod-ephemeral-storage-limit", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "800Mi")},
	}

	pods := []*v1.Pod{}
	podStats := map[*v1.Pod]statsapi.PodStats{}
	for _, podToMake := range podsToMake {
		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
		pods = append(pods, pod)
		podStats[pod] = podStat
	}

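	// no disk thresholds are configured in this test, so evictions are
	// driven by the per-pod local storage limits above rather than by these
	// node-level filesystem stats.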
	diskStat := diskStats{
		rootFsAvailableBytes:  "1Gi",
		imageFsAvailableBytes: "200Mi",
		podStats:              podStats,
	}
	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStat)}

	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal:   evictionapi.SignalAllocatableMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
			},
		},
	}

	podKiller := &mockPodKiller{}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
	fakeClock := testingclock.NewFakeClock(time.Now())
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}

	mgr := &managerImpl{
		clock:                         fakeClock,
		killPodFunc:                   podKiller.killPodNow,
		imageGC:                       diskGC,
		containerGC:                   diskGC,
		config:                        config,
		recorder:                      &record.FakeRecorder{},
		summaryProvider:               summaryProvider,
		nodeRef:                       nodeRef,
		localStorageCapacityIsolation: true,
		dedicatedImageFs:              diskInfoProvider.dedicatedImageFs,
	}

	activePodsFunc := func() []*v1.Pod {
		return pods
	}

	evictedPods, err := mgr.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have an error but got %v", err)
	}
	if podKiller.pod == nil {
		t.Fatalf("Manager should have selected a pod for eviction")
	}

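	// every pod exceeded one of its local storage constraints, so all of
	// them should have been evicted.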
	if diff := cmp.Diff(pods, evictedPods); diff != "" {
		t.Fatalf("Unexpected evicted pods (-want,+got):\n%s", diff)
	}
}