Merge pull request #59841 from dashpole/metrics_after_reclaim

Automatic merge from submit-queue (batch tested with PRs 59683, 59964, 59841, 59936, 59686). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Reevaluate eviction thresholds after reclaim functions

**What this PR does / why we need it**:
When the node comes under `DiskPressure` due to inodes or disk space, the eviction manager runs garbage collection functions to clean up dead containers and unused images.
Currently, we use the strategy of trying to measure the disk space and inodes freed by garbage collection.  However, as #46789 and #56573 point out, there are gaps in the implementation that can cause extra evictions even when they are not required.  Furthermore, for nodes which frequently cycle through images, it results in a large number of evictions, as running out of inodes always causes an eviction.

This PR changes this strategy to call the garbage collection functions and ignore the results.  Then, it triggers another collection of node-level metrics, and sees if the node is still under DiskPressure.
This way, we can simply observe the decrease in disk or inode usage, rather than trying to measure how much is freed.

**Which issue(s) this PR fixes**:
Fixes #46789
Fixes #56573
Related PR #56575

**Special notes for your reviewer**:
This will look cleaner after #57802  removes arguments from [makeSignalObservations](https://github.com/kubernetes/kubernetes/pull/57802/files#diff-9e5246d8c78d50ce4ba440f98663f3e9R719).

**Release note**:
```release-note
NONE
```

/sig node
/kind bug
/priority important-soon
cc @kubernetes/sig-node-pr-reviews
This commit is contained in:
Kubernetes Submit Queue
2018-02-16 16:31:33 -08:00
committed by GitHub
8 changed files with 105 additions and 83 deletions

View File

@@ -19,6 +19,8 @@ package container
import ( import (
"fmt" "fmt"
"time" "time"
"github.com/golang/glog"
) )
// Specified a policy for garbage collecting containers. // Specified a policy for garbage collecting containers.
@@ -80,5 +82,6 @@ func (cgc *realContainerGC) GarbageCollect() error {
} }
func (cgc *realContainerGC) DeleteAllUnusedContainers() error { func (cgc *realContainerGC) DeleteAllUnusedContainers() error {
glog.Infof("attempting to delete unused containers")
return cgc.runtime.GarbageCollect(cgc.policy, cgc.sourcesReadyProvider.AllReady(), true) return cgc.runtime.GarbageCollect(cgc.policy, cgc.sourcesReadyProvider.AllReady(), true)
} }

View File

@@ -349,7 +349,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
m.recorder.Eventf(m.nodeRef, v1.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim) m.recorder.Eventf(m.nodeRef, v1.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)
// check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods. // check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
if m.reclaimNodeLevelResources(resourceToReclaim, observations) { if m.reclaimNodeLevelResources(resourceToReclaim, capacityProvider, activePods) {
glog.Infof("eviction manager: able to reduce %v pressure without evicting pods.", resourceToReclaim) glog.Infof("eviction manager: able to reduce %v pressure without evicting pods.", resourceToReclaim)
return nil return nil
} }
@@ -437,26 +437,31 @@ func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods
} }
// reclaimNodeLevelResources attempts to reclaim node level resources. returns true if thresholds were satisfied and no pod eviction is required. // reclaimNodeLevelResources attempts to reclaim node level resources. returns true if thresholds were satisfied and no pod eviction is required.
func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName, observations signalObservations) bool { func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName, capacityProvider CapacityProvider, pods []*v1.Pod) bool {
nodeReclaimFuncs := m.resourceToNodeReclaimFuncs[resourceToReclaim] nodeReclaimFuncs := m.resourceToNodeReclaimFuncs[resourceToReclaim]
for _, nodeReclaimFunc := range nodeReclaimFuncs { for _, nodeReclaimFunc := range nodeReclaimFuncs {
// attempt to reclaim the pressured resource. // attempt to reclaim the pressured resource.
reclaimed, err := nodeReclaimFunc() if err := nodeReclaimFunc(); err != nil {
if err != nil {
glog.Warningf("eviction manager: unexpected error when attempting to reduce %v pressure: %v", resourceToReclaim, err) glog.Warningf("eviction manager: unexpected error when attempting to reduce %v pressure: %v", resourceToReclaim, err)
} }
// update our local observations based on the amount reported to have been reclaimed.
// note: this is optimistic, other things could have been still consuming the pressured resource in the interim. }
for _, signal := range resourceClaimToSignal[resourceToReclaim] { if len(nodeReclaimFuncs) > 0 {
value, ok := observations[signal] summary, err := m.summaryProvider.Get(true)
if !ok { if err != nil {
glog.Errorf("eviction manager: unable to find value associated with signal %v", signal) glog.Errorf("eviction manager: failed to get get summary stats after resource reclaim: %v", err)
continue return false
}
value.available.Add(*reclaimed)
} }
// evaluate all current thresholds to see if with adjusted observations, we think we have met min reclaim goals
if len(thresholdsMet(m.thresholdsMet, observations, true)) == 0 { // make observations and get a function to derive pod usage stats relative to those observations.
observations, _ := makeSignalObservations(summary, capacityProvider, pods)
debugLogObservations("observations after resource reclaim", observations)
// determine the set of thresholds met independent of grace period
thresholds := thresholdsMet(m.config.Thresholds, observations, false)
debugLogThresholdsWithObservation("thresholds after resource reclaim - ignoring grace period", thresholds, observations)
if len(thresholds) == 0 {
return true return true
} }
} }

View File

@@ -88,21 +88,28 @@ func (m *mockCapacityProvider) GetNodeAllocatableReservation() v1.ResourceList {
// mockDiskGC is used to simulate invoking image and container garbage collection. // mockDiskGC is used to simulate invoking image and container garbage collection.
type mockDiskGC struct { type mockDiskGC struct {
err error err error
imageBytesFreed int64 imageGCInvoked bool
imageGCInvoked bool containerGCInvoked bool
containerGCInvoked bool fakeSummaryProvider *fakeSummaryProvider
summaryAfterGC *statsapi.Summary
} }
// DeleteUnusedImages returns the mocked values. // DeleteUnusedImages returns the mocked values.
func (m *mockDiskGC) DeleteUnusedImages() (int64, error) { func (m *mockDiskGC) DeleteUnusedImages() error {
m.imageGCInvoked = true m.imageGCInvoked = true
return m.imageBytesFreed, m.err if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
m.fakeSummaryProvider.result = m.summaryAfterGC
}
return m.err
} }
// DeleteAllUnusedContainers returns the mocked value // DeleteAllUnusedContainers returns the mocked value
func (m *mockDiskGC) DeleteAllUnusedContainers() error { func (m *mockDiskGC) DeleteAllUnusedContainers() error {
m.containerGCInvoked = true m.containerGCInvoked = true
if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
m.fakeSummaryProvider.result = m.summaryAfterGC
}
return m.err return m.err
} }
@@ -211,7 +218,7 @@ func TestMemoryPressure(t *testing.T) {
podKiller := &mockPodKiller{} podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")}) capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
imageGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil} diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{ config := Config{
@@ -239,7 +246,8 @@ func TestMemoryPressure(t *testing.T) {
manager := &managerImpl{ manager := &managerImpl{
clock: fakeClock, clock: fakeClock,
killPodFunc: podKiller.killPodNow, killPodFunc: podKiller.killPodNow,
imageGC: imageGC, imageGC: diskGC,
containerGC: diskGC,
config: config, config: config,
recorder: &record.FakeRecorder{}, recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider, summaryProvider: summaryProvider,
@@ -432,7 +440,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
podKiller := &mockPodKiller{} podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")}) capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil} diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{ config := Config{
@@ -631,7 +639,7 @@ func TestMinReclaim(t *testing.T) {
podKiller := &mockPodKiller{} podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")}) capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil} diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{ config := Config{
@@ -774,8 +782,6 @@ func TestNodeReclaimFuncs(t *testing.T) {
podKiller := &mockPodKiller{} podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")}) capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
imageGcFree := resource.MustParse("700Mi")
diskGC := &mockDiskGC{imageBytesFreed: imageGcFree.Value(), err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{ config := Config{
@@ -795,6 +801,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
}, },
} }
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("16Gi", "200Gi", podStats)} summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("16Gi", "200Gi", podStats)}
diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil}
manager := &managerImpl{ manager := &managerImpl{
clock: fakeClock, clock: fakeClock,
killPodFunc: podKiller.killPodNow, killPodFunc: podKiller.killPodNow,
@@ -819,6 +826,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
// induce hard threshold // induce hard threshold
fakeClock.Step(1 * time.Minute) fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker(".9Gi", "200Gi", podStats) summaryProvider.result = summaryStatsMaker(".9Gi", "200Gi", podStats)
// make GC successfully return disk usage to previous levels
diskGC.summaryAfterGC = summaryStatsMaker("16Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should have disk pressure // we should have disk pressure
@@ -842,7 +851,6 @@ func TestNodeReclaimFuncs(t *testing.T) {
// remove disk pressure // remove disk pressure
fakeClock.Step(20 * time.Minute) fakeClock.Step(20 * time.Minute)
summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should not have disk pressure // we should not have disk pressure
@@ -853,6 +861,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
// induce disk pressure! // induce disk pressure!
fakeClock.Step(1 * time.Minute) fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("400Mi", "200Gi", podStats) summaryProvider.result = summaryStatsMaker("400Mi", "200Gi", podStats)
// Dont reclaim any disk
diskGC.summaryAfterGC = summaryStatsMaker("400Mi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should have disk pressure // we should have disk pressure
@@ -972,7 +982,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
podKiller := &mockPodKiller{} podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")}) capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil} diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{ config := Config{
@@ -1175,7 +1185,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
podKiller := &mockPodKiller{} podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")}) capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil} diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{ nodeRef := &v1.ObjectReference{
Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "", Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
} }
@@ -1308,7 +1318,7 @@ func TestAllocatableMemoryPressure(t *testing.T) {
podKiller := &mockPodKiller{} podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")}) capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil} diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{ config := Config{

View File

@@ -1071,38 +1071,15 @@ func buildResourceToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, w
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{} resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{} resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{}
// with an imagefs, imagefs pressure should delete unused images // with an imagefs, imagefs pressure should delete unused images
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)} resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)} resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
} else { } else {
// without an imagefs, nodefs pressure should delete logs, and unused images // without an imagefs, nodefs pressure should delete logs, and unused images
// since imagefs and nodefs share a common device, they share common reclaim functions // since imagefs and nodefs share a common device, they share common reclaim functions
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)} resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)} resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)} resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)} resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
} }
return resourceToReclaimFunc return resourceToReclaimFunc
} }
// deleteTerminatedContainers will delete terminated containers to free up disk pressure.
func deleteTerminatedContainers(containerGC ContainerGC) nodeReclaimFunc {
return func() (*resource.Quantity, error) {
glog.Infof("eviction manager: attempting to delete unused containers")
err := containerGC.DeleteAllUnusedContainers()
// Calculating bytes freed is not yet supported.
return resource.NewQuantity(int64(0), resource.BinarySI), err
}
}
// deleteImages will delete unused images to free up disk pressure.
func deleteImages(imageGC ImageGC, reportBytesFreed bool) nodeReclaimFunc {
return func() (*resource.Quantity, error) {
glog.Infof("eviction manager: attempting to delete unused images")
bytesFreed, err := imageGC.DeleteUnusedImages()
reclaimed := int64(0)
if reportBytesFreed {
reclaimed = bytesFreed
}
return resource.NewQuantity(reclaimed, resource.BinarySI), err
}
}

View File

@@ -81,15 +81,13 @@ type CapacityProvider interface {
// ImageGC is responsible for performing garbage collection of unused images. // ImageGC is responsible for performing garbage collection of unused images.
type ImageGC interface { type ImageGC interface {
// DeleteUnusedImages deletes unused images and returns the number of bytes freed, and an error. // DeleteUnusedImages deletes unused images.
// This returns the bytes freed even if an error is returned. DeleteUnusedImages() error
DeleteUnusedImages() (int64, error)
} }
// ContainerGC is responsible for performing garbage collection of unused containers. // ContainerGC is responsible for performing garbage collection of unused containers.
type ContainerGC interface { type ContainerGC interface {
// DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted. // DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted.
// It returns an error if it is unsuccessful.
DeleteAllUnusedContainers() error DeleteAllUnusedContainers() error
} }
@@ -134,9 +132,7 @@ type thresholdsObservedAt map[evictionapi.Threshold]time.Time
type nodeConditionsObservedAt map[v1.NodeConditionType]time.Time type nodeConditionsObservedAt map[v1.NodeConditionType]time.Time
// nodeReclaimFunc is a function that knows how to reclaim a resource from the node without impacting pods. // nodeReclaimFunc is a function that knows how to reclaim a resource from the node without impacting pods.
// Returns the quantity of resources reclaimed and an error, if applicable. type nodeReclaimFunc func() error
// nodeReclaimFunc return the resources reclaimed even if an error occurs.
type nodeReclaimFunc func() (*resource.Quantity, error)
// nodeReclaimFuncs is an ordered list of nodeReclaimFunc // nodeReclaimFuncs is an ordered list of nodeReclaimFunc
type nodeReclaimFuncs []nodeReclaimFunc type nodeReclaimFuncs []nodeReclaimFunc

View File

@@ -56,8 +56,8 @@ type ImageGCManager interface {
GetImageList() ([]container.Image, error) GetImageList() ([]container.Image, error)
// Delete all unused images and returns the number of bytes freed. The number of bytes freed is always returned. // Delete all unused images.
DeleteUnusedImages() (int64, error) DeleteUnusedImages() error
} }
// A policy for garbage collecting images. Policy defines an allowed band in // A policy for garbage collecting images. Policy defines an allowed band in
@@ -308,8 +308,10 @@ func (im *realImageGCManager) GarbageCollect() error {
return nil return nil
} }
func (im *realImageGCManager) DeleteUnusedImages() (int64, error) { func (im *realImageGCManager) DeleteUnusedImages() error {
return im.freeSpace(math.MaxInt64, time.Now()) glog.Infof("attempting to delete unused images")
_, err := im.freeSpace(math.MaxInt64, time.Now())
return err
} }
// Tries to free bytesToFree worth of images on the disk. // Tries to free bytesToFree worth of images on the disk.

View File

@@ -187,10 +187,10 @@ func TestDeleteUnusedImagesExemptSandboxImage(t *testing.T) {
}, },
} }
spaceFreed, err := manager.DeleteUnusedImages() err := manager.DeleteUnusedImages()
assert := assert.New(t) assert := assert.New(t)
assert.Len(fakeRuntime.ImageList, 1)
require.NoError(t, err) require.NoError(t, err)
assert.EqualValues(0, spaceFreed)
} }
func TestDetectImagesContainerStopped(t *testing.T) { func TestDetectImagesContainerStopped(t *testing.T) {
@@ -291,10 +291,9 @@ func TestDeleteUnusedImagesRemoveAllUnusedImages(t *testing.T) {
}}, }},
} }
spaceFreed, err := manager.DeleteUnusedImages() err := manager.DeleteUnusedImages()
assert := assert.New(t) assert := assert.New(t)
require.NoError(t, err) require.NoError(t, err)
assert.EqualValues(3072, spaceFreed)
assert.Len(fakeRuntime.ImageList, 1) assert.Len(fakeRuntime.ImageList, 1)
} }

View File

@@ -52,7 +52,8 @@ const (
pressureDelay = 20 * time.Second pressureDelay = 20 * time.Second
testContextFmt = "when we run containers that should cause %s" testContextFmt = "when we run containers that should cause %s"
noPressure = v1.NodeConditionType("NoPressure") noPressure = v1.NodeConditionType("NoPressure")
lotsOfDisk = 10240 // 10 Gb in Mb lotsOfDisk = 10240 // 10 Gb in Mb
lotsOfFiles = 1000000000 // 1 billion
) )
// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods. // InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
@@ -76,11 +77,11 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", fun
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logInodeMetrics, []podEvictSpec{ runEvictionTest(f, pressureTimeout, expectedNodeCondition, logInodeMetrics, []podEvictSpec{
{ {
evictionPriority: 1, evictionPriority: 1,
pod: inodeConsumingPod("container-inode-hog", nil), pod: inodeConsumingPod("container-inode-hog", lotsOfFiles, nil),
}, },
{ {
evictionPriority: 1, evictionPriority: 1,
pod: inodeConsumingPod("volume-inode-hog", &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}), pod: inodeConsumingPod("volume-inode-hog", lotsOfFiles, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
}, },
{ {
evictionPriority: 0, evictionPriority: 0,
@@ -90,6 +91,35 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", fun
}) })
}) })
// ImageGCNoEviction tests that the node does not evict pods when inodes are consumed by images
// Disk pressure is induced by pulling large images
var _ = framework.KubeDescribe("ImageGCNoEviction [Slow] [Serial] [Disruptive]", func() {
f := framework.NewDefaultFramework("image-gc-eviction-test")
pressureTimeout := 10 * time.Minute
expectedNodeCondition := v1.NodeDiskPressure
inodesConsumed := uint64(100000)
Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
summary := eventuallyGetSummary()
inodesFree := *summary.Node.Fs.InodesFree
if inodesFree <= inodesConsumed {
framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
}
initialConfig.EvictionHard = map[string]string{"nodefs.inodesFree": fmt.Sprintf("%d", inodesFree-inodesConsumed)}
initialConfig.EvictionMinimumReclaim = map[string]string{}
})
// Consume enough inodes to induce disk pressure,
// but expect that image garbage collection can reduce it enough to avoid an eviction
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
{
evictionPriority: 0,
pod: inodeConsumingPod("container-inode", 110000, nil),
},
})
})
})
// MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods. // MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods.
// Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved. // Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive]", func() { var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive]", func() {
@@ -630,19 +660,19 @@ const (
volumeName = "test-volume" volumeName = "test-volume"
) )
func inodeConsumingPod(name string, volumeSource *v1.VolumeSource) *v1.Pod { func inodeConsumingPod(name string, numFiles int, volumeSource *v1.VolumeSource) *v1.Pod {
// Each iteration creates an empty file // Each iteration creates an empty file
return podWithCommand(volumeSource, v1.ResourceRequirements{}, name, "i=0; while true; do touch %s${i}.txt; sleep 0.001; i=$((i+=1)); done;") return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, "touch %s${i}.txt; sleep 0.001")
} }
func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod { func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
// Each iteration writes 1 Mb, so do diskConsumedMB iterations. // Each iteration writes 1 Mb, so do diskConsumedMB iterations.
return podWithCommand(volumeSource, resources, name, fmt.Sprintf("i=0; while [ $i -lt %d ];", diskConsumedMB)+" do dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null ; i=$(($i+1)); done; while true; do sleep 5; done") return podWithCommand(volumeSource, resources, diskConsumedMB, name, "dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null")
} }
// podWithCommand returns a pod with the provided volumeSource and resourceRequirements. // podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
// If a volumeSource is provided, then the volumeMountPath to the volume is inserted into the provided command. // If a volumeSource is provided, then the volumeMountPath to the volume is inserted into the provided command.
func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, name, command string) *v1.Pod { func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, iterations int, name, command string) *v1.Pod {
path := "" path := ""
volumeMounts := []v1.VolumeMount{} volumeMounts := []v1.VolumeMount{}
volumes := []v1.Volume{} volumes := []v1.Volume{}
@@ -662,7 +692,7 @@ func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirem
Command: []string{ Command: []string{
"sh", "sh",
"-c", "-c",
fmt.Sprintf(command, filepath.Join(path, "file")), fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s; i=$(($i+1)); done; while true; do sleep 5; done", iterations, fmt.Sprintf(command, filepath.Join(path, "file"))),
}, },
Resources: resources, Resources: resources,
VolumeMounts: volumeMounts, VolumeMounts: volumeMounts,