diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go
index 1eb3716c63d..0deebaf3595 100644
--- a/pkg/kubelet/eviction/eviction_manager_test.go
+++ b/pkg/kubelet/eviction/eviction_manager_test.go
@@ -914,3 +914,228 @@ func TestNodeReclaimFuncs(t *testing.T) {
 		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod)
 	}
 }
+
+func TestDiskPressureNodeFsInodes(t *testing.T) {
+	// TODO: we need to know inodes used when cadvisor supports per container stats
+	podMaker := func(name string, requests api.ResourceList, limits api.ResourceList) (*api.Pod, statsapi.PodStats) {
+		pod := newPod(name, []api.Container{
+			newContainer(name, requests, limits),
+		}, nil)
+		podStats := newPodInodeStats(pod)
+		return pod, podStats
+	}
+	summaryStatsMaker := func(rootFsInodesFree, rootFsInodes string, podStats map[*api.Pod]statsapi.PodStats) *statsapi.Summary {
+		rootFsInodesFreeVal := resource.MustParse(rootFsInodesFree)
+		internalRootFsInodesFree := uint64(rootFsInodesFreeVal.Value())
+		rootFsInodesVal := resource.MustParse(rootFsInodes)
+		internalRootFsInodes := uint64(rootFsInodesVal.Value())
+		result := &statsapi.Summary{
+			Node: statsapi.NodeStats{
+				Fs: &statsapi.FsStats{
+					InodesFree: &internalRootFsInodesFree,
+					Inodes:     &internalRootFsInodes,
+				},
+			},
+			Pods: []statsapi.PodStats{},
+		}
+		for _, podStat := range podStats {
+			result.Pods = append(result.Pods, podStat)
+		}
+		return result
+	}
+	// TODO: pass inodes used in future when supported by cadvisor.
+	podsToMake := []struct {
+		name     string
+		requests api.ResourceList
+		limits   api.ResourceList
+	}{
+		{name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", "")},
+		{name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", "")},
+		{name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi")},
+		{name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi")},
+		{name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi")},
+		{name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi")},
+	}
+	pods := []*api.Pod{}
+	podStats := map[*api.Pod]statsapi.PodStats{}
+	for _, podToMake := range podsToMake {
+		pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits)
+		pods = append(pods, pod)
+		podStats[pod] = podStat
+	}
+	activePodsFunc := func() []*api.Pod {
+		return pods
+	}
+
+	fakeClock := clock.NewFakeClock(time.Now())
+	podKiller := &mockPodKiller{}
+	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
+	imageGC := &mockImageGC{freed: int64(0), err: nil}
+	nodeRef := &api.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
+
+	config := Config{
+		MaxPodGracePeriodSeconds: 5,
+		PressureTransitionPeriod: time.Minute * 5,
+		Thresholds: []Threshold{
+			{
+				Signal:   SignalNodeFsInodesFree,
+				Operator: OpLessThan,
+				Value: ThresholdValue{
+					Quantity: quantityMustParse("1Mi"),
+				},
+			},
+			{
+				Signal:   SignalNodeFsInodesFree,
+				Operator: OpLessThan,
+				Value: ThresholdValue{
+					Quantity: quantityMustParse("2Mi"),
+				},
+				GracePeriod: time.Minute * 2,
+			},
+		},
+	}
+	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("3Mi", "4Mi", podStats)}
+	manager := &managerImpl{
+		clock:                        fakeClock,
+		killPodFunc:                  podKiller.killPodNow,
+		imageGC:                      imageGC,
+		config:                       config,
+		recorder:                     &record.FakeRecorder{},
+		summaryProvider:              summaryProvider,
+		nodeRef:                      nodeRef,
+		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
+		thresholdsFirstObservedAt:    thresholdsObservedAt{},
+	}
+
+	// create a best effort pod to test admission
+	podToAdmit, _ := podMaker("pod-to-admit", newResourceList("", ""), newResourceList("", ""))
+
+	// synchronize
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should not have disk pressure
+	if manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should not report disk pressure")
+	}
+
+	// try to admit our pod (should succeed)
+	if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
+		t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
+	}
+
+	// induce soft threshold
+	fakeClock.Step(1 * time.Minute)
+	summaryProvider.result = summaryStatsMaker("1.5Mi", "4Mi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should have disk pressure
+	if !manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should report disk pressure since soft threshold was met")
+	}
+
+	// verify no pod was yet killed because not enough time has passed
+	if podKiller.pod != nil {
+		t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod)
+	}
+
+	// step forward in time past the grace period
+	fakeClock.Step(3 * time.Minute)
+	summaryProvider.result = summaryStatsMaker("1.5Mi", "4Mi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should have disk pressure
+	if !manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should report disk pressure since soft threshold was met")
+	}
+
+	// verify the right pod was killed with the right grace period.
+	if podKiller.pod != pods[0] {
+		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0])
+	}
+	if podKiller.gracePeriodOverride == nil {
+		t.Errorf("Manager chose to kill pod but should have had a grace period override.")
+	}
+	observedGracePeriod := *podKiller.gracePeriodOverride
+	if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
+		t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
+	}
+	// reset state
+	podKiller.pod = nil
+	podKiller.gracePeriodOverride = nil
+
+	// remove disk pressure
+	fakeClock.Step(20 * time.Minute)
+	summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should not have disk pressure
+	if manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should not report disk pressure")
+	}
+
+	// induce disk pressure!
+	fakeClock.Step(1 * time.Minute)
+	summaryProvider.result = summaryStatsMaker("0.5Mi", "4Mi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should have disk pressure
+	if !manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should report disk pressure")
+	}
+
+	// check the right pod was killed
+	if podKiller.pod != pods[0] {
+		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0])
+	}
+	observedGracePeriod = *podKiller.gracePeriodOverride
+	if observedGracePeriod != int64(0) {
+		t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
+	}
+
+	// try to admit our pod (should fail)
+	if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
+		t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
+	}
+
+	// reduce disk pressure
+	fakeClock.Step(1 * time.Minute)
+	summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats)
+	podKiller.pod = nil // reset state
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should have disk pressure (because transition period not yet met)
+	if !manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should report disk pressure")
+	}
+
+	// no pod should have been killed
+	if podKiller.pod != nil {
+		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod)
+	}
+
+	// try to admit our pod (should fail)
+	if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
+		t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
+	}
+
+	// move the clock past transition period to ensure that we stop reporting pressure
+	fakeClock.Step(5 * time.Minute)
+	summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats)
+	podKiller.pod = nil // reset state
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should not have disk pressure (because transition period met)
+	if manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should not report disk pressure")
+	}
+
+	// no pod should have been killed
+	if podKiller.pod != nil {
+		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod)
+	}
+
+	// try to admit our pod (should succeed)
+	if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
+		t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
+	}
+}
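Editor's note (not part of the patch): the thresholds the test wires directly into Config above correspond to what an operator would express with the existing kubelet eviction flags once these signals are accepted by the parser, roughly:

    --eviction-hard=nodefs.inodesFree<1Mi
    --eviction-soft=nodefs.inodesFree<2Mi
    --eviction-soft-grace-period=nodefs.inodesFree=2m

The soft threshold only triggers eviction after its grace period has elapsed, which is why the test steps the fake clock forward before expecting a pod to be killed.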
diff --git a/pkg/kubelet/eviction/helpers.go b/pkg/kubelet/eviction/helpers.go
index c2025f44ae7..8221c489728 100644
--- a/pkg/kubelet/eviction/helpers.go
+++ b/pkg/kubelet/eviction/helpers.go
@@ -41,10 +41,16 @@ const (
 	message = "The node was low on compute resources."
 	// disk, in bytes. internal to this module, used to account for local disk usage.
 	resourceDisk api.ResourceName = "disk"
+	// inodes, number. internal to this module, used to account for local disk inode consumption.
+	resourceInodes api.ResourceName = "inodes"
 	// imagefs, in bytes. internal to this module, used to account for local image filesystem usage.
 	resourceImageFs api.ResourceName = "imagefs"
+	// imagefs inodes, number. internal to this module, used to account for local image filesystem inodes.
+	resourceImageFsInodes api.ResourceName = "imagefsInodes"
 	// nodefs, in bytes. internal to this module, used to account for local node root filesystem usage.
 	resourceNodeFs api.ResourceName = "nodefs"
+	// nodefs inodes, number. internal to this module, used to account for local node root filesystem inodes.
+	resourceNodeFsInodes api.ResourceName = "nodefsInodes"
 )
 
 var (
@@ -62,12 +68,16 @@ func init() {
 	signalToNodeCondition[SignalMemoryAvailable] = api.NodeMemoryPressure
 	signalToNodeCondition[SignalImageFsAvailable] = api.NodeDiskPressure
 	signalToNodeCondition[SignalNodeFsAvailable] = api.NodeDiskPressure
+	signalToNodeCondition[SignalImageFsInodesFree] = api.NodeDiskPressure
+	signalToNodeCondition[SignalNodeFsInodesFree] = api.NodeDiskPressure
 
 	// map signals to resources (and vice-versa)
 	signalToResource = map[Signal]api.ResourceName{}
 	signalToResource[SignalMemoryAvailable] = api.ResourceMemory
 	signalToResource[SignalImageFsAvailable] = resourceImageFs
+	signalToResource[SignalImageFsInodesFree] = resourceImageFsInodes
 	signalToResource[SignalNodeFsAvailable] = resourceNodeFs
+	signalToResource[SignalNodeFsInodesFree] = resourceNodeFsInodes
 	resourceToSignal = map[api.ResourceName]Signal{}
 	for key, value := range signalToResource {
 		resourceToSignal[value] = key
@@ -185,22 +195,21 @@ func parseThresholdStatement(statement string) (Threshold, error) {
 				Percentage: percentage,
 			},
 		}, nil
-	} else {
-		quantity, err := resource.ParseQuantity(quantityValue)
-		if err != nil {
-			return Threshold{}, err
-		}
-		if quantity.Sign() < 0 || quantity.IsZero() {
-			return Threshold{}, fmt.Errorf("eviction threshold %v must be positive: %s", signal, &quantity)
-		}
-		return Threshold{
-			Signal:   signal,
-			Operator: operator,
-			Value: ThresholdValue{
-				Quantity: &quantity,
-			},
-		}, nil
 	}
+	quantity, err := resource.ParseQuantity(quantityValue)
+	if err != nil {
+		return Threshold{}, err
+	}
+	if quantity.Sign() < 0 || quantity.IsZero() {
+		return Threshold{}, fmt.Errorf("eviction threshold %v must be positive: %s", signal, &quantity)
+	}
+	return Threshold{
+		Signal:   signal,
+		Operator: operator,
+		Value: ThresholdValue{
+			Quantity: &quantity,
+		},
+	}, nil
 }
 
 // parsePercentage parses a string representing a percentage value
@@ -287,6 +296,18 @@ func diskUsage(fsStats *statsapi.FsStats) *resource.Quantity {
 	return resource.NewQuantity(usage, resource.BinarySI)
 }
 
+// inodeUsage converts inodes consumed into a resource quantity.
+func inodeUsage(fsStats *statsapi.FsStats) *resource.Quantity {
+	// TODO: cadvisor needs to support inodes used per container
+	// right now, cadvisor reports total inodes and inodes free per filesystem.
+	// this is insufficient to know how many inodes are consumed by the container.
+	// for example, with the overlay driver, the rootfs and each container filesystem
+	// will report the same total inode and inode free values, but there is no way of knowing
+	// how many of the inodes consumed in that filesystem are charged to this container.
+	// for now, we report 0 as inode usage pending support in cadvisor.
+	return resource.NewQuantity(int64(0), resource.BinarySI)
+}
+
 // memoryUsage converts working set into a resource quantity.
 func memoryUsage(memStats *statsapi.MemoryStats) *resource.Quantity {
 	if memStats == nil || memStats.WorkingSetBytes == nil {
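Editorial sketch, not part of the patch: once cadvisor reports per-container inode usage, inodeUsage could mirror diskUsage and read the value straight from the stats object. The InodesUsed field below is hypothetical; it does not exist in statsapi at this point, so this is illustration only.

    // Hypothetical future version of inodeUsage; assumes a statsapi.FsStats.InodesUsed
    // field that cadvisor does not yet provide.
    func inodeUsage(fsStats *statsapi.FsStats) *resource.Quantity {
        if fsStats == nil || fsStats.InodesUsed == nil {
            return &resource.Quantity{Format: resource.BinarySI}
        }
        return resource.NewQuantity(int64(*fsStats.InodesUsed), resource.BinarySI)
    }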
@@ -311,15 +332,18 @@ func localVolumeNames(pod *api.Pod) []string {
 	return result
 }
 
-// podDiskUsage aggregates pod disk usage for the specified stats to measure.
+// podDiskUsage aggregates pod disk usage and inode consumption for the specified stats to measure.
 func podDiskUsage(podStats statsapi.PodStats, pod *api.Pod, statsToMeasure []fsStatsType) (api.ResourceList, error) {
 	disk := resource.Quantity{Format: resource.BinarySI}
+	inodes := resource.Quantity{Format: resource.BinarySI}
 	for _, container := range podStats.Containers {
 		if hasFsStatsType(statsToMeasure, fsStatsRoot) {
 			disk.Add(*diskUsage(container.Rootfs))
+			inodes.Add(*inodeUsage(container.Rootfs))
 		}
 		if hasFsStatsType(statsToMeasure, fsStatsLogs) {
 			disk.Add(*diskUsage(container.Logs))
+			inodes.Add(*inodeUsage(container.Logs))
 		}
 	}
 	if hasFsStatsType(statsToMeasure, fsStatsLocalVolumeSource) {
@@ -328,13 +352,15 @@ func podDiskUsage(podStats statsapi.PodStats, pod *api.Pod, statsToMeasure []fsS
 		for _, volumeStats := range podStats.VolumeStats {
 			if volumeStats.Name == volumeName {
 				disk.Add(*diskUsage(&volumeStats.FsStats))
+				inodes.Add(*inodeUsage(&volumeStats.FsStats))
 				break
 			}
 		}
 	}
 	return api.ResourceList{
-		resourceDisk: disk,
+		resourceDisk:   disk,
+		resourceInodes: inodes,
 	}, nil
 }
 
@@ -502,8 +528,8 @@ func memory(stats statsFunc) cmpFunc {
 	}
 }
 
-// disk compares pods by largest consumer of disk relative to request.
-func disk(stats statsFunc, fsStatsToMeasure []fsStatsType) cmpFunc {
+// disk compares pods by largest consumer of disk relative to request for the specified disk resource.
+func disk(stats statsFunc, fsStatsToMeasure []fsStatsType, diskResource api.ResourceName) cmpFunc {
 	return func(p1, p2 *api.Pod) int {
 		p1Stats, found := stats(p1)
 		// if we have no usage stats for p1, we want p2 first
@@ -528,8 +554,8 @@ func disk(stats statsFunc, fsStatsToMeasure []fsStatsType) cmpFunc {
 
 		// disk is best effort, so we don't measure relative to a request.
 		// TODO: add disk as a guaranteed resource
-		p1Disk := p1Usage[resourceDisk]
-		p2Disk := p2Usage[resourceDisk]
+		p1Disk := p1Usage[diskResource]
+		p2Disk := p2Usage[diskResource]
 		// if p2 is using more than p1, we want p2 first
 		return p2Disk.Cmp(p1Disk)
 	}
@@ -541,9 +567,9 @@ func rankMemoryPressure(pods []*api.Pod, stats statsFunc) {
 }
 
 // rankDiskPressureFunc returns a rankFunc that measures the specified fs stats.
-func rankDiskPressureFunc(fsStatsToMeasure []fsStatsType) rankFunc {
+func rankDiskPressureFunc(fsStatsToMeasure []fsStatsType, diskResource api.ResourceName) rankFunc {
 	return func(pods []*api.Pod, stats statsFunc) {
-		orderedBy(qosComparator, disk(stats, fsStatsToMeasure)).Sort(pods)
+		orderedBy(qosComparator, disk(stats, fsStatsToMeasure, diskResource)).Sort(pods)
 	}
 }
 
@@ -564,6 +590,7 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider) (signalObserv
 	if err != nil {
 		return nil, nil, err
 	}
+	// build the function to work against for pod stats
 	statsFunc := cachedStatsFunc(summary.Pods)
 	// build an evaluation context for current eviction signals
@@ -575,17 +602,33 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider) (signalObserv
 			capacity:  resource.NewQuantity(int64(*memory.AvailableBytes+*memory.WorkingSetBytes), resource.BinarySI),
 		}
 	}
-	if nodeFs := summary.Node.Fs; nodeFs != nil && nodeFs.AvailableBytes != nil && nodeFs.CapacityBytes != nil {
-		result[SignalNodeFsAvailable] = signalObservation{
-			available: resource.NewQuantity(int64(*nodeFs.AvailableBytes), resource.BinarySI),
-			capacity:  resource.NewQuantity(int64(*nodeFs.CapacityBytes), resource.BinarySI),
+	if nodeFs := summary.Node.Fs; nodeFs != nil {
+		if nodeFs.AvailableBytes != nil && nodeFs.CapacityBytes != nil {
+			result[SignalNodeFsAvailable] = signalObservation{
+				available: resource.NewQuantity(int64(*nodeFs.AvailableBytes), resource.BinarySI),
+				capacity:  resource.NewQuantity(int64(*nodeFs.CapacityBytes), resource.BinarySI),
+			}
+		}
+		if nodeFs.InodesFree != nil && nodeFs.Inodes != nil {
+			result[SignalNodeFsInodesFree] = signalObservation{
+				available: resource.NewQuantity(int64(*nodeFs.InodesFree), resource.BinarySI),
+				capacity:  resource.NewQuantity(int64(*nodeFs.Inodes), resource.BinarySI),
+			}
 		}
 	}
 	if summary.Node.Runtime != nil {
-		if imageFs := summary.Node.Runtime.ImageFs; imageFs != nil && imageFs.AvailableBytes != nil && imageFs.CapacityBytes != nil {
-			result[SignalImageFsAvailable] = signalObservation{
-				available: resource.NewQuantity(int64(*imageFs.AvailableBytes), resource.BinarySI),
-				capacity:  resource.NewQuantity(int64(*imageFs.CapacityBytes), resource.BinarySI),
+		if imageFs := summary.Node.Runtime.ImageFs; imageFs != nil {
+			if imageFs.AvailableBytes != nil && imageFs.CapacityBytes != nil {
+				result[SignalImageFsAvailable] = signalObservation{
+					available: resource.NewQuantity(int64(*imageFs.AvailableBytes), resource.BinarySI),
+					capacity:  resource.NewQuantity(int64(*imageFs.CapacityBytes), resource.BinarySI),
+				}
+				if imageFs.InodesFree != nil && imageFs.Inodes != nil {
+					result[SignalImageFsInodesFree] = signalObservation{
+						available: resource.NewQuantity(int64(*imageFs.InodesFree), resource.BinarySI),
+						capacity:  resource.NewQuantity(int64(*imageFs.Inodes), resource.BinarySI),
+					}
+				}
 			}
 		}
 	}
@@ -785,16 +828,20 @@ func buildResourceToRankFunc(withImageFs bool) map[api.ResourceName]rankFunc {
 	// usage of an imagefs is optional
 	if withImageFs {
 		// with an imagefs, nodefs pod rank func for eviction only includes logs and local volumes
-		resourceToRankFunc[resourceNodeFs] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource})
+		resourceToRankFunc[resourceNodeFs] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)
+		resourceToRankFunc[resourceNodeFsInodes] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
 		// with an imagefs, imagefs pod rank func for eviction only includes rootfs
-		resourceToRankFunc[resourceImageFs] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot})
+		resourceToRankFunc[resourceImageFs] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot}, resourceDisk)
+		resourceToRankFunc[resourceImageFsInodes] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot}, resourceInodes)
 	} else {
 		// without an imagefs, nodefs pod rank func for eviction looks at all fs stats
-		resourceToRankFunc[resourceNodeFs] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource})
+		resourceToRankFunc[resourceNodeFs] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)
+		resourceToRankFunc[resourceNodeFsInodes] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
 	}
 	return resourceToRankFunc
 }
 
+// PodIsEvicted returns true if the reported pod status is due to an eviction.
 func PodIsEvicted(podStatus api.PodStatus) bool {
 	return podStatus.Phase == api.PodFailed && podStatus.Reason == reason
 }
@@ -806,11 +853,14 @@ func buildResourceToNodeReclaimFuncs(imageGC ImageGC, withImageFs bool) map[api.
 	if withImageFs {
 		// with an imagefs, nodefs pressure should just delete logs
 		resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteLogs()}
+		resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{deleteLogs()}
 		// with an imagefs, imagefs pressure should delete unused images
-		resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteImages(imageGC)}
+		resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteImages(imageGC, true)}
+		resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteImages(imageGC, false)}
 	} else {
 		// without an imagefs, nodefs pressure should delete logs, and unused images
-		resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteLogs(), deleteImages(imageGC)}
+		resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteLogs(), deleteImages(imageGC, true)}
+		resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{deleteLogs(), deleteImages(imageGC, false)}
 	}
 	return resourceToReclaimFunc
 }
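Editorial sketch, not part of the patch: how a nodefs.inodesFree signal resolves through the maps built above when there is no dedicated image filesystem. The function name below is made up for illustration and assumes package-internal access within pkg/kubelet/eviction.

    // Illustration only: wiring of the inode signal to its rank and reclaim funcs.
    func exampleInodeSignalWiring(imageGC ImageGC) (rankFunc, nodeReclaimFuncs) {
        // signalToResource is populated in init(); nodefs.inodesFree maps to the
        // internal resourceNodeFsInodes resource.
        inodeResource := signalToResource[SignalNodeFsInodesFree]

        // Without a dedicated imagefs, pods are ranked by inode usage across
        // rootfs, logs, and local volumes (all zero until cadvisor reports
        // per-container inodes used).
        rank := buildResourceToRankFunc(false)[inodeResource]

        // Reclaim tries deleteLogs first, then deleteImages; bytes freed by
        // image GC are deliberately not reported as reclaim for an inode signal.
        reclaim := buildResourceToNodeReclaimFuncs(imageGC, false)[inodeResource]
        return rank, reclaim
    }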
@@ -824,13 +874,17 @@ func deleteLogs() nodeReclaimFunc {
 	}
 }
 
 // deleteImages will delete unused images to free up disk pressure.
-func deleteImages(imageGC ImageGC) nodeReclaimFunc {
+func deleteImages(imageGC ImageGC, reportBytesFreed bool) nodeReclaimFunc {
 	return func() (*resource.Quantity, error) {
 		glog.Infof("eviction manager: attempting to delete unused images")
-		reclaimed, err := imageGC.DeleteUnusedImages()
+		bytesFreed, err := imageGC.DeleteUnusedImages()
 		if err != nil {
 			return nil, err
 		}
+		reclaimed := int64(0)
+		if reportBytesFreed {
+			reclaimed = bytesFreed
+		}
 		return resource.NewQuantity(reclaimed, resource.BinarySI), nil
 	}
 }
diff --git a/pkg/kubelet/eviction/helpers_test.go b/pkg/kubelet/eviction/helpers_test.go
index fa2fbd418ea..df0aa0eb4bb 100644
--- a/pkg/kubelet/eviction/helpers_test.go
+++ b/pkg/kubelet/eviction/helpers_test.go
@@ -191,6 +191,49 @@ func TestParseThresholdConfig(t *testing.T) {
 				},
 			},
 		},
+		"inode flag values": {
+			evictionHard:            "imagefs.inodesFree<150Mi,nodefs.inodesFree<100Mi",
+			evictionSoft:            "imagefs.inodesFree<300Mi,nodefs.inodesFree<200Mi",
+			evictionSoftGracePeriod: "imagefs.inodesFree=30s,nodefs.inodesFree=30s",
+			evictionMinReclaim:      "imagefs.inodesFree=2Gi,nodefs.inodesFree=1Gi",
+			expectErr:               false,
+			expectThresholds: []Threshold{
+				{
+					Signal:   SignalImageFsInodesFree,
+					Operator: OpLessThan,
+					Value: ThresholdValue{
+						Quantity: quantityMustParse("150Mi"),
+					},
+					MinReclaim: quantityMustParse("2Gi"),
+				},
+				{
+					Signal:   SignalNodeFsInodesFree,
+					Operator: OpLessThan,
+					Value: ThresholdValue{
+						Quantity: quantityMustParse("100Mi"),
+					},
+					MinReclaim: quantityMustParse("1Gi"),
+				},
+				{
+					Signal:   SignalImageFsInodesFree,
+					Operator: OpLessThan,
+					Value: ThresholdValue{
+						Quantity: quantityMustParse("300Mi"),
+					},
+					GracePeriod: gracePeriod,
+					MinReclaim:  quantityMustParse("2Gi"),
+				},
+				{
+					Signal:   SignalNodeFsInodesFree,
+					Operator: OpLessThan,
+					Value: ThresholdValue{
+						Quantity: quantityMustParse("200Mi"),
+					},
+					GracePeriod: gracePeriod,
+					MinReclaim:  quantityMustParse("1Gi"),
+				},
+			},
+		},
 		"invalid-signal": {
 			evictionHard: "mem.available<150Mi",
 			evictionSoft: "",
@@ -400,7 +443,7 @@ func TestOrderedByDisk(t *testing.T) {
 		return result, found
 	}
 	pods := []*api.Pod{pod1, pod2, pod3, pod4, pod5, pod6}
-	orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource})).Sort(pods)
+	orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)).Sort(pods)
 	expected := []*api.Pod{pod6, pod5, pod4, pod3, pod2, pod1}
 	for i := range expected {
 		if pods[i] != expected[i] {
@@ -466,7 +509,7 @@ func TestOrderedByQoSDisk(t *testing.T) {
 		return result, found
 	}
 	pods := []*api.Pod{pod1, pod2, pod3, pod4, pod5, pod6}
-	orderedBy(qosComparator, disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource})).Sort(pods)
+	orderedBy(qosComparator, disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)).Sort(pods)
 	expected := []*api.Pod{pod2, pod1, pod4, pod3, pod6, pod5}
 	for i := range expected {
 		if pods[i] != expected[i] {
@@ -608,6 +651,10 @@ func TestMakeSignalObservations(t *testing.T) {
 	imageFsCapacityBytes := uint64(1024 * 1024 * 2)
 	nodeFsAvailableBytes := uint64(1024)
 	nodeFsCapacityBytes := uint64(1024 * 2)
+	imageFsInodesFree := uint64(1024)
+	imageFsInodes := uint64(1024 * 1024)
+	nodeFsInodesFree := uint64(1024)
+	nodeFsInodes := uint64(1024 * 1024)
 	fakeStats := &statsapi.Summary{
 		Node: statsapi.NodeStats{
 			Memory: &statsapi.MemoryStats{
@@ -618,11 +665,15 @@ func TestMakeSignalObservations(t *testing.T) {
 			ImageFs: &statsapi.FsStats{
 				AvailableBytes: &imageFsAvailableBytes,
 				CapacityBytes:  &imageFsCapacityBytes,
+				InodesFree:     &imageFsInodesFree,
+				Inodes:         &imageFsInodes,
 			},
 		},
 		Fs: &statsapi.FsStats{
 			AvailableBytes: &nodeFsAvailableBytes,
 			CapacityBytes:  &nodeFsCapacityBytes,
+			InodesFree:     &nodeFsInodesFree,
+			Inodes:         &nodeFsInodes,
 		},
 	},
 	Pods: []statsapi.PodStats{},
@@ -664,6 +715,16 @@ func TestMakeSignalObservations(t *testing.T) {
 	if expectedBytes := int64(nodeFsCapacityBytes); nodeFsQuantity.capacity.Value() != expectedBytes {
 		t.Errorf("Expected %v, actual: %v", expectedBytes, nodeFsQuantity.capacity.Value())
 	}
+	nodeFsInodesQuantity, found := actualObservations[SignalNodeFsInodesFree]
+	if !found {
+		t.Errorf("Expected inodes free nodefs observation: %v", err)
+	}
+	if expected := int64(nodeFsInodesFree); nodeFsInodesQuantity.available.Value() != expected {
+		t.Errorf("Expected %v, actual: %v", expected, nodeFsInodesQuantity.available.Value())
+	}
+	if expected := int64(nodeFsInodes); nodeFsInodesQuantity.capacity.Value() != expected {
+		t.Errorf("Expected %v, actual: %v", expected, nodeFsInodesQuantity.capacity.Value())
+	}
 	imageFsQuantity, found := actualObservations[SignalImageFsAvailable]
 	if !found {
 		t.Errorf("Expected available imagefs observation: %v", err)
@@ -674,6 +735,16 @@ func TestMakeSignalObservations(t *testing.T) {
 	if expectedBytes := int64(imageFsCapacityBytes); imageFsQuantity.capacity.Value() != expectedBytes {
 		t.Errorf("Expected %v, actual: %v", expectedBytes, imageFsQuantity.capacity.Value())
 	}
+	imageFsInodesQuantity, found := actualObservations[SignalImageFsInodesFree]
+	if !found {
+		t.Errorf("Expected inodes free imagefs observation: %v", err)
+	}
+	if expected := int64(imageFsInodesFree); imageFsInodesQuantity.available.Value() != expected {
+		t.Errorf("Expected %v, actual: %v", expected, imageFsInodesQuantity.available.Value())
+	}
+	if expected := int64(imageFsInodes); imageFsInodesQuantity.capacity.Value() != expected {
+		t.Errorf("Expected %v, actual: %v", expected, imageFsInodesQuantity.capacity.Value())
+	}
 	for _, pod := range pods {
 		podStats, found := statsFunc(pod)
 		if !found {
@@ -1204,6 +1275,22 @@ func testCompareThresholdValue(t *testing.T) {
 	}
 }
 
+// newPodInodeStats returns stats for the specified pod.
+// TODO: in future, this should take a value for inodesUsed per container.
+func newPodInodeStats(pod *api.Pod) statsapi.PodStats {
+	result := statsapi.PodStats{
+		PodRef: statsapi.PodReference{
+			Name: pod.Name, Namespace: pod.Namespace, UID: string(pod.UID),
+		},
+	}
+	for range pod.Spec.Containers {
+		result.Containers = append(result.Containers, statsapi.ContainerStats{
+			Rootfs: &statsapi.FsStats{},
+		})
+	}
+	return result
+}
+
 // newPodDiskStats returns stats with specified usage amounts.
 func newPodDiskStats(pod *api.Pod, rootFsUsed, logsUsed, perLocalVolumeUsed resource.Quantity) statsapi.PodStats {
 	result := statsapi.PodStats{
diff --git a/pkg/kubelet/eviction/types.go b/pkg/kubelet/eviction/types.go
index 6984148e194..3d85f44d2a1 100644
--- a/pkg/kubelet/eviction/types.go
+++ b/pkg/kubelet/eviction/types.go
@@ -32,8 +32,12 @@ const (
 	SignalMemoryAvailable Signal = "memory.available"
 	// SignalNodeFsAvailable is amount of storage available on filesystem that kubelet uses for volumes, daemon logs, etc.
 	SignalNodeFsAvailable Signal = "nodefs.available"
+	// SignalNodeFsInodesFree is amount of inodes available on filesystem that kubelet uses for volumes, daemon logs, etc.
+	SignalNodeFsInodesFree Signal = "nodefs.inodesFree"
 	// SignalImageFsAvailable is amount of storage available on filesystem that container runtime uses for storing images and container writable layers.
 	SignalImageFsAvailable Signal = "imagefs.available"
+	// SignalImageFsInodesFree is amount of inodes available on filesystem that container runtime uses for storing images and container writable layers.
+	SignalImageFsInodesFree Signal = "imagefs.inodesFree"
 )
 
 // fsStatsType defines the types of filesystem stats to collect.
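Closing editorial note, not part of the patch: because parseThresholdStatement (shown earlier) accepts either a quantity or a percentage, the new inode signals can also be expressed relative to total inodes. A minimal sketch of the parsed form, assuming it sits in the eviction package and that percentages are stored as fractions as in the percentage branch above:

    // Sketch: parsed form of a percentage-based inode threshold such as
    // "nodefs.inodesFree<5%" supplied via --eviction-hard.
    var exampleInodeThreshold = Threshold{
        Signal:   SignalNodeFsInodesFree,
        Operator: OpLessThan,
        Value: ThresholdValue{
            Percentage: 0.05, // "5%" of the filesystem's total inodes
        },
    }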