Merge pull request #42204 from dashpole/allocatable_eviction
Automatic merge from submit-queue Eviction Manager Enforces Allocatable Thresholds This PR modifies the eviction manager to enforce node allocatable thresholds for memory as described in kubernetes/community#348. This PR should be merged after #41234. cc @kubernetes/sig-node-pr-reviews @kubernetes/sig-node-feature-requests @vishh ** Why is this a bug/regression** Kubelet uses `oom_score_adj` to enforce QoS policies. But the `oom_score_adj` is based on overall memory requested, which means that a Burstable pod that requested a lot of memory can lead to OOM kills for Guaranteed pods, which violates QoS. Even worse, we have observed system daemons like kubelet or kube-proxy being killed by the OOM killer. Without this PR, v1.6 will have node stability issues and regressions in an existing GA feature `out of Resource` handling.
This commit is contained in:
		| @@ -520,7 +520,7 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) { | ||||
| 		var hardEvictionThresholds []evictionapi.Threshold | ||||
| 		// If the user requested to ignore eviction thresholds, then do not set valid values for hardEvictionThresholds here. | ||||
| 		if !s.ExperimentalNodeAllocatableIgnoreEvictionThreshold { | ||||
| 			hardEvictionThresholds, err = eviction.ParseThresholdConfig(s.EvictionHard, "", "", "") | ||||
| 			hardEvictionThresholds, err = eviction.ParseThresholdConfig([]string{}, s.EvictionHard, "", "", "") | ||||
| 			if err != nil { | ||||
| 				return err | ||||
| 			} | ||||
|   | ||||
| @@ -33,6 +33,7 @@ go_test( | ||||
|         "//pkg/api:go_default_library", | ||||
|         "//pkg/api/v1:go_default_library", | ||||
|         "//pkg/kubelet/api/v1alpha1/stats:go_default_library", | ||||
|         "//pkg/kubelet/cm:go_default_library", | ||||
|         "//pkg/kubelet/eviction/api:go_default_library", | ||||
|         "//pkg/kubelet/lifecycle:go_default_library", | ||||
|         "//pkg/kubelet/types:go_default_library", | ||||
|   | ||||
| @@ -36,6 +36,8 @@ const ( | ||||
| 	SignalImageFsAvailable Signal = "imagefs.available" | ||||
| 	// SignalImageFsInodesFree is amount of inodes available on filesystem that container runtime uses for storing images and container writeable layers. | ||||
| 	SignalImageFsInodesFree Signal = "imagefs.inodesFree" | ||||
| 	// SignalAllocatableMemoryAvailable is amount of memory available for pod allocation (i.e. allocatable - workingSet (of pods), in bytes. | ||||
| 	SignalAllocatableMemoryAvailable Signal = "allocatableMemory.available" | ||||
| ) | ||||
|  | ||||
| // ThresholdOperator is the operator used to express a Threshold. | ||||
|   | ||||
| @@ -134,9 +134,9 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd | ||||
| } | ||||
|  | ||||
| // Start starts the control loop to observe and response to low compute resources. | ||||
| func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, monitoringInterval time.Duration) { | ||||
| func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, nodeProvider NodeProvider, monitoringInterval time.Duration) { | ||||
| 	// start the eviction manager monitoring | ||||
| 	go wait.Until(func() { m.synchronize(diskInfoProvider, podFunc) }, monitoringInterval, wait.NeverStop) | ||||
| 	go wait.Until(func() { m.synchronize(diskInfoProvider, podFunc, nodeProvider) }, monitoringInterval, wait.NeverStop) | ||||
| } | ||||
|  | ||||
| // IsUnderMemoryPressure returns true if the node is under memory pressure. | ||||
| @@ -187,7 +187,7 @@ func startMemoryThresholdNotifier(thresholds []evictionapi.Threshold, observatio | ||||
| } | ||||
|  | ||||
| // synchronize is the main control loop that enforces eviction thresholds. | ||||
| func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) { | ||||
| func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, nodeProvider NodeProvider) { | ||||
| 	// if we have nothing to do, just return | ||||
| 	thresholds := m.config.Thresholds | ||||
| 	if len(thresholds) == 0 { | ||||
| @@ -209,7 +209,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act | ||||
| 	} | ||||
|  | ||||
| 	// make observations and get a function to derive pod usage stats relative to those observations. | ||||
| 	observations, statsFunc, err := makeSignalObservations(m.summaryProvider) | ||||
| 	observations, statsFunc, err := makeSignalObservations(m.summaryProvider, nodeProvider) | ||||
| 	if err != nil { | ||||
| 		glog.Errorf("eviction manager: unexpected err: %v", err) | ||||
| 		return | ||||
| @@ -224,7 +224,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act | ||||
| 		err = startMemoryThresholdNotifier(m.config.Thresholds, observations, false, func(desc string) { | ||||
| 			glog.Infof("soft memory eviction threshold crossed at %s", desc) | ||||
| 			// TODO wait grace period for soft memory limit | ||||
| 			m.synchronize(diskInfoProvider, podFunc) | ||||
| 			m.synchronize(diskInfoProvider, podFunc, nodeProvider) | ||||
| 		}) | ||||
| 		if err != nil { | ||||
| 			glog.Warningf("eviction manager: failed to create hard memory threshold notifier: %v", err) | ||||
| @@ -232,7 +232,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act | ||||
| 		// start hard memory notification | ||||
| 		err = startMemoryThresholdNotifier(m.config.Thresholds, observations, true, func(desc string) { | ||||
| 			glog.Infof("hard memory eviction threshold crossed at %s", desc) | ||||
| 			m.synchronize(diskInfoProvider, podFunc) | ||||
| 			m.synchronize(diskInfoProvider, podFunc, nodeProvider) | ||||
| 		}) | ||||
| 		if err != nil { | ||||
| 			glog.Warningf("eviction manager: failed to create soft memory threshold notifier: %v", err) | ||||
|   | ||||
| @@ -59,6 +59,24 @@ func (m *mockDiskInfoProvider) HasDedicatedImageFs() (bool, error) { | ||||
| 	return m.dedicatedImageFs, nil | ||||
| } | ||||
|  | ||||
| func newMockNodeProvider(allocatableCapacity v1.ResourceList) *mockNodeProvider { | ||||
| 	return &mockNodeProvider{ | ||||
| 		node: v1.Node{ | ||||
| 			Status: v1.NodeStatus{ | ||||
| 				Allocatable: allocatableCapacity, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| type mockNodeProvider struct { | ||||
| 	node v1.Node | ||||
| } | ||||
|  | ||||
| func (m *mockNodeProvider) GetNode() (*v1.Node, error) { | ||||
| 	return &m.node, nil | ||||
| } | ||||
|  | ||||
| // mockImageGC is used to simulate invoking image garbage collection. | ||||
| type mockImageGC struct { | ||||
| 	err     error | ||||
| @@ -175,6 +193,7 @@ func TestMemoryPressure(t *testing.T) { | ||||
| 	fakeClock := clock.NewFakeClock(time.Now()) | ||||
| 	podKiller := &mockPodKiller{} | ||||
| 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} | ||||
| 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")}) | ||||
| 	imageGC := &mockImageGC{freed: int64(0), err: nil} | ||||
| 	nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} | ||||
|  | ||||
| @@ -217,7 +236,7 @@ func TestMemoryPressure(t *testing.T) { | ||||
| 	burstablePodToAdmit, _ := podMaker("burst-admit", newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi"), "0Gi") | ||||
|  | ||||
| 	// synchronize | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have memory pressure | ||||
| 	if manager.IsUnderMemoryPressure() { | ||||
| @@ -235,7 +254,7 @@ func TestMemoryPressure(t *testing.T) { | ||||
| 	// induce soft threshold | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("1500Mi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have memory pressure | ||||
| 	if !manager.IsUnderMemoryPressure() { | ||||
| @@ -250,7 +269,7 @@ func TestMemoryPressure(t *testing.T) { | ||||
| 	// step forward in time pass the grace period | ||||
| 	fakeClock.Step(3 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("1500Mi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have memory pressure | ||||
| 	if !manager.IsUnderMemoryPressure() { | ||||
| @@ -275,7 +294,7 @@ func TestMemoryPressure(t *testing.T) { | ||||
| 	// remove memory pressure | ||||
| 	fakeClock.Step(20 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("3Gi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have memory pressure | ||||
| 	if manager.IsUnderMemoryPressure() { | ||||
| @@ -285,7 +304,7 @@ func TestMemoryPressure(t *testing.T) { | ||||
| 	// induce memory pressure! | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("500Mi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have memory pressure | ||||
| 	if !manager.IsUnderMemoryPressure() { | ||||
| @@ -313,7 +332,7 @@ func TestMemoryPressure(t *testing.T) { | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("2Gi", podStats) | ||||
| 	podKiller.pod = nil // reset state | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have memory pressure (because transition period not yet met) | ||||
| 	if !manager.IsUnderMemoryPressure() { | ||||
| @@ -337,7 +356,7 @@ func TestMemoryPressure(t *testing.T) { | ||||
| 	fakeClock.Step(5 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("2Gi", podStats) | ||||
| 	podKiller.pod = nil // reset state | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have memory pressure (because transition period met) | ||||
| 	if manager.IsUnderMemoryPressure() { | ||||
| @@ -392,6 +411,7 @@ func TestDiskPressureNodeFs(t *testing.T) { | ||||
| 	fakeClock := clock.NewFakeClock(time.Now()) | ||||
| 	podKiller := &mockPodKiller{} | ||||
| 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} | ||||
| 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")}) | ||||
| 	imageGC := &mockImageGC{freed: int64(0), err: nil} | ||||
| 	nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} | ||||
|  | ||||
| @@ -433,7 +453,7 @@ func TestDiskPressureNodeFs(t *testing.T) { | ||||
| 	podToAdmit, _ := podMaker("pod-to-admit", newResourceList("", ""), newResourceList("", ""), "0Gi", "0Gi", "0Gi") | ||||
|  | ||||
| 	// synchronize | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have disk pressure | ||||
| 	if manager.IsUnderDiskPressure() { | ||||
| @@ -448,7 +468,7 @@ func TestDiskPressureNodeFs(t *testing.T) { | ||||
| 	// induce soft threshold | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("1.5Gi", "200Gi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have disk pressure | ||||
| 	if !manager.IsUnderDiskPressure() { | ||||
| @@ -463,7 +483,7 @@ func TestDiskPressureNodeFs(t *testing.T) { | ||||
| 	// step forward in time pass the grace period | ||||
| 	fakeClock.Step(3 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("1.5Gi", "200Gi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have disk pressure | ||||
| 	if !manager.IsUnderDiskPressure() { | ||||
| @@ -488,7 +508,7 @@ func TestDiskPressureNodeFs(t *testing.T) { | ||||
| 	// remove disk pressure | ||||
| 	fakeClock.Step(20 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have disk pressure | ||||
| 	if manager.IsUnderDiskPressure() { | ||||
| @@ -498,7 +518,7 @@ func TestDiskPressureNodeFs(t *testing.T) { | ||||
| 	// induce disk pressure! | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("500Mi", "200Gi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have disk pressure | ||||
| 	if !manager.IsUnderDiskPressure() { | ||||
| @@ -523,7 +543,7 @@ func TestDiskPressureNodeFs(t *testing.T) { | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats) | ||||
| 	podKiller.pod = nil // reset state | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have disk pressure (because transition period not yet met) | ||||
| 	if !manager.IsUnderDiskPressure() { | ||||
| @@ -544,7 +564,7 @@ func TestDiskPressureNodeFs(t *testing.T) { | ||||
| 	fakeClock.Step(5 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats) | ||||
| 	podKiller.pod = nil // reset state | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have disk pressure (because transition period met) | ||||
| 	if manager.IsUnderDiskPressure() { | ||||
| @@ -589,6 +609,7 @@ func TestMinReclaim(t *testing.T) { | ||||
| 	fakeClock := clock.NewFakeClock(time.Now()) | ||||
| 	podKiller := &mockPodKiller{} | ||||
| 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} | ||||
| 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")}) | ||||
| 	imageGC := &mockImageGC{freed: int64(0), err: nil} | ||||
| 	nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} | ||||
|  | ||||
| @@ -622,7 +643,7 @@ func TestMinReclaim(t *testing.T) { | ||||
| 	} | ||||
|  | ||||
| 	// synchronize | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have memory pressure | ||||
| 	if manager.IsUnderMemoryPressure() { | ||||
| @@ -632,7 +653,7 @@ func TestMinReclaim(t *testing.T) { | ||||
| 	// induce memory pressure! | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("500Mi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have memory pressure | ||||
| 	if !manager.IsUnderMemoryPressure() { | ||||
| @@ -652,7 +673,7 @@ func TestMinReclaim(t *testing.T) { | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("1.2Gi", podStats) | ||||
| 	podKiller.pod = nil // reset state | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have memory pressure (because transition period not yet met) | ||||
| 	if !manager.IsUnderMemoryPressure() { | ||||
| @@ -672,7 +693,7 @@ func TestMinReclaim(t *testing.T) { | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("2Gi", podStats) | ||||
| 	podKiller.pod = nil // reset state | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have memory pressure (because transition period not yet met) | ||||
| 	if !manager.IsUnderMemoryPressure() { | ||||
| @@ -688,7 +709,7 @@ func TestMinReclaim(t *testing.T) { | ||||
| 	fakeClock.Step(5 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("2Gi", podStats) | ||||
| 	podKiller.pod = nil // reset state | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have memory pressure (because transition period met) | ||||
| 	if manager.IsUnderMemoryPressure() { | ||||
| @@ -727,6 +748,7 @@ func TestNodeReclaimFuncs(t *testing.T) { | ||||
| 	fakeClock := clock.NewFakeClock(time.Now()) | ||||
| 	podKiller := &mockPodKiller{} | ||||
| 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} | ||||
| 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")}) | ||||
| 	imageGcFree := resource.MustParse("700Mi") | ||||
| 	imageGC := &mockImageGC{freed: imageGcFree.Value(), err: nil} | ||||
| 	nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} | ||||
| @@ -761,7 +783,7 @@ func TestNodeReclaimFuncs(t *testing.T) { | ||||
| 	} | ||||
|  | ||||
| 	// synchronize | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have disk pressure | ||||
| 	if manager.IsUnderDiskPressure() { | ||||
| @@ -771,7 +793,7 @@ func TestNodeReclaimFuncs(t *testing.T) { | ||||
| 	// induce hard threshold | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker(".9Gi", "200Gi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have disk pressure | ||||
| 	if !manager.IsUnderDiskPressure() { | ||||
| @@ -794,7 +816,7 @@ func TestNodeReclaimFuncs(t *testing.T) { | ||||
| 	// remove disk pressure | ||||
| 	fakeClock.Step(20 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have disk pressure | ||||
| 	if manager.IsUnderDiskPressure() { | ||||
| @@ -804,7 +826,7 @@ func TestNodeReclaimFuncs(t *testing.T) { | ||||
| 	// induce disk pressure! | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("400Mi", "200Gi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have disk pressure | ||||
| 	if !manager.IsUnderDiskPressure() { | ||||
| @@ -830,7 +852,7 @@ func TestNodeReclaimFuncs(t *testing.T) { | ||||
| 	summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats) | ||||
| 	imageGC.invoked = false // reset state | ||||
| 	podKiller.pod = nil     // reset state | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have disk pressure (because transition period not yet met) | ||||
| 	if !manager.IsUnderDiskPressure() { | ||||
| @@ -852,7 +874,7 @@ func TestNodeReclaimFuncs(t *testing.T) { | ||||
| 	summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats) | ||||
| 	imageGC.invoked = false // reset state | ||||
| 	podKiller.pod = nil     // reset state | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have disk pressure (because transition period met) | ||||
| 	if manager.IsUnderDiskPressure() { | ||||
| @@ -920,6 +942,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) { | ||||
| 	fakeClock := clock.NewFakeClock(time.Now()) | ||||
| 	podKiller := &mockPodKiller{} | ||||
| 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} | ||||
| 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")}) | ||||
| 	imageGC := &mockImageGC{freed: int64(0), err: nil} | ||||
| 	nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} | ||||
|  | ||||
| @@ -961,7 +984,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) { | ||||
| 	podToAdmit, _ := podMaker("pod-to-admit", newResourceList("", ""), newResourceList("", ""), "0", "0", "0") | ||||
|  | ||||
| 	// synchronize | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have disk pressure | ||||
| 	if manager.IsUnderDiskPressure() { | ||||
| @@ -976,7 +999,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) { | ||||
| 	// induce soft threshold | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("1.5Mi", "4Mi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have disk pressure | ||||
| 	if !manager.IsUnderDiskPressure() { | ||||
| @@ -991,7 +1014,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) { | ||||
| 	// step forward in time pass the grace period | ||||
| 	fakeClock.Step(3 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("1.5Mi", "4Mi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have disk pressure | ||||
| 	if !manager.IsUnderDiskPressure() { | ||||
| @@ -1016,7 +1039,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) { | ||||
| 	// remove inode pressure | ||||
| 	fakeClock.Step(20 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have disk pressure | ||||
| 	if manager.IsUnderDiskPressure() { | ||||
| @@ -1026,7 +1049,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) { | ||||
| 	// induce inode pressure! | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("0.5Mi", "4Mi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have disk pressure | ||||
| 	if !manager.IsUnderDiskPressure() { | ||||
| @@ -1051,7 +1074,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) { | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats) | ||||
| 	podKiller.pod = nil // reset state | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have disk pressure (because transition period not yet met) | ||||
| 	if !manager.IsUnderDiskPressure() { | ||||
| @@ -1072,7 +1095,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) { | ||||
| 	fakeClock.Step(5 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats) | ||||
| 	podKiller.pod = nil // reset state | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have disk pressure (because transition period met) | ||||
| 	if manager.IsUnderDiskPressure() { | ||||
| @@ -1120,6 +1143,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) { | ||||
| 	fakeClock := clock.NewFakeClock(time.Now()) | ||||
| 	podKiller := &mockPodKiller{} | ||||
| 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} | ||||
| 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")}) | ||||
| 	imageGC := &mockImageGC{freed: int64(0), err: nil} | ||||
| 	nodeRef := &clientv1.ObjectReference{ | ||||
| 		Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "", | ||||
| @@ -1164,7 +1188,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) { | ||||
| 	// induce soft threshold | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("1500Mi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have memory pressure | ||||
| 	if !manager.IsUnderMemoryPressure() { | ||||
| @@ -1179,7 +1203,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) { | ||||
| 	// step forward in time pass the grace period | ||||
| 	fakeClock.Step(3 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("1500Mi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have memory pressure | ||||
| 	if !manager.IsUnderMemoryPressure() { | ||||
| @@ -1197,7 +1221,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) { | ||||
| 	// remove memory pressure | ||||
| 	fakeClock.Step(20 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("3Gi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have memory pressure | ||||
| 	if manager.IsUnderMemoryPressure() { | ||||
| @@ -1210,7 +1234,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) { | ||||
| 	// induce memory pressure! | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker("500Mi", podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have memory pressure | ||||
| 	if !manager.IsUnderMemoryPressure() { | ||||
| @@ -1222,3 +1246,167 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) { | ||||
| 		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // TestAllocatableMemoryPressure | ||||
| func TestAllocatableMemoryPressure(t *testing.T) { | ||||
| 	podMaker := makePodWithMemoryStats | ||||
| 	summaryStatsMaker := makeMemoryStats | ||||
| 	constantCapacity := "4Gi" | ||||
| 	podsToMake := []podToMake{ | ||||
| 		{name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "200Mi"}, | ||||
| 		{name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "400Mi"}, | ||||
| 		{name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "300Mi"}, | ||||
| 		{name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "500Mi"}, | ||||
| 		{name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "100Mi"}, | ||||
| 		{name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "200Mi"}, | ||||
| 	} | ||||
| 	pods := []*v1.Pod{} | ||||
| 	podStats := map[*v1.Pod]statsapi.PodStats{} | ||||
| 	for _, podToMake := range podsToMake { | ||||
| 		pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet) | ||||
| 		pods = append(pods, pod) | ||||
| 		podStats[pod] = podStat | ||||
| 	} | ||||
| 	podToEvict := pods[5] | ||||
| 	activePodsFunc := func() []*v1.Pod { | ||||
| 		return pods | ||||
| 	} | ||||
|  | ||||
| 	fakeClock := clock.NewFakeClock(time.Now()) | ||||
| 	podKiller := &mockPodKiller{} | ||||
| 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} | ||||
| 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("2Gi")}) | ||||
| 	imageGC := &mockImageGC{freed: int64(0), err: nil} | ||||
| 	nodeRef := &clientv1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} | ||||
|  | ||||
| 	config := Config{ | ||||
| 		MaxPodGracePeriodSeconds: 5, | ||||
| 		PressureTransitionPeriod: time.Minute * 5, | ||||
| 		Thresholds: []evictionapi.Threshold{ | ||||
| 			{ | ||||
| 				Signal:   evictionapi.SignalAllocatableMemoryAvailable, | ||||
| 				Operator: evictionapi.OpLessThan, | ||||
| 				Value: evictionapi.ThresholdValue{ | ||||
| 					Quantity: quantityMustParse("1Ki"), | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(constantCapacity, podStats)} | ||||
| 	manager := &managerImpl{ | ||||
| 		clock:           fakeClock, | ||||
| 		killPodFunc:     podKiller.killPodNow, | ||||
| 		imageGC:         imageGC, | ||||
| 		config:          config, | ||||
| 		recorder:        &record.FakeRecorder{}, | ||||
| 		summaryProvider: summaryProvider, | ||||
| 		nodeRef:         nodeRef, | ||||
| 		nodeConditionsLastObservedAt: nodeConditionsObservedAt{}, | ||||
| 		thresholdsFirstObservedAt:    thresholdsObservedAt{}, | ||||
| 	} | ||||
|  | ||||
| 	// create a best effort pod to test admission | ||||
| 	bestEffortPodToAdmit, _ := podMaker("best-admit", newResourceList("", ""), newResourceList("", ""), "0Gi") | ||||
| 	burstablePodToAdmit, _ := podMaker("burst-admit", newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi"), "0Gi") | ||||
|  | ||||
| 	// synchronize | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have memory pressure | ||||
| 	if manager.IsUnderMemoryPressure() { | ||||
| 		t.Errorf("Manager should not report memory pressure") | ||||
| 	} | ||||
|  | ||||
| 	// try to admit our pods (they should succeed) | ||||
| 	expected := []bool{true, true} | ||||
| 	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} { | ||||
| 		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit { | ||||
| 			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// induce memory pressure! | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	pod, podStat := podMaker("guaranteed-high-2", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi"), "1Gi") | ||||
| 	podStats[pod] = podStat | ||||
| 	summaryProvider.result = summaryStatsMaker(constantCapacity, podStats) | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have memory pressure | ||||
| 	if !manager.IsUnderMemoryPressure() { | ||||
| 		t.Errorf("Manager should report memory pressure") | ||||
| 	} | ||||
|  | ||||
| 	// check the right pod was killed | ||||
| 	if podKiller.pod != podToEvict { | ||||
| 		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) | ||||
| 	} | ||||
| 	observedGracePeriod := *podKiller.gracePeriodOverride | ||||
| 	if observedGracePeriod != int64(0) { | ||||
| 		t.Errorf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", 0, observedGracePeriod) | ||||
| 	} | ||||
| 	// reset state | ||||
| 	podKiller.pod = nil | ||||
| 	podKiller.gracePeriodOverride = nil | ||||
|  | ||||
| 	// the best-effort pod should not admit, burstable should | ||||
| 	expected = []bool{false, true} | ||||
| 	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} { | ||||
| 		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit { | ||||
| 			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// reduce memory pressure | ||||
| 	fakeClock.Step(1 * time.Minute) | ||||
| 	for pod := range podStats { | ||||
| 		if pod.Name == "guaranteed-high-2" { | ||||
| 			delete(podStats, pod) | ||||
| 		} | ||||
| 	} | ||||
| 	summaryProvider.result = summaryStatsMaker(constantCapacity, podStats) | ||||
| 	podKiller.pod = nil // reset state | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should have memory pressure (because transition period not yet met) | ||||
| 	if !manager.IsUnderMemoryPressure() { | ||||
| 		t.Errorf("Manager should report memory pressure") | ||||
| 	} | ||||
|  | ||||
| 	// no pod should have been killed | ||||
| 	if podKiller.pod != nil { | ||||
| 		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) | ||||
| 	} | ||||
|  | ||||
| 	// the best-effort pod should not admit, burstable should | ||||
| 	expected = []bool{false, true} | ||||
| 	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} { | ||||
| 		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit { | ||||
| 			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// move the clock past transition period to ensure that we stop reporting pressure | ||||
| 	fakeClock.Step(5 * time.Minute) | ||||
| 	summaryProvider.result = summaryStatsMaker(constantCapacity, podStats) | ||||
| 	podKiller.pod = nil // reset state | ||||
| 	manager.synchronize(diskInfoProvider, activePodsFunc, nodeProvider) | ||||
|  | ||||
| 	// we should not have memory pressure (because transition period met) | ||||
| 	if manager.IsUnderMemoryPressure() { | ||||
| 		t.Errorf("Manager should not report memory pressure") | ||||
| 	} | ||||
|  | ||||
| 	// no pod should have been killed | ||||
| 	if podKiller.pod != nil { | ||||
| 		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) | ||||
| 	} | ||||
|  | ||||
| 	// all pods should admit now | ||||
| 	expected = []bool{true, true} | ||||
| 	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} { | ||||
| 		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit { | ||||
| 			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -29,6 +29,7 @@ import ( | ||||
| 	"k8s.io/kubernetes/pkg/api" | ||||
| 	"k8s.io/kubernetes/pkg/api/v1" | ||||
| 	statsapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cm" | ||||
| 	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/qos" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/server/stats" | ||||
| @@ -68,6 +69,7 @@ func init() { | ||||
| 	// map eviction signals to node conditions | ||||
| 	signalToNodeCondition = map[evictionapi.Signal]v1.NodeConditionType{} | ||||
| 	signalToNodeCondition[evictionapi.SignalMemoryAvailable] = v1.NodeMemoryPressure | ||||
| 	signalToNodeCondition[evictionapi.SignalAllocatableMemoryAvailable] = v1.NodeMemoryPressure | ||||
| 	signalToNodeCondition[evictionapi.SignalImageFsAvailable] = v1.NodeDiskPressure | ||||
| 	signalToNodeCondition[evictionapi.SignalNodeFsAvailable] = v1.NodeDiskPressure | ||||
| 	signalToNodeCondition[evictionapi.SignalImageFsInodesFree] = v1.NodeDiskPressure | ||||
| @@ -76,6 +78,7 @@ func init() { | ||||
| 	// map signals to resources (and vice-versa) | ||||
| 	signalToResource = map[evictionapi.Signal]v1.ResourceName{} | ||||
| 	signalToResource[evictionapi.SignalMemoryAvailable] = v1.ResourceMemory | ||||
| 	signalToResource[evictionapi.SignalAllocatableMemoryAvailable] = v1.ResourceMemory | ||||
| 	signalToResource[evictionapi.SignalImageFsAvailable] = resourceImageFs | ||||
| 	signalToResource[evictionapi.SignalImageFsInodesFree] = resourceImageFsInodes | ||||
| 	signalToResource[evictionapi.SignalNodeFsAvailable] = resourceNodeFs | ||||
| @@ -93,8 +96,10 @@ func validSignal(signal evictionapi.Signal) bool { | ||||
| } | ||||
|  | ||||
| // ParseThresholdConfig parses the flags for thresholds. | ||||
| func ParseThresholdConfig(evictionHard, evictionSoft, evictionSoftGracePeriod, evictionMinimumReclaim string) ([]evictionapi.Threshold, error) { | ||||
| func ParseThresholdConfig(allocatableConfig []string, evictionHard, evictionSoft, evictionSoftGracePeriod, evictionMinimumReclaim string) ([]evictionapi.Threshold, error) { | ||||
| 	results := []evictionapi.Threshold{} | ||||
| 	allocatableThresholds := getAllocatableThreshold(allocatableConfig) | ||||
| 	results = append(results, allocatableThresholds...) | ||||
|  | ||||
| 	hardThresholds, err := parseThresholdStatements(evictionHard) | ||||
| 	if err != nil { | ||||
| @@ -214,6 +219,27 @@ func parseThresholdStatement(statement string) (evictionapi.Threshold, error) { | ||||
| 	}, nil | ||||
| } | ||||
|  | ||||
| // getAllocatableThreshold returns the thresholds applicable for the allocatable configuration | ||||
| func getAllocatableThreshold(allocatableConfig []string) []evictionapi.Threshold { | ||||
| 	for _, key := range allocatableConfig { | ||||
| 		if key == cm.NodeAllocatableEnforcementKey { | ||||
| 			return []evictionapi.Threshold{ | ||||
| 				{ | ||||
| 					Signal:   evictionapi.SignalAllocatableMemoryAvailable, | ||||
| 					Operator: evictionapi.OpLessThan, | ||||
| 					Value: evictionapi.ThresholdValue{ | ||||
| 						Quantity: resource.NewQuantity(int64(0), resource.BinarySI), | ||||
| 					}, | ||||
| 					MinReclaim: &evictionapi.ThresholdValue{ | ||||
| 						Quantity: resource.NewQuantity(int64(0), resource.BinarySI), | ||||
| 					}, | ||||
| 				}, | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return []evictionapi.Threshold{} | ||||
| } | ||||
|  | ||||
| // parsePercentage parses a string representing a percentage value | ||||
| func parsePercentage(input string) (float32, error) { | ||||
| 	value, err := strconv.ParseFloat(strings.TrimRight(input, "%"), 32) | ||||
| @@ -611,11 +637,15 @@ func (a byEvictionPriority) Less(i, j int) bool { | ||||
| } | ||||
|  | ||||
| // makeSignalObservations derives observations using the specified summary provider. | ||||
| func makeSignalObservations(summaryProvider stats.SummaryProvider) (signalObservations, statsFunc, error) { | ||||
| func makeSignalObservations(summaryProvider stats.SummaryProvider, nodeProvider NodeProvider) (signalObservations, statsFunc, error) { | ||||
| 	summary, err := summaryProvider.Get() | ||||
| 	if err != nil { | ||||
| 		return nil, nil, err | ||||
| 	} | ||||
| 	node, err := nodeProvider.GetNode() | ||||
| 	if err != nil { | ||||
| 		return nil, nil, err | ||||
| 	} | ||||
|  | ||||
| 	// build the function to work against for pod stats | ||||
| 	statsFunc := cachedStatsFunc(summary.Pods) | ||||
| @@ -663,6 +693,19 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider) (signalObserv | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	if memoryAllocatableCapacity, ok := node.Status.Allocatable[v1.ResourceMemory]; ok { | ||||
| 		memoryAllocatableAvailable := memoryAllocatableCapacity.Copy() | ||||
| 		for _, pod := range summary.Pods { | ||||
| 			mu, err := podMemoryUsage(pod) | ||||
| 			if err == nil { | ||||
| 				memoryAllocatableAvailable.Sub(mu[v1.ResourceMemory]) | ||||
| 			} | ||||
| 		} | ||||
| 		result[evictionapi.SignalAllocatableMemoryAvailable] = signalObservation{ | ||||
| 			available: memoryAllocatableAvailable, | ||||
| 			capacity:  memoryAllocatableCapacity.Copy(), | ||||
| 		} | ||||
| 	} | ||||
| 	return result, statsFunc, nil | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -28,6 +28,7 @@ import ( | ||||
| 	"k8s.io/kubernetes/pkg/api" | ||||
| 	"k8s.io/kubernetes/pkg/api/v1" | ||||
| 	statsapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cm" | ||||
| 	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" | ||||
| 	"k8s.io/kubernetes/pkg/quota" | ||||
| ) | ||||
| @@ -40,6 +41,7 @@ func quantityMustParse(value string) *resource.Quantity { | ||||
| func TestParseThresholdConfig(t *testing.T) { | ||||
| 	gracePeriod, _ := time.ParseDuration("30s") | ||||
| 	testCases := map[string]struct { | ||||
| 		allocatableConfig       []string | ||||
| 		evictionHard            string | ||||
| 		evictionSoft            string | ||||
| 		evictionSoftGracePeriod string | ||||
| @@ -48,6 +50,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 		expectThresholds        []evictionapi.Threshold | ||||
| 	}{ | ||||
| 		"no values": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "", | ||||
| 			evictionSoft:            "", | ||||
| 			evictionSoftGracePeriod: "", | ||||
| @@ -56,12 +59,23 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			expectThresholds:        []evictionapi.Threshold{}, | ||||
| 		}, | ||||
| 		"all flag values": { | ||||
| 			allocatableConfig:       []string{cm.NodeAllocatableEnforcementKey}, | ||||
| 			evictionHard:            "memory.available<150Mi", | ||||
| 			evictionSoft:            "memory.available<300Mi", | ||||
| 			evictionSoftGracePeriod: "memory.available=30s", | ||||
| 			evictionMinReclaim:      "memory.available=0", | ||||
| 			expectErr:               false, | ||||
| 			expectThresholds: []evictionapi.Threshold{ | ||||
| 				{ | ||||
| 					Signal:   evictionapi.SignalAllocatableMemoryAvailable, | ||||
| 					Operator: evictionapi.OpLessThan, | ||||
| 					Value: evictionapi.ThresholdValue{ | ||||
| 						Quantity: quantityMustParse("0"), | ||||
| 					}, | ||||
| 					MinReclaim: &evictionapi.ThresholdValue{ | ||||
| 						Quantity: quantityMustParse("0"), | ||||
| 					}, | ||||
| 				}, | ||||
| 				{ | ||||
| 					Signal:   evictionapi.SignalMemoryAvailable, | ||||
| 					Operator: evictionapi.OpLessThan, | ||||
| @@ -86,6 +100,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			}, | ||||
| 		}, | ||||
| 		"all flag values in percentages": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "memory.available<10%", | ||||
| 			evictionSoft:            "memory.available<30%", | ||||
| 			evictionSoftGracePeriod: "memory.available=30s", | ||||
| @@ -116,6 +131,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			}, | ||||
| 		}, | ||||
| 		"disk flag values": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "imagefs.available<150Mi,nodefs.available<100Mi", | ||||
| 			evictionSoft:            "imagefs.available<300Mi,nodefs.available<200Mi", | ||||
| 			evictionSoftGracePeriod: "imagefs.available=30s,nodefs.available=30s", | ||||
| @@ -167,6 +183,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			}, | ||||
| 		}, | ||||
| 		"disk flag values in percentages": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "imagefs.available<15%,nodefs.available<10.5%", | ||||
| 			evictionSoft:            "imagefs.available<30%,nodefs.available<20.5%", | ||||
| 			evictionSoftGracePeriod: "imagefs.available=30s,nodefs.available=30s", | ||||
| @@ -218,6 +235,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			}, | ||||
| 		}, | ||||
| 		"inode flag values": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "imagefs.inodesFree<150Mi,nodefs.inodesFree<100Mi", | ||||
| 			evictionSoft:            "imagefs.inodesFree<300Mi,nodefs.inodesFree<200Mi", | ||||
| 			evictionSoftGracePeriod: "imagefs.inodesFree=30s,nodefs.inodesFree=30s", | ||||
| @@ -269,6 +287,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			}, | ||||
| 		}, | ||||
| 		"invalid-signal": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "mem.available<150Mi", | ||||
| 			evictionSoft:            "", | ||||
| 			evictionSoftGracePeriod: "", | ||||
| @@ -277,6 +296,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			expectThresholds:        []evictionapi.Threshold{}, | ||||
| 		}, | ||||
| 		"hard-signal-negative": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "memory.available<-150Mi", | ||||
| 			evictionSoft:            "", | ||||
| 			evictionSoftGracePeriod: "", | ||||
| @@ -285,6 +305,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			expectThresholds:        []evictionapi.Threshold{}, | ||||
| 		}, | ||||
| 		"hard-signal-negative-percentage": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "memory.available<-15%", | ||||
| 			evictionSoft:            "", | ||||
| 			evictionSoftGracePeriod: "", | ||||
| @@ -293,6 +314,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			expectThresholds:        []evictionapi.Threshold{}, | ||||
| 		}, | ||||
| 		"soft-signal-negative": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "", | ||||
| 			evictionSoft:            "memory.available<-150Mi", | ||||
| 			evictionSoftGracePeriod: "", | ||||
| @@ -301,6 +323,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			expectThresholds:        []evictionapi.Threshold{}, | ||||
| 		}, | ||||
| 		"duplicate-signal": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "memory.available<150Mi,memory.available<100Mi", | ||||
| 			evictionSoft:            "", | ||||
| 			evictionSoftGracePeriod: "", | ||||
| @@ -309,6 +332,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			expectThresholds:        []evictionapi.Threshold{}, | ||||
| 		}, | ||||
| 		"valid-and-invalid-signal": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "memory.available<150Mi,invalid.foo<150Mi", | ||||
| 			evictionSoft:            "", | ||||
| 			evictionSoftGracePeriod: "", | ||||
| @@ -317,6 +341,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			expectThresholds:        []evictionapi.Threshold{}, | ||||
| 		}, | ||||
| 		"soft-no-grace-period": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "", | ||||
| 			evictionSoft:            "memory.available<150Mi", | ||||
| 			evictionSoftGracePeriod: "", | ||||
| @@ -325,6 +350,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			expectThresholds:        []evictionapi.Threshold{}, | ||||
| 		}, | ||||
| 		"soft-neg-grace-period": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "", | ||||
| 			evictionSoft:            "memory.available<150Mi", | ||||
| 			evictionSoftGracePeriod: "memory.available=-30s", | ||||
| @@ -333,6 +359,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			expectThresholds:        []evictionapi.Threshold{}, | ||||
| 		}, | ||||
| 		"neg-reclaim": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "", | ||||
| 			evictionSoft:            "", | ||||
| 			evictionSoftGracePeriod: "", | ||||
| @@ -341,6 +368,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 			expectThresholds:        []evictionapi.Threshold{}, | ||||
| 		}, | ||||
| 		"duplicate-reclaim": { | ||||
| 			allocatableConfig:       []string{}, | ||||
| 			evictionHard:            "", | ||||
| 			evictionSoft:            "", | ||||
| 			evictionSoftGracePeriod: "", | ||||
| @@ -350,7 +378,7 @@ func TestParseThresholdConfig(t *testing.T) { | ||||
| 		}, | ||||
| 	} | ||||
| 	for testName, testCase := range testCases { | ||||
| 		thresholds, err := ParseThresholdConfig(testCase.evictionHard, testCase.evictionSoft, testCase.evictionSoftGracePeriod, testCase.evictionMinReclaim) | ||||
| 		thresholds, err := ParseThresholdConfig(testCase.allocatableConfig, testCase.evictionHard, testCase.evictionSoft, testCase.evictionSoftGracePeriod, testCase.evictionMinReclaim) | ||||
| 		if testCase.expectErr != (err != nil) { | ||||
| 			t.Errorf("Err not as expected, test: %v, error expected: %v, actual: %v", testName, testCase.expectErr, err) | ||||
| 		} | ||||
| @@ -699,6 +727,7 @@ func TestMakeSignalObservations(t *testing.T) { | ||||
| 	} | ||||
| 	nodeAvailableBytes := uint64(1024 * 1024 * 1024) | ||||
| 	nodeWorkingSetBytes := uint64(1024 * 1024 * 1024) | ||||
| 	allocatableMemoryCapacity := uint64(5 * 1024 * 1024 * 1024) | ||||
| 	imageFsAvailableBytes := uint64(1024 * 1024) | ||||
| 	imageFsCapacityBytes := uint64(1024 * 1024 * 2) | ||||
| 	nodeFsAvailableBytes := uint64(1024) | ||||
| @@ -738,15 +767,31 @@ func TestMakeSignalObservations(t *testing.T) { | ||||
| 		podMaker("pod1", "ns2", "uuid2", 1), | ||||
| 		podMaker("pod3", "ns3", "uuid3", 1), | ||||
| 	} | ||||
| 	containerWorkingSetBytes := int64(1024 * 1024) | ||||
| 	containerWorkingSetBytes := int64(1024 * 1024 * 1024) | ||||
| 	for _, pod := range pods { | ||||
| 		fakeStats.Pods = append(fakeStats.Pods, newPodStats(pod, containerWorkingSetBytes)) | ||||
| 	} | ||||
| 	actualObservations, statsFunc, err := makeSignalObservations(provider) | ||||
| 	res := quantityMustParse("5Gi") | ||||
| 	nodeProvider := newMockNodeProvider(v1.ResourceList{v1.ResourceMemory: *res}) | ||||
| 	// Allocatable thresholds are always 100%.  Verify that Threshold == Capacity. | ||||
| 	if res.CmpInt64(int64(allocatableMemoryCapacity)) != 0 { | ||||
| 		t.Errorf("Expected Threshold %v to be equal to value %v", res.Value(), allocatableMemoryCapacity) | ||||
| 	} | ||||
| 	actualObservations, statsFunc, err := makeSignalObservations(provider, nodeProvider) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		t.Errorf("Unexpected err: %v", err) | ||||
| 	} | ||||
| 	allocatableMemQuantity, found := actualObservations[evictionapi.SignalAllocatableMemoryAvailable] | ||||
| 	if !found { | ||||
| 		t.Errorf("Expected allocatable memory observation, but didnt find one") | ||||
| 	} | ||||
| 	if allocatableMemQuantity.available.Value() != 2*containerWorkingSetBytes { | ||||
| 		t.Errorf("Expected %v, actual: %v", containerWorkingSetBytes, allocatableMemQuantity.available.Value()) | ||||
| 	} | ||||
| 	if expectedBytes := int64(allocatableMemoryCapacity); allocatableMemQuantity.capacity.Value() != expectedBytes { | ||||
| 		t.Errorf("Expected %v, actual: %v", expectedBytes, allocatableMemQuantity.capacity.Value()) | ||||
| 	} | ||||
| 	memQuantity, found := actualObservations[evictionapi.SignalMemoryAvailable] | ||||
| 	if !found { | ||||
| 		t.Errorf("Expected available memory observation: %v", err) | ||||
|   | ||||
| @@ -53,7 +53,7 @@ type Config struct { | ||||
| // Manager evaluates when an eviction threshold for node stability has been met on the node. | ||||
| type Manager interface { | ||||
| 	// Start starts the control loop to monitor eviction thresholds at specified interval. | ||||
| 	Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, monitoringInterval time.Duration) | ||||
| 	Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, nodeProvider NodeProvider, monitoringInterval time.Duration) | ||||
|  | ||||
| 	// IsUnderMemoryPressure returns true if the node is under memory pressure. | ||||
| 	IsUnderMemoryPressure() bool | ||||
| @@ -68,6 +68,12 @@ type DiskInfoProvider interface { | ||||
| 	HasDedicatedImageFs() (bool, error) | ||||
| } | ||||
|  | ||||
| // NodeProvider is responsible for providing the node api object describing this node | ||||
| type NodeProvider interface { | ||||
| 	// GetNode returns the node info for this node | ||||
| 	GetNode() (*v1.Node, error) | ||||
| } | ||||
|  | ||||
| // ImageGC is responsible for performing garbage collection of unused images. | ||||
| type ImageGC interface { | ||||
| 	// DeleteUnusedImages deletes unused images and returns the number of bytes freed, or an error. | ||||
|   | ||||
| @@ -349,7 +349,12 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub | ||||
| 		RootFreeDiskMB:   int(kubeCfg.LowDiskSpaceThresholdMB), | ||||
| 	} | ||||
|  | ||||
| 	thresholds, err := eviction.ParseThresholdConfig(kubeCfg.EvictionHard, kubeCfg.EvictionSoft, kubeCfg.EvictionSoftGracePeriod, kubeCfg.EvictionMinimumReclaim) | ||||
| 	enforceNodeAllocatable := kubeCfg.EnforceNodeAllocatable | ||||
| 	if kubeCfg.ExperimentalNodeAllocatableIgnoreEvictionThreshold { | ||||
| 		// Do not provide kubeCfg.EnforceNodeAllocatable to eviction threshold parsing if we are not enforcing Evictions | ||||
| 		enforceNodeAllocatable = []string{} | ||||
| 	} | ||||
| 	thresholds, err := eviction.ParseThresholdConfig(enforceNodeAllocatable, kubeCfg.EvictionHard, kubeCfg.EvictionSoft, kubeCfg.EvictionSoftGracePeriod, kubeCfg.EvictionMinimumReclaim) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| @@ -1222,7 +1227,7 @@ func (kl *Kubelet) initializeRuntimeDependentModules() { | ||||
| 		glog.Fatalf("Failed to start cAdvisor %v", err) | ||||
| 	} | ||||
| 	// eviction manager must start after cadvisor because it needs to know if the container runtime has a dedicated imagefs | ||||
| 	kl.evictionManager.Start(kl, kl.getActivePods, evictionMonitoringPeriod) | ||||
| 	kl.evictionManager.Start(kl, kl.getActivePods, kl, evictionMonitoringPeriod) | ||||
| } | ||||
|  | ||||
| // Run starts the kubelet reacting to config updates | ||||
|   | ||||
| @@ -57,6 +57,7 @@ go_library( | ||||
| go_test( | ||||
|     name = "go_default_test", | ||||
|     srcs = [ | ||||
|         "allocatable_eviction_test.go", | ||||
|         "apparmor_test.go", | ||||
|         "container_manager_test.go", | ||||
|         "critical_pod_test.go", | ||||
|   | ||||
							
								
								
									
										104
									
								
								test/e2e_node/allocatable_eviction_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								test/e2e_node/allocatable_eviction_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,104 @@ | ||||
| /* | ||||
| Copyright 2017 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package e2e_node | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"time" | ||||
|  | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/kubernetes/pkg/api/v1" | ||||
| 	"k8s.io/kubernetes/pkg/apis/componentconfig" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cm" | ||||
| 	"k8s.io/kubernetes/test/e2e/framework" | ||||
|  | ||||
| 	. "github.com/onsi/ginkgo" | ||||
| 	. "github.com/onsi/gomega" | ||||
| ) | ||||
|  | ||||
| // Eviction Policy is described here: | ||||
| // https://github.com/kubernetes/kubernetes/blob/master/docs/proposals/kubelet-eviction.md | ||||
|  | ||||
| var _ = framework.KubeDescribe("AllocatableEviction [Slow] [Serial] [Disruptive] [Flaky]", func() { | ||||
| 	f := framework.NewDefaultFramework("allocatable-eviction-test") | ||||
|  | ||||
| 	podTestSpecs := []podTestSpec{ | ||||
| 		{ | ||||
| 			evictionPriority: 1, // This pod should be evicted before the innocent pod | ||||
| 			pod:              *getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}), | ||||
| 		}, | ||||
| 		{ | ||||
| 			evictionPriority: 0, // This pod should never be evicted | ||||
| 			pod: v1.Pod{ | ||||
| 				ObjectMeta: metav1.ObjectMeta{Name: "innocent-pod"}, | ||||
| 				Spec: v1.PodSpec{ | ||||
| 					RestartPolicy: v1.RestartPolicyNever, | ||||
| 					Containers: []v1.Container{ | ||||
| 						{ | ||||
| 							Image: "gcr.io/google_containers/busybox:1.24", | ||||
| 							Name:  "normal-memory-usage-container", | ||||
| 							Command: []string{ | ||||
| 								"sh", | ||||
| 								"-c", //make one big (5 Gb) file | ||||
| 								"dd if=/dev/urandom of=largefile bs=5000000000 count=1; while true; do sleep 5; done", | ||||
| 							}, | ||||
| 						}, | ||||
| 					}, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 	evictionTestTimeout := 40 * time.Minute | ||||
| 	testCondition := "Memory Pressure" | ||||
| 	kubeletConfigUpdate := func(initialConfig *componentconfig.KubeletConfiguration) { | ||||
| 		initialConfig.EvictionHard = "memory.available<10%" | ||||
| 		// Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds. | ||||
| 		initialConfig.SystemReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"}) | ||||
| 		initialConfig.KubeReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"}) | ||||
| 		initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey} | ||||
| 		initialConfig.ExperimentalNodeAllocatableIgnoreEvictionThreshold = false | ||||
| 		initialConfig.CgroupsPerQOS = true | ||||
| 	} | ||||
| 	runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasMemoryPressure, kubeletConfigUpdate) | ||||
| }) | ||||
|  | ||||
| // Returns TRUE if the node has Memory Pressure, FALSE otherwise | ||||
| func hasMemoryPressure(f *framework.Framework, testCondition string) (bool, error) { | ||||
| 	localNodeStatus := getLocalNode(f).Status | ||||
| 	_, pressure := v1.GetNodeCondition(&localNodeStatus, v1.NodeMemoryPressure) | ||||
| 	Expect(pressure).NotTo(BeNil()) | ||||
| 	hasPressure := pressure.Status == v1.ConditionTrue | ||||
| 	By(fmt.Sprintf("checking if pod has %s: %v", testCondition, hasPressure)) | ||||
|  | ||||
| 	// Additional Logging relating to Memory | ||||
| 	summary, err := getNodeSummary() | ||||
| 	if err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
| 	if summary.Node.Memory != nil && summary.Node.Memory.WorkingSetBytes != nil && summary.Node.Memory.AvailableBytes != nil { | ||||
| 		framework.Logf("Node.Memory.WorkingSetBytes: %d, summary.Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes) | ||||
| 	} | ||||
| 	for _, pod := range summary.Pods { | ||||
| 		framework.Logf("Pod: %s", pod.PodRef.Name) | ||||
| 		for _, container := range pod.Containers { | ||||
| 			if container.Memory != nil && container.Memory.WorkingSetBytes != nil { | ||||
| 				framework.Logf("--- summary Container: %s WorkingSetBytes: %d", container.Name, *container.Memory.WorkingSetBytes) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return hasPressure, nil | ||||
| } | ||||
| @@ -22,6 +22,7 @@ import ( | ||||
|  | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/kubernetes/pkg/api/v1" | ||||
| 	"k8s.io/kubernetes/pkg/apis/componentconfig" | ||||
| 	"k8s.io/kubernetes/test/e2e/framework" | ||||
|  | ||||
| 	. "github.com/onsi/ginkgo" | ||||
| @@ -112,10 +113,11 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive] [Flak | ||||
| 	} | ||||
| 	evictionTestTimeout := 30 * time.Minute | ||||
| 	testCondition := "Disk Pressure due to Inodes" | ||||
| 	// Set the EvictionHard threshold lower to decrease test time | ||||
| 	evictionHardLimit := "nodefs.inodesFree<50%" | ||||
| 	kubeletConfigUpdate := func(initialConfig *componentconfig.KubeletConfiguration) { | ||||
| 		initialConfig.EvictionHard = "nodefs.inodesFree<50%" | ||||
| 	} | ||||
|  | ||||
| 	runEvictionTest(f, testCondition, podTestSpecs, evictionHardLimit, evictionTestTimeout, hasInodePressure) | ||||
| 	runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasInodePressure, kubeletConfigUpdate) | ||||
| }) | ||||
|  | ||||
| // Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods | ||||
| @@ -133,12 +135,12 @@ type podTestSpec struct { | ||||
| //		It ensures that lower evictionPriority pods are always evicted before higher evictionPriority pods (2 evicted before 1, etc.) | ||||
| //		It ensures that all lower evictionPriority pods are eventually evicted. | ||||
| // runEvictionTest then cleans up the testing environment by deleting provided nodes, and ensures that testCondition no longer exists | ||||
| func runEvictionTest(f *framework.Framework, testCondition string, podTestSpecs []podTestSpec, evictionHard string, | ||||
| 	evictionTestTimeout time.Duration, hasPressureCondition func(*framework.Framework, string) (bool, error)) { | ||||
| func runEvictionTest(f *framework.Framework, testCondition string, podTestSpecs []podTestSpec, evictionTestTimeout time.Duration, | ||||
| 	hasPressureCondition func(*framework.Framework, string) (bool, error), updateFunction func(initialConfig *componentconfig.KubeletConfiguration)) { | ||||
|  | ||||
| 	Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() { | ||||
|  | ||||
| 		tempSetEvictionHard(f, evictionHard) | ||||
| 		tempSetCurrentKubeletConfig(f, updateFunction) | ||||
| 		BeforeEach(func() { | ||||
| 			By("seting up pods to be used by tests") | ||||
| 			for _, spec := range podTestSpecs { | ||||
| @@ -148,6 +150,11 @@ func runEvictionTest(f *framework.Framework, testCondition string, podTestSpecs | ||||
| 		}) | ||||
|  | ||||
| 		It(fmt.Sprintf("should eventually see %s, and then evict all of the correct pods", testCondition), func() { | ||||
| 			configEnabled, err := isKubeletConfigEnabled(f) | ||||
| 			framework.ExpectNoError(err) | ||||
| 			if !configEnabled { | ||||
| 				framework.Skipf("Dynamic kubelet config must be enabled for this test to run.") | ||||
| 			} | ||||
| 			Eventually(func() error { | ||||
| 				hasPressure, err := hasPressureCondition(f, testCondition) | ||||
| 				if err != nil { | ||||
| @@ -299,14 +306,8 @@ func runEvictionTest(f *framework.Framework, testCondition string, podTestSpecs | ||||
|  | ||||
| // Returns TRUE if the node has disk pressure due to inodes exists on the node, FALSE otherwise | ||||
| func hasInodePressure(f *framework.Framework, testCondition string) (bool, error) { | ||||
|  | ||||
| 	nodeList, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{}) | ||||
| 	framework.ExpectNoError(err, "getting node list") | ||||
| 	if len(nodeList.Items) != 1 { | ||||
| 		return false, fmt.Errorf("expected 1 node, but see %d. List: %v", len(nodeList.Items), nodeList.Items) | ||||
| 	} | ||||
|  | ||||
| 	_, pressure := v1.GetNodeCondition(&nodeList.Items[0].Status, v1.NodeDiskPressure) | ||||
| 	localNodeStatus := getLocalNode(f).Status | ||||
| 	_, pressure := v1.GetNodeCondition(&localNodeStatus, v1.NodeDiskPressure) | ||||
| 	Expect(pressure).NotTo(BeNil()) | ||||
| 	hasPressure := pressure.Status == v1.ConditionTrue | ||||
| 	By(fmt.Sprintf("checking if pod has %s: %v", testCondition, hasPressure)) | ||||
|   | ||||
| @@ -136,7 +136,7 @@ var _ = framework.KubeDescribe("MemoryEviction [Slow] [Serial] [Disruptive]", fu | ||||
| 					By("creating a guaranteed pod, a burstable pod, and a besteffort pod.") | ||||
|  | ||||
| 					// A pod is guaranteed only when requests and limits are specified for all the containers and they are equal. | ||||
| 					guaranteed := createMemhogPod(f, "guaranteed-", "guaranteed", v1.ResourceRequirements{ | ||||
| 					guaranteed := getMemhogPod("guaranteed-pod", "guaranteed", v1.ResourceRequirements{ | ||||
| 						Requests: v1.ResourceList{ | ||||
| 							"cpu":    resource.MustParse("100m"), | ||||
| 							"memory": resource.MustParse("100Mi"), | ||||
| @@ -145,16 +145,22 @@ var _ = framework.KubeDescribe("MemoryEviction [Slow] [Serial] [Disruptive]", fu | ||||
| 							"cpu":    resource.MustParse("100m"), | ||||
| 							"memory": resource.MustParse("100Mi"), | ||||
| 						}}) | ||||
| 					guaranteed = f.PodClient().CreateSync(guaranteed) | ||||
| 					glog.Infof("pod created with name: %s", guaranteed.Name) | ||||
|  | ||||
| 					// A pod is burstable if limits and requests do not match across all containers. | ||||
| 					burstable := createMemhogPod(f, "burstable-", "burstable", v1.ResourceRequirements{ | ||||
| 					burstable := getMemhogPod("burstable-pod", "burstable", v1.ResourceRequirements{ | ||||
| 						Requests: v1.ResourceList{ | ||||
| 							"cpu":    resource.MustParse("100m"), | ||||
| 							"memory": resource.MustParse("100Mi"), | ||||
| 						}}) | ||||
| 					burstable = f.PodClient().CreateSync(burstable) | ||||
| 					glog.Infof("pod created with name: %s", burstable.Name) | ||||
|  | ||||
| 					// A pod is besteffort if none of its containers have specified any requests or limits. | ||||
| 					besteffort := createMemhogPod(f, "besteffort-", "besteffort", v1.ResourceRequirements{}) | ||||
| 					// A pod is besteffort if none of its containers have specified any requests or limits	. | ||||
| 					besteffort := getMemhogPod("besteffort-pod", "besteffort", v1.ResourceRequirements{}) | ||||
| 					besteffort = f.PodClient().CreateSync(besteffort) | ||||
| 					glog.Infof("pod created with name: %s", besteffort.Name) | ||||
|  | ||||
| 					// We poll until timeout or all pods are killed. | ||||
| 					// Inside the func, we check that all pods are in a valid phase with | ||||
| @@ -232,7 +238,7 @@ var _ = framework.KubeDescribe("MemoryEviction [Slow] [Serial] [Disruptive]", fu | ||||
|  | ||||
| }) | ||||
|  | ||||
| func createMemhogPod(f *framework.Framework, genName string, ctnName string, res v1.ResourceRequirements) *v1.Pod { | ||||
| func getMemhogPod(podName string, ctnName string, res v1.ResourceRequirements) *v1.Pod { | ||||
| 	env := []v1.EnvVar{ | ||||
| 		{ | ||||
| 			Name: "MEMORY_LIMIT", | ||||
| @@ -256,9 +262,9 @@ func createMemhogPod(f *framework.Framework, genName string, ctnName string, res | ||||
| 		memLimit = "$(MEMORY_LIMIT)" | ||||
| 	} | ||||
|  | ||||
| 	pod := &v1.Pod{ | ||||
| 	return &v1.Pod{ | ||||
| 		ObjectMeta: metav1.ObjectMeta{ | ||||
| 			GenerateName: genName, | ||||
| 			Name: podName, | ||||
| 		}, | ||||
| 		Spec: v1.PodSpec{ | ||||
| 			RestartPolicy: v1.RestartPolicyNever, | ||||
| @@ -277,8 +283,4 @@ func createMemhogPod(f *framework.Framework, genName string, ctnName string, res | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 	// The generated pod.Name will be on the pod spec returned by CreateSync | ||||
| 	pod = f.PodClient().CreateSync(pod) | ||||
| 	glog.Infof("pod created with name: %s", pod.Name) | ||||
| 	return pod | ||||
| } | ||||
|   | ||||
| @@ -86,13 +86,6 @@ func getCurrentKubeletConfig() (*componentconfig.KubeletConfiguration, error) { | ||||
| 	return kubeCfg, nil | ||||
| } | ||||
|  | ||||
| // Convenience method to set the evictionHard threshold during the current context. | ||||
| func tempSetEvictionHard(f *framework.Framework, evictionHard string) { | ||||
| 	tempSetCurrentKubeletConfig(f, func(initialConfig *componentconfig.KubeletConfiguration) { | ||||
| 		initialConfig.EvictionHard = evictionHard | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| // Must be called within a Context. Allows the function to modify the KubeletConfiguration during the BeforeEach of the context. | ||||
| // The change is reverted in the AfterEach of the context. | ||||
| // Returns true on success. | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Submit Queue
					Kubernetes Submit Queue