Merge pull request #46456 from jingxu97/May/allocatable

Automatic merge from submit-queue

Add local storage (scratch space) allocatable support

This PR adds support for allocatable local storage (scratch space).
The feature currently covers only the root filesystem, which is shared by
Kubernetes components, users' containers, and/or images. Users can reserve
storage for kube system components with the `--kube-reserved` flag. If the
storage allocatable to users' pods is exhausted, some pods are evicted to
free up storage.

This feature is part of local storage capacity isolation and is described in the proposal: https://github.com/kubernetes/community/pull/306
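For orientation, here is a minimal, hypothetical sketch (illustrative names only, not the scheduler's actual API) of the scratch-space accounting the updated `PodFitsResources` tests below exercise: a pod's scratch request is its container overlay (writable-layer) request plus the size limits of its default-medium emptyDir volumes, while memory-backed emptyDir volumes do not count against scratch.

```go
package main

import "fmt"

// scratchRequest is an illustrative stand-in for the local storage a pod asks for.
type scratchRequest struct {
	overlay       int64 // writable container-layer (overlay) request
	emptyDirLimit int64 // sum of sizeLimits of default-medium emptyDir volumes
}

// fitsScratch reports whether the pod's scratch request fits on a node, given
// the scratch already requested by other pods and the node's allocatable scratch.
func fitsScratch(req scratchRequest, used, allocatable int64) bool {
	return used+req.overlay+req.emptyDirLimit <= allocatable
}

func main() {
	// Mirrors a test case below: overlay 10 + default-medium emptyDir limit 15,
	// 5 already used, 20 allocatable -> 25 > 20, so the pod does not fit.
	fmt.Println(fitsScratch(scratchRequest{overlay: 10, emptyDirLimit: 15}, 5, 20)) // false

	// With a memory-backed emptyDir the limit is charged to memory, not scratch,
	// so only the overlay request counts: 5 + 10 <= 20 and the pod fits.
	fmt.Println(fitsScratch(scratchRequest{overlay: 10}, 5, 20)) // true
}
```

The last two cases in `storagePodsTests` below encode exactly this distinction between the default and memory emptyDir media.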

**Release note**:

```release-note
This feature exposes local storage capacity for the primary partitions, and supports and enforces storage reservation in Node Allocatable
```
Kubernetes Submit Queue
2017-06-03 00:24:29 -07:00
committed by GitHub
18 changed files with 696 additions and 69 deletions


@@ -78,7 +78,7 @@ var (
opaqueResourceB = v1helper.OpaqueIntResourceName("BBB")
)
func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) v1.NodeResources {
func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.NodeResources {
return v1.NodeResources{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
@@ -86,17 +86,19 @@ func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) v1.NodeRes
v1.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
v1.ResourceStorage: *resource.NewQuantity(storage, resource.BinarySI),
},
}
}
func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) v1.ResourceList {
func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.ResourceList {
return v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
v1.ResourceStorage: *resource.NewQuantity(storage, resource.BinarySI),
}
}
@@ -114,6 +116,25 @@ func newResourcePod(usage ...schedulercache.Resource) *v1.Pod {
}
}
func addStorageLimit(pod *v1.Pod, sizeLimit int64, medium v1.StorageMedium) *v1.Pod {
	return &v1.Pod{
		Spec: v1.PodSpec{
			Containers: pod.Spec.Containers,
			Volumes: []v1.Volume{
				{
					Name: "emptyDirVolumeName",
					VolumeSource: v1.VolumeSource{
						EmptyDir: &v1.EmptyDirVolumeSource{
							SizeLimit: *resource.NewQuantity(sizeLimit, resource.BinarySI),
							Medium:    medium,
						},
					},
				},
			},
		},
	}
}
func newResourceInitPod(pod *v1.Pod, usage ...schedulercache.Resource) *v1.Pod {
pod.Spec.InitContainers = newResourcePod(usage...).Spec.Containers
return pod
@@ -331,7 +352,7 @@ func TestPodFitsResources(t *testing.T) {
}
for _, test := range enoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@@ -386,7 +407,7 @@ func TestPodFitsResources(t *testing.T) {
},
}
for _, test := range notEnoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@@ -399,6 +420,86 @@ func TestPodFitsResources(t *testing.T) {
t.Errorf("%s: expected: %v got %v", test.test, test.fits, fits)
}
}
	storagePodsTests := []struct {
		pod           *v1.Pod
		emptyDirLimit int64
		storageMedium v1.StorageMedium
		nodeInfo      *schedulercache.NodeInfo
		fits          bool
		test          string
		reasons       []algorithm.PredicateFailureReason
	}{
		{
			pod: newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 1}),
			nodeInfo: schedulercache.NewNodeInfo(
				newResourcePod(schedulercache.Resource{MilliCPU: 10, Memory: 10, StorageOverlay: 20})),
			fits: false,
			test: "due to init container scratch disk",
			reasons: []algorithm.PredicateFailureReason{
				NewInsufficientResourceError(v1.ResourceCPU, 1, 10, 10),
				NewInsufficientResourceError(v1.ResourceStorageScratch, 1, 20, 20),
			},
		},
		{
			pod: newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 10}),
			nodeInfo: schedulercache.NewNodeInfo(
				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 10})),
			fits: true,
			test: "pod fit",
		},
		{
			pod: newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 18}),
			nodeInfo: schedulercache.NewNodeInfo(
				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 2, StorageOverlay: 5})),
			fits: false,
			test: "request exceeds allocatable",
			reasons: []algorithm.PredicateFailureReason{
				NewInsufficientResourceError(v1.ResourceStorageScratch, 18, 5, 20),
			},
		},
		{
			pod:           newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 10}),
			emptyDirLimit: 15,
			storageMedium: v1.StorageMediumDefault,
			nodeInfo: schedulercache.NewNodeInfo(
				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 2, StorageOverlay: 5})),
			fits: false,
			test: "storage scratch request exceeds allocatable",
			reasons: []algorithm.PredicateFailureReason{
				NewInsufficientResourceError(v1.ResourceStorageScratch, 25, 5, 20),
			},
		},
		{
			pod:           newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 10}),
			emptyDirLimit: 15,
			storageMedium: v1.StorageMediumMemory,
			nodeInfo: schedulercache.NewNodeInfo(
				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 2, StorageOverlay: 5})),
			fits: true,
			test: "memory-backed emptyDir does not count against scratch",
			reasons: []algorithm.PredicateFailureReason{
				NewInsufficientResourceError(v1.ResourceStorageScratch, 25, 5, 20),
			},
		},
	}

	for _, test := range storagePodsTests {
		node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
		test.nodeInfo.SetNode(&node)
		pod := addStorageLimit(test.pod, test.emptyDirLimit, test.storageMedium)
		fits, reasons, err := PodFitsResources(pod, PredicateMetadata(pod, nil), test.nodeInfo)
		if err != nil {
			t.Errorf("%s: unexpected error: %v", test.test, err)
		}
		if !fits && !reflect.DeepEqual(reasons, test.reasons) {
			t.Errorf("%s: unexpected failure reasons: %v, want: %v", test.test, reasons, test.reasons)
		}
		if fits != test.fits {
			t.Errorf("%s: expected: %v got %v", test.test, test.fits, fits)
		}
	}
}
func TestPodFitsHost(t *testing.T) {
@@ -1845,7 +1946,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
},
fits: true,
wErr: nil,
@@ -1857,7 +1958,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
},
fits: false,
wErr: nil,
@@ -1871,7 +1972,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: &v1.Pod{},
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
fits: true,
wErr: nil,
test: "no resources/port/host requested always fits on GPU machine",
@@ -1880,7 +1981,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 1})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
fits: false,
wErr: nil,
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(v1.ResourceNvidiaGPU, 1, 1, 1)},
@@ -1890,7 +1991,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 0})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
fits: true,
wErr: nil,
test: "enough GPU resource",
@@ -1904,7 +2005,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
},
fits: false,
wErr: nil,
@@ -1916,7 +2017,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
},
fits: false,
wErr: nil,
@@ -3251,7 +3352,7 @@ func TestPodSchedulesOnNodeWithMemoryPressureCondition(t *testing.T) {
ImagePullPolicy: "Always",
// at least one requirement -> burstable pod
Resources: v1.ResourceRequirements{
Requests: makeAllocatableResources(100, 100, 100, 100, 0),
Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0),
},
},
},