Merge pull request #46456 from jingxu97/May/allocatable

Automatic merge from submit-queue

Add local storage (scratch space) allocatable support

This PR adds support for allocatable local storage (scratch space).
The feature currently covers only the root filesystem, which is shared by
Kubernetes components, users' containers, and/or images. Users can reserve
storage for kube system components with the `--kube-reserved` flag. If the
storage allocatable to users' pods is exhausted, some pods are evicted to
free up storage.

This feature is part of local storage capacity isolation and is described in the proposal: https://github.com/kubernetes/community/pull/306
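For orientation, here is a minimal, hypothetical sketch (illustrative names only, not the scheduler's actual API) of the scratch-space accounting the updated `PodFitsResources` tests below exercise: a pod's scratch request is its container overlay (writable-layer) request plus the size limits of its default-medium emptyDir volumes, while memory-backed emptyDir volumes do not count against scratch.

```go
package main

import "fmt"

// scratchRequest is an illustrative stand-in for the local storage a pod asks for.
type scratchRequest struct {
	overlay       int64 // writable container-layer (overlay) request
	emptyDirLimit int64 // sum of sizeLimits of default-medium emptyDir volumes
}

// fitsScratch reports whether the pod's scratch request fits on a node, given
// the scratch already requested by other pods and the node's allocatable scratch.
func fitsScratch(req scratchRequest, used, allocatable int64) bool {
	return used+req.overlay+req.emptyDirLimit <= allocatable
}

func main() {
	// Mirrors a test case below: overlay 10 + default-medium emptyDir limit 15,
	// 5 already used, 20 allocatable -> 25 > 20, so the pod does not fit.
	fmt.Println(fitsScratch(scratchRequest{overlay: 10, emptyDirLimit: 15}, 5, 20)) // false

	// With a memory-backed emptyDir the limit is charged to memory, not scratch,
	// so only the overlay request counts: 5 + 10 <= 20 and the pod fits.
	fmt.Println(fitsScratch(scratchRequest{overlay: 10}, 5, 20)) // true
}
```

The last two cases in `storagePodsTests` below encode exactly this distinction between the default and memory emptyDir media.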

**Release note**:

```release-note
This feature exposes local storage capacity for the primary partitions, and supports and enforces storage reservation in Node Allocatable
```
Kubernetes Submit Queue
2017-06-03 00:24:29 -07:00
committed by GitHub
18 changed files with 696 additions and 69 deletions


@@ -78,7 +78,7 @@ var (
opaqueResourceB = v1helper.OpaqueIntResourceName("BBB")
)
func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) v1.NodeResources {
func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.NodeResources {
return v1.NodeResources{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
@@ -86,17 +86,19 @@ func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) v1.NodeRes
v1.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
v1.ResourceStorage: *resource.NewQuantity(storage, resource.BinarySI),
},
}
}
func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) v1.ResourceList {
func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.ResourceList {
return v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
v1.ResourceStorage: *resource.NewQuantity(storage, resource.BinarySI),
}
}
@@ -114,6 +116,25 @@ func newResourcePod(usage ...schedulercache.Resource) *v1.Pod {
}
}
func addStorageLimit(pod *v1.Pod, sizeLimit int64, medium v1.StorageMedium) *v1.Pod {
	return &v1.Pod{
		Spec: v1.PodSpec{
			Containers: pod.Spec.Containers,
			Volumes: []v1.Volume{
				{
					Name: "emptyDirVolumeName",
					VolumeSource: v1.VolumeSource{
						EmptyDir: &v1.EmptyDirVolumeSource{
							SizeLimit: *resource.NewQuantity(sizeLimit, resource.BinarySI),
							Medium:    medium,
						},
					},
				},
			},
		},
	}
}
func newResourceInitPod(pod *v1.Pod, usage ...schedulercache.Resource) *v1.Pod {
pod.Spec.InitContainers = newResourcePod(usage...).Spec.Containers
return pod
@@ -331,7 +352,7 @@ func TestPodFitsResources(t *testing.T) {
}
for _, test := range enoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@@ -386,7 +407,7 @@ func TestPodFitsResources(t *testing.T) {
},
}
for _, test := range notEnoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@@ -399,6 +420,86 @@ func TestPodFitsResources(t *testing.T) {
t.Errorf("%s: expected: %v got %v", test.test, test.fits, fits)
}
}
	storagePodsTests := []struct {
		pod           *v1.Pod
		emptyDirLimit int64
		storageMedium v1.StorageMedium
		nodeInfo      *schedulercache.NodeInfo
		fits          bool
		test          string
		reasons       []algorithm.PredicateFailureReason
	}{
		{
			pod: newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 1}),
			nodeInfo: schedulercache.NewNodeInfo(
				newResourcePod(schedulercache.Resource{MilliCPU: 10, Memory: 10, StorageOverlay: 20})),
			fits: false,
			test: "due to init container scratch disk",
			reasons: []algorithm.PredicateFailureReason{
				NewInsufficientResourceError(v1.ResourceCPU, 1, 10, 10),
				NewInsufficientResourceError(v1.ResourceStorageScratch, 1, 20, 20),
			},
		},
		{
			pod: newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 10}),
			nodeInfo: schedulercache.NewNodeInfo(
				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 10})),
			fits: true,
			test: "pod fit",
		},
		{
			pod: newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 18}),
			nodeInfo: schedulercache.NewNodeInfo(
				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 2, StorageOverlay: 5})),
			fits: false,
			test: "request exceeds allocatable",
			reasons: []algorithm.PredicateFailureReason{
				NewInsufficientResourceError(v1.ResourceStorageScratch, 18, 5, 20),
			},
		},
		{
			pod:           newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 10}),
			emptyDirLimit: 15,
			storageMedium: v1.StorageMediumDefault,
			nodeInfo: schedulercache.NewNodeInfo(
				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 2, StorageOverlay: 5})),
			fits: false,
			test: "storage scratch request exceeds allocatable",
			reasons: []algorithm.PredicateFailureReason{
				NewInsufficientResourceError(v1.ResourceStorageScratch, 25, 5, 20),
			},
		},
		{
			pod:           newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 10}),
			emptyDirLimit: 15,
			storageMedium: v1.StorageMediumMemory,
			nodeInfo: schedulercache.NewNodeInfo(
				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 2, StorageOverlay: 5})),
			fits: true,
			test: "memory-backed emptyDir does not count against scratch",
			reasons: []algorithm.PredicateFailureReason{
				NewInsufficientResourceError(v1.ResourceStorageScratch, 25, 5, 20),
			},
		},
	}

	for _, test := range storagePodsTests {
		node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
		test.nodeInfo.SetNode(&node)
		pod := addStorageLimit(test.pod, test.emptyDirLimit, test.storageMedium)
		fits, reasons, err := PodFitsResources(pod, PredicateMetadata(pod, nil), test.nodeInfo)
		if err != nil {
			t.Errorf("%s: unexpected error: %v", test.test, err)
		}
		if !fits && !reflect.DeepEqual(reasons, test.reasons) {
			t.Errorf("%s: unexpected failure reasons: %v, want: %v", test.test, reasons, test.reasons)
		}
		if fits != test.fits {
			t.Errorf("%s: expected: %v got %v", test.test, test.fits, fits)
		}
	}
}
func TestPodFitsHost(t *testing.T) {
@@ -1845,7 +1946,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
},
fits: true,
wErr: nil,
@@ -1857,7 +1958,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
},
fits: false,
wErr: nil,
@@ -1871,7 +1972,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: &v1.Pod{},
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
fits: true,
wErr: nil,
test: "no resources/port/host requested always fits on GPU machine",
@@ -1880,7 +1981,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 1})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
fits: false,
wErr: nil,
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(v1.ResourceNvidiaGPU, 1, 1, 1)},
@@ -1890,7 +1991,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 0})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
fits: true,
wErr: nil,
test: "enough GPU resource",
@@ -1904,7 +2005,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
},
fits: false,
wErr: nil,
@@ -1916,7 +2017,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
},
fits: false,
wErr: nil,
@@ -3251,7 +3352,7 @@ func TestPodSchedulesOnNodeWithMemoryPressureCondition(t *testing.T) {
ImagePullPolicy: "Always",
// at least one requirement -> burstable pod
Resources: v1.ResourceRequirements{
Requests: makeAllocatableResources(100, 100, 100, 100, 0),
Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0),
},
},
},