Merge pull request #46456 from jingxu97/May/allocatable
Automatic merge from submit-queue

Add local storage (scratch space) allocatable support

This PR adds support for allocatable local storage (scratch space). The feature covers only the root file system, which is shared by Kubernetes components, users' containers, and/or images. Users can reserve storage for kube system components with the --kube-reserved flag. If the allocatable storage for users' pods is used up, some pods are evicted to free the storage resource. This feature is part of local storage capacity isolation and is described in the proposal https://github.com/kubernetes/community/pull/306

**Release note**:

```release-note
This feature exposes local storage capacity for the primary partitions, and supports & enforces storage reservation in Node Allocatable
```
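For readers skimming the test diff below, here is a minimal, self-contained sketch of the accounting the new scheduler test cases exercise: a pod's scratch request is its container overlay request plus any disk-backed emptyDir size limits, and the pod fits only if that request, added to what is already requested on the node, stays within the node's allocatable scratch space (memory-medium emptyDirs are not charged to scratch, as the last test case checks). This is an illustration, not code from the PR: the names podScratchRequest and scratchFits are made up, quantities are plain int64s instead of resource.Quantity, and the real enforcement lives in the PodFitsResources predicate tested in the hunks below.

```go
package main

import "fmt"

// podScratchRequest models the scratch-space demand being accounted for:
// the container overlay (writable layer) request plus the sum of emptyDir
// size limits that are backed by disk. Memory-medium emptyDirs are omitted.
type podScratchRequest struct {
	overlay        int64 // container overlay request
	emptyDirOnDisk int64 // emptyDir SizeLimits with the default (disk) medium
}

func (r podScratchRequest) total() int64 { return r.overlay + r.emptyDirOnDisk }

// scratchFits restates the check the test table verifies: the pod fits only
// if its scratch request plus what is already requested on the node stays
// within the node's allocatable scratch space.
func scratchFits(pod podScratchRequest, requestedOnNode, allocatable int64) bool {
	return pod.total()+requestedOnNode <= allocatable
}

func main() {
	// Mirrors the "scratch request exceeds allocatable" case below:
	// allocatable 20, 5 already requested on the node, pod asks for
	// 10 overlay plus a disk-backed emptyDir limit of 15 -> 25 > 20.
	pod := podScratchRequest{overlay: 10, emptyDirOnDisk: 15}
	fmt.Println(scratchFits(pod, 5, 20)) // false

	// With a memory-medium emptyDir the size limit is not charged to
	// scratch space, so only the overlay counts: 10 + 5 <= 20 -> fits.
	pod = podScratchRequest{overlay: 10, emptyDirOnDisk: 0}
	fmt.Println(scratchFits(pod, 5, 20)) // true
}
```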
@@ -78,7 +78,7 @@ var (
 	opaqueResourceB = v1helper.OpaqueIntResourceName("BBB")
 )
 
-func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) v1.NodeResources {
+func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.NodeResources {
 	return v1.NodeResources{
 		Capacity: v1.ResourceList{
 			v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
@@ -86,17 +86,19 @@ func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) v1.NodeRes
 			v1.ResourcePods:      *resource.NewQuantity(pods, resource.DecimalSI),
 			v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
 			opaqueResourceA:      *resource.NewQuantity(opaqueA, resource.DecimalSI),
+			v1.ResourceStorage:   *resource.NewQuantity(storage, resource.BinarySI),
 		},
 	}
 }
 
-func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) v1.ResourceList {
+func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.ResourceList {
 	return v1.ResourceList{
 		v1.ResourceCPU:       *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
 		v1.ResourceMemory:    *resource.NewQuantity(memory, resource.BinarySI),
 		v1.ResourcePods:      *resource.NewQuantity(pods, resource.DecimalSI),
 		v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
 		opaqueResourceA:      *resource.NewQuantity(opaqueA, resource.DecimalSI),
+		v1.ResourceStorage:   *resource.NewQuantity(storage, resource.BinarySI),
 	}
 }
 
@@ -114,6 +116,25 @@ func newResourcePod(usage ...schedulercache.Resource) *v1.Pod {
 	}
 }
 
+func addStorageLimit(pod *v1.Pod, sizeLimit int64, medium v1.StorageMedium) *v1.Pod {
+	return &v1.Pod{
+		Spec: v1.PodSpec{
+			Containers: pod.Spec.Containers,
+			Volumes: []v1.Volume{
+				{
+					Name: "emptyDirVolumeName",
+					VolumeSource: v1.VolumeSource{
+						EmptyDir: &v1.EmptyDirVolumeSource{
+							SizeLimit: *resource.NewQuantity(sizeLimit, resource.BinarySI),
+							Medium:    medium,
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
 func newResourceInitPod(pod *v1.Pod, usage ...schedulercache.Resource) *v1.Pod {
 	pod.Spec.InitContainers = newResourcePod(usage...).Spec.Containers
 	return pod
@@ -331,7 +352,7 @@ func TestPodFitsResources(t *testing.T) {
 	}
 
 	for _, test := range enoughPodsTests {
-		node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5)}}
+		node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
 		test.nodeInfo.SetNode(&node)
 		fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
 		if err != nil {
@@ -386,7 +407,7 @@ func TestPodFitsResources(t *testing.T) {
 		},
 	}
 	for _, test := range notEnoughPodsTests {
-		node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0)}}
+		node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0)}}
 		test.nodeInfo.SetNode(&node)
 		fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
 		if err != nil {
@@ -399,6 +420,86 @@ func TestPodFitsResources(t *testing.T) {
 			t.Errorf("%s: expected: %v got %v", test.test, test.fits, fits)
 		}
 	}
 
+	storagePodsTests := []struct {
+		pod           *v1.Pod
+		emptyDirLimit int64
+		storageMedium v1.StorageMedium
+		nodeInfo      *schedulercache.NodeInfo
+		fits          bool
+		test          string
+		reasons       []algorithm.PredicateFailureReason
+	}{
+		{
+			pod: newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 1}),
+			nodeInfo: schedulercache.NewNodeInfo(
+				newResourcePod(schedulercache.Resource{MilliCPU: 10, Memory: 10, StorageOverlay: 20})),
+			fits: false,
+			test: "due to init container scratch disk",
+			reasons: []algorithm.PredicateFailureReason{
+				NewInsufficientResourceError(v1.ResourceCPU, 1, 10, 10),
+				NewInsufficientResourceError(v1.ResourceStorageScratch, 1, 20, 20),
+			},
+		},
+		{
+			pod: newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 10}),
+			nodeInfo: schedulercache.NewNodeInfo(
+				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 10})),
+			fits: true,
+			test: "pod fit",
+		},
+		{
+			pod: newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 18}),
+			nodeInfo: schedulercache.NewNodeInfo(
+				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 2, StorageOverlay: 5})),
+			fits: false,
+			test: "request exceeds allocatable",
+			reasons: []algorithm.PredicateFailureReason{
+				NewInsufficientResourceError(v1.ResourceStorageScratch, 18, 5, 20),
+			},
+		},
+		{
+			pod:           newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 10}),
+			emptyDirLimit: 15,
+			storageMedium: v1.StorageMediumDefault,
+			nodeInfo: schedulercache.NewNodeInfo(
+				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 2, StorageOverlay: 5})),
+			fits: false,
test: "storage scratchrequest exceeds allocatable",
|
||||
+			reasons: []algorithm.PredicateFailureReason{
+				NewInsufficientResourceError(v1.ResourceStorageScratch, 25, 5, 20),
+			},
+		},
+		{
+			pod:           newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 10}),
+			emptyDirLimit: 15,
+			storageMedium: v1.StorageMediumMemory,
+			nodeInfo: schedulercache.NewNodeInfo(
+				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 2, StorageOverlay: 5})),
+			fits: true,
test: "storage scratchrequest exceeds allocatable",
|
||||
+			reasons: []algorithm.PredicateFailureReason{
+				NewInsufficientResourceError(v1.ResourceStorageScratch, 25, 5, 20),
+			},
+		},
+	}
+
+	for _, test := range storagePodsTests {
+		node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
+		test.nodeInfo.SetNode(&node)
+		pod := addStorageLimit(test.pod, test.emptyDirLimit, test.storageMedium)
+		fits, reasons, err := PodFitsResources(pod, PredicateMetadata(pod, nil), test.nodeInfo)
+		if err != nil {
+			t.Errorf("%s: unexpected error: %v", test.test, err)
+		}
+		if !fits && !reflect.DeepEqual(reasons, test.reasons) {
+			t.Errorf("%s: unexpected failure reasons: %v, want: %v", test.test, reasons, test.reasons)
+		}
+		if fits != test.fits {
+			t.Errorf("%s: expected: %v got %v", test.test, test.fits, fits)
+		}
+	}
+
 }
 
 func TestPodFitsHost(t *testing.T) {
@@ -1845,7 +1946,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 				newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
 			node: &v1.Node{
 				ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
-				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
+				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
 			},
 			fits: true,
 			wErr: nil,
@@ -1857,7 +1958,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 				newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 19})),
 			node: &v1.Node{
 				ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
-				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
+				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
 			},
 			fits: false,
 			wErr: nil,
@@ -1871,7 +1972,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 			pod: &v1.Pod{},
 			nodeInfo: schedulercache.NewNodeInfo(
 				newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
-			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
+			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
 			fits: true,
 			wErr: nil,
 			test: "no resources/port/host requested always fits on GPU machine",
@@ -1880,7 +1981,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 			pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
 			nodeInfo: schedulercache.NewNodeInfo(
 				newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 1})),
-			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
+			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
 			fits: false,
 			wErr: nil,
 			reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(v1.ResourceNvidiaGPU, 1, 1, 1)},
@@ -1890,7 +1991,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 			pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
 			nodeInfo: schedulercache.NewNodeInfo(
 				newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 0})),
-			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
+			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
 			fits: true,
 			wErr: nil,
 			test: "enough GPU resource",
@@ -1904,7 +2005,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 			nodeInfo: schedulercache.NewNodeInfo(),
 			node: &v1.Node{
 				ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
-				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
+				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
 			},
 			fits: false,
 			wErr: nil,
@@ -1916,7 +2017,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 			nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
 			node: &v1.Node{
 				ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
-				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
+				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
 			},
 			fits: false,
 			wErr: nil,
@@ -3251,7 +3352,7 @@ func TestPodSchedulesOnNodeWithMemoryPressureCondition(t *testing.T) {
 					ImagePullPolicy: "Always",
 					// at least one requirement -> burstable pod
 					Resources: v1.ResourceRequirements{
-						Requests: makeAllocatableResources(100, 100, 100, 100, 0),
+						Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0),
 					},
 				},
 			},