Merge pull request #46456 from jingxu97/May/allocatable
Automatic merge from submit-queue

Add local storage (scratch space) allocatable support

This PR adds support for allocatable local storage (scratch space). The feature currently covers only the root file system, which is shared by Kubernetes components, users' containers, and/or images. Users can reserve storage for kube system components via the --kube-reserved flag. If the storage allocatable to users' pods is used up, some pods will be evicted to free the storage resource. This feature is part of local storage capacity isolation and is described in the proposal https://github.com/kubernetes/community/pull/306

**Release note**:

```release-note
This feature exposes local storage capacity for the primary partitions, and supports & enforces storage reservation in Node Allocatable
```
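For illustration only (an assumption, not taken from the PR text): the reservation would be expressed through the kubelet's `--kube-reserved` flag, e.g. `--kube-reserved=cpu=500m,memory=1Gi,storage=10Gi`, where the `storage` key is the era-specific name assumed here for the scratch-space reservation.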
```diff
@@ -510,6 +510,8 @@ func GetResourceRequest(pod *v1.Pod) *schedulercache.Resource {
 				result.MilliCPU += rQuantity.MilliValue()
 			case v1.ResourceNvidiaGPU:
 				result.NvidiaGPU += rQuantity.Value()
+			case v1.ResourceStorageOverlay:
+				result.StorageOverlay += rQuantity.Value()
 			default:
 				if v1helper.IsOpaqueIntResourceName(rName) {
 					result.AddOpaque(rName, rQuantity.Value())
```
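For context, not part of the diff: the hunk above sits inside a loop that sums each container's requests into `result`. A minimal standalone sketch of that accumulation for the new overlay resource, assuming 1.7-era import paths and the `v1.ResourceStorageOverlay` constant the diff itself uses:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/api/v1" // 1.7-era path; an assumption here
)

func main() {
	// Two containers requesting overlay (writable layer) storage. The
	// scheduler's loop sums per-container requests into the pod total,
	// which is what the new case does for result.StorageOverlay.
	requests := []v1.ResourceList{
		{v1.ResourceStorageOverlay: resource.MustParse("512Mi")},
		{v1.ResourceStorageOverlay: resource.MustParse("1Gi")},
	}
	var overlay int64
	for _, rl := range requests {
		if q, ok := rl[v1.ResourceStorageOverlay]; ok {
			overlay += q.Value() // Value() reports bytes
		}
	}
	fmt.Println(overlay) // 1610612736 bytes (512Mi + 1Gi)
}
```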
```diff
@@ -517,6 +519,15 @@ func GetResourceRequest(pod *v1.Pod) *schedulercache.Resource {
 			}
 		}
 	}
+
+	// Account for storage requested by emptydir volumes
+	// If the storage medium is memory, should exclude the size
+	for _, vol := range pod.Spec.Volumes {
+		if vol.EmptyDir != nil && vol.EmptyDir.Medium != v1.StorageMediumMemory {
+			result.StorageScratch += vol.EmptyDir.SizeLimit.Value()
+		}
+	}
+
 	// take max_resource(sum_pod, any_init_container)
 	for _, container := range pod.Spec.InitContainers {
 		for rName, rQuantity := range container.Resources.Requests {
```
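Not part of the diff: a minimal sketch of how the emptydir accounting above behaves, assuming the 1.7-era API where `EmptyDirVolumeSource.SizeLimit` is a value type (matching the `SizeLimit.Value()` call in the hunk). Memory-backed volumes are skipped because tmpfs is charged to memory, not disk:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/api/v1" // 1.7-era path; an assumption here
)

func main() {
	vols := []v1.Volume{
		// Disk-backed emptydir: its size limit counts toward scratch space.
		{Name: "cache", VolumeSource: v1.VolumeSource{
			EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: resource.MustParse("2Gi")},
		}},
		// Memory-backed (tmpfs) emptydir: charged to memory, not disk.
		{Name: "tmp", VolumeSource: v1.VolumeSource{
			EmptyDir: &v1.EmptyDirVolumeSource{
				Medium:    v1.StorageMediumMemory,
				SizeLimit: resource.MustParse("1Gi"),
			},
		}},
	}
	var scratch int64
	for _, vol := range vols {
		// Same guard as the diff: count only disk-backed emptydirs.
		if vol.EmptyDir != nil && vol.EmptyDir.Medium != v1.StorageMediumMemory {
			scratch += vol.EmptyDir.SizeLimit.Value()
		}
	}
	fmt.Println(scratch) // 2147483648 (2Gi); the tmpfs volume is excluded
}
```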
```diff
@@ -533,6 +544,10 @@ func GetResourceRequest(pod *v1.Pod) *schedulercache.Resource {
 				if gpu := rQuantity.Value(); gpu > result.NvidiaGPU {
 					result.NvidiaGPU = gpu
 				}
+			case v1.ResourceStorageOverlay:
+				if overlay := rQuantity.Value(); overlay > result.StorageOverlay {
+					result.StorageOverlay = overlay
+				}
 			default:
 				if v1helper.IsOpaqueIntResourceName(rName) {
 					value := rQuantity.Value()
```
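Not part of the diff: the `take max_resource(sum_pod, any_init_container)` rule the new case participates in. Init containers run sequentially, so the pod's effective request is the larger of the regular containers' sum and any single init container. A worked sketch with plain numbers:

```go
package main

import "fmt"

func main() {
	const gi = int64(1) << 30
	sum := 1 * gi                       // overlay summed over regular containers
	inits := []int64{3 * gi, 512 << 20} // each init container's overlay request
	effective := sum
	for _, r := range inits {
		if r > effective {
			effective = r // mirrors: if overlay > result.StorageOverlay
		}
	}
	fmt.Printf("%dGi\n", effective/gi) // 3Gi: the largest init container wins
}
```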
```diff
@@ -583,6 +598,23 @@ func PodFitsResources(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo
 	if allocatable.NvidiaGPU < podRequest.NvidiaGPU+nodeInfo.RequestedResource().NvidiaGPU {
 		predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceNvidiaGPU, podRequest.NvidiaGPU, nodeInfo.RequestedResource().NvidiaGPU, allocatable.NvidiaGPU))
 	}
+
+	scratchSpaceRequest := podRequest.StorageScratch
+	if allocatable.StorageOverlay == 0 {
+		scratchSpaceRequest += podRequest.StorageOverlay
+		//scratchSpaceRequest += nodeInfo.RequestedResource().StorageOverlay
+		nodeScratchRequest := nodeInfo.RequestedResource().StorageOverlay + nodeInfo.RequestedResource().StorageScratch
+		if allocatable.StorageScratch < scratchSpaceRequest+nodeScratchRequest {
+			predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceStorageScratch, scratchSpaceRequest, nodeScratchRequest, allocatable.StorageScratch))
+		}
+
+	} else if allocatable.StorageScratch < scratchSpaceRequest+nodeInfo.RequestedResource().StorageScratch {
+		predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceStorageScratch, scratchSpaceRequest, nodeInfo.RequestedResource().StorageScratch, allocatable.StorageScratch))
+	}
+	if allocatable.StorageOverlay > 0 && allocatable.StorageOverlay < podRequest.StorageOverlay+nodeInfo.RequestedResource().StorageOverlay {
+		predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceStorageOverlay, podRequest.StorageOverlay, nodeInfo.RequestedResource().StorageOverlay, allocatable.StorageOverlay))
+	}
+
 	for rName, rQuant := range podRequest.OpaqueIntResources {
 		if allocatable.OpaqueIntResources[rName] < rQuant+nodeInfo.RequestedResource().OpaqueIntResources[rName] {
 			predicateFails = append(predicateFails, NewInsufficientResourceError(rName, podRequest.OpaqueIntResources[rName], nodeInfo.RequestedResource().OpaqueIntResources[rName], allocatable.OpaqueIntResources[rName]))
```
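Not part of the diff: a worked sketch of the fits check above for a node with no dedicated overlay partition (`allocatable.StorageOverlay == 0`), the case where overlay requests fold into the scratch budget. The numbers are illustrative only:

```go
package main

import "fmt"

func main() {
	const gi = int64(1) << 30
	var (
		allocScratch = 100 * gi // node's allocatable scratch space
		podScratch   = 8 * gi   // pod request: emptydirs + scratch
		podOverlay   = 2 * gi   // pod request: overlay, folded into scratch
		nodeScratch  = 85 * gi  // scratch already requested on the node
		nodeOverlay  = 6 * gi   // overlay already requested on the node
	)
	scratchSpaceRequest := podScratch + podOverlay             // 10Gi
	nodeScratchRequest := nodeOverlay + nodeScratch            // 91Gi
	if allocScratch < scratchSpaceRequest+nodeScratchRequest { // 100Gi < 101Gi
		fmt.Println("does not fit: scheduler records InsufficientResourceError for scratch")
	}
}
```

When the node does report a separate overlay partition, the `else if` branch checks scratch alone and the final `if` checks overlay against its own allocatable pool.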