Add predicate check for local storage requests

This PR adds a check for local storage requests when admitting pods. If a pod's
local storage request exceeds the resources available on the node, the pod is
rejected.
Author: Jing Xu, 2017-05-30 12:41:31 -07:00
Parent: dd67e96c01
Commit: 943fc53bf7
11 changed files with 240 additions and 57 deletions
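
For orientation before the file-by-file diff, here is a minimal, self-contained sketch of the scratch/overlay admission check that this change adds to PodFitsResources. It is a simplified illustration only: the Resource struct and the fitsLocalStorage helper below are stand-ins for the scheduler's real types in schedulercache, and the numbers in main() mirror one of the new test cases.

package main

import "fmt"

// Resource is a simplified stand-in for the scheduler's resource bookkeeping.
type Resource struct {
	StorageScratch int64 // bytes requested on the node's root (scratch) filesystem, e.g. emptyDir volumes
	StorageOverlay int64 // bytes requested for container writable layers
}

// fitsLocalStorage mirrors the check added to PodFitsResources: the pod's
// scratch request must fit into the node's remaining scratch capacity, and
// overlay usage is charged against scratch when the node does not report
// overlay capacity separately.
func fitsLocalStorage(podRequest, nodeRequested, allocatable Resource) (bool, string) {
	scratchRequest := podRequest.StorageScratch
	if allocatable.StorageOverlay == 0 {
		// No separate overlay capacity: overlay usage shares the scratch space.
		scratchRequest += podRequest.StorageOverlay
		nodeScratch := nodeRequested.StorageScratch + nodeRequested.StorageOverlay
		if allocatable.StorageScratch < scratchRequest+nodeScratch {
			return false, "insufficient scratch storage"
		}
		return true, ""
	}
	if allocatable.StorageScratch < scratchRequest+nodeRequested.StorageScratch {
		return false, "insufficient scratch storage"
	}
	if allocatable.StorageOverlay < podRequest.StorageOverlay+nodeRequested.StorageOverlay {
		return false, "insufficient overlay storage"
	}
	return true, ""
}

func main() {
	// A pod requesting 18 units of overlay storage on a node with 20 units of
	// allocatable scratch space, 5 of which are already requested, is rejected.
	ok, reason := fitsLocalStorage(
		Resource{StorageOverlay: 18},
		Resource{StorageOverlay: 5},
		Resource{StorageScratch: 20},
	)
	fmt.Println(ok, reason) // false insufficient scratch storage
}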

View File

@@ -291,7 +291,7 @@ func (c *kubeletConfiguration) addFlags(fs *pflag.FlagSet) {
 	// Node Allocatable Flags
 	fs.Var(&c.SystemReserved, "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=500Mi) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
-	fs.Var(&c.KubeReserved, "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=500Mi) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
+	fs.Var(&c.KubeReserved, "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=500Mi, storage=1Gi) pairs that describe resources reserved for kubernetes system components. Currently cpu, memory and local storage for root file system are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
 	fs.StringSliceVar(&c.EnforceNodeAllocatable, "enforce-node-allocatable", c.EnforceNodeAllocatable, "A comma separated list of levels of node allocatable enforcement to be enforced by kubelet. Acceptible options are 'pods', 'system-reserved' & 'kube-reserved'. If the latter two options are specified, '--system-reserved-cgroup' & '--kube-reserved-cgroup' must also be set respectively. See https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md for more details.")
 	fs.StringVar(&c.SystemReservedCgroup, "system-reserved-cgroup", c.SystemReservedCgroup, "Absolute name of the top level cgroup that is used to manage non-kubernetes components for which compute resources were reserved via '--system-reserved' flag. Ex. '/system-reserved'. [default='']")
 	fs.StringVar(&c.KubeReservedCgroup, "kube-reserved-cgroup", c.KubeReservedCgroup, "Absolute name of the top level cgroup that is used to manage kubernetes components for which compute resources were reserved via '--kube-reserved' flag. Ex. '/kube-reserved'. [default='']")

View File

@@ -914,7 +914,7 @@ func parseResourceList(m componentconfig.ConfigurationMap) (v1.ResourceList, err
 	rl := make(v1.ResourceList)
 	for k, v := range m {
 		switch v1.ResourceName(k) {
-		// Only CPU and memory resources are supported.
+		// CPU, memory and local storage resources are supported.
 		case v1.ResourceCPU, v1.ResourceMemory, v1.ResourceStorage:
 			q, err := resource.ParseQuantity(v)
 			if err != nil {

View File

@@ -551,7 +551,7 @@ type KubeletConfiguration struct {
 	SystemReserved map[string]string `json:"systemReserved"`
 	// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
 	// that describe resources reserved for kubernetes system components.
-	// Currently only cpu and memory are supported. [default=none]
+	// Currently cpu, memory and local storage for root file system are supported. [default=none]
 	// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
 	KubeReserved map[string]string `json:"kubeReserved"`

View File

@@ -43,3 +43,12 @@ func StorageScratchCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceList
 	}
 	return c
 }
+
+func StorageOverlayCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceList {
+	c := v1.ResourceList{
+		v1.ResourceStorageOverlay: *resource.NewQuantity(
+			int64(info.Capacity),
+			resource.BinarySI),
+	}
+	return c
+}

View File

@@ -185,12 +185,14 @@ func (cm *containerManagerImpl) getNodeAllocatableAbsolute() v1.ResourceList {
 func (cm *containerManagerImpl) GetNodeAllocatableReservation() v1.ResourceList {
 	evictionReservation := hardEvictionReservation(cm.HardEvictionThresholds, cm.capacity)
 	if _, ok := cm.capacity[v1.ResourceStorage]; !ok {
-		if rootfs, err := cm.cadvisorInterface.RootFsInfo(); err == nil {
-			for rName, rCap := range cadvisor.StorageScratchCapacityFromFsInfo(rootfs) {
-				cm.capacity[rName] = rCap
-			}
-		} else {
-			glog.Warning("Error getting rootfs info: %v", err)
-		}
+		if cm.cadvisorInterface != nil {
+			if rootfs, err := cm.cadvisorInterface.RootFsInfo(); err == nil {
+				for rName, rCap := range cadvisor.StorageScratchCapacityFromFsInfo(rootfs) {
+					cm.capacity[rName] = rCap
+				}
+			} else {
+				glog.Warning("Error getting rootfs info: %v", err)
+			}
+		}
 	}
 	result := make(v1.ResourceList)

View File

@@ -76,6 +76,16 @@ func TestParseThresholdConfig(t *testing.T) {
 					Quantity: quantityMustParse("0"),
 				},
 			},
+			{
+				Signal:   evictionapi.SignalAllocatableNodeFsAvailable,
+				Operator: evictionapi.OpLessThan,
+				Value: evictionapi.ThresholdValue{
+					Quantity: quantityMustParse("0"),
+				},
+				MinReclaim: &evictionapi.ThresholdValue{
+					Quantity: quantityMustParse("0"),
+				},
+			},
 			{
 				Signal:   evictionapi.SignalMemoryAvailable,
 				Operator: evictionapi.OpLessThan,
@@ -777,8 +787,7 @@ func TestMakeSignalObservations(t *testing.T) {
 	if res.CmpInt64(int64(allocatableMemoryCapacity)) != 0 {
 		t.Errorf("Expected Threshold %v to be equal to value %v", res.Value(), allocatableMemoryCapacity)
 	}
-	actualObservations, statsFunc, err := makeSignalObservations(provider, nodeProvider)
+	actualObservations, statsFunc, err := makeSignalObservations(provider, nodeProvider, pods, false)
 	if err != nil {
 		t.Errorf("Unexpected err: %v", err)
 	}

View File

@@ -208,14 +208,16 @@ func TestUpdateNewNodeStatus(t *testing.T) {
 				KubeProxyVersion: version.Get().String(),
 			},
 			Capacity: v1.ResourceList{
 				v1.ResourceCPU:     *resource.NewMilliQuantity(2000, resource.DecimalSI),
 				v1.ResourceMemory:  *resource.NewQuantity(10E9, resource.BinarySI),
+				v1.ResourceStorage: *resource.NewQuantity(500*mb, resource.BinarySI),
 				v1.ResourcePods:    *resource.NewQuantity(0, resource.DecimalSI),
 			},
 			Allocatable: v1.ResourceList{
 				v1.ResourceCPU:     *resource.NewMilliQuantity(1800, resource.DecimalSI),
 				v1.ResourceMemory:  *resource.NewQuantity(9900E6, resource.BinarySI),
+				v1.ResourceStorage: *resource.NewQuantity(500*mb, resource.BinarySI),
 				v1.ResourcePods:    *resource.NewQuantity(0, resource.DecimalSI),
 			},
 			Addresses: []v1.NodeAddress{
 				{Type: v1.NodeInternalIP, Address: "127.0.0.1"},
@@ -361,14 +363,16 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
 				},
 			},
 			Capacity: v1.ResourceList{
 				v1.ResourceCPU:     *resource.NewMilliQuantity(3000, resource.DecimalSI),
 				v1.ResourceMemory:  *resource.NewQuantity(20E9, resource.BinarySI),
+				v1.ResourceStorage: *resource.NewQuantity(500*mb, resource.BinarySI),
 				v1.ResourcePods:    *resource.NewQuantity(0, resource.DecimalSI),
 			},
 			Allocatable: v1.ResourceList{
 				v1.ResourceCPU:     *resource.NewMilliQuantity(2800, resource.DecimalSI),
 				v1.ResourceMemory:  *resource.NewQuantity(19900E6, resource.BinarySI),
+				v1.ResourceStorage: *resource.NewQuantity(500*mb, resource.BinarySI),
 				v1.ResourcePods:    *resource.NewQuantity(0, resource.DecimalSI),
 			},
 		},
 	}
@@ -444,14 +448,16 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
 				KubeProxyVersion: version.Get().String(),
 			},
 			Capacity: v1.ResourceList{
 				v1.ResourceCPU:     *resource.NewMilliQuantity(2000, resource.DecimalSI),
 				v1.ResourceMemory:  *resource.NewQuantity(20E9, resource.BinarySI),
+				v1.ResourceStorage: *resource.NewQuantity(500*mb, resource.BinarySI),
 				v1.ResourcePods:    *resource.NewQuantity(0, resource.DecimalSI),
 			},
 			Allocatable: v1.ResourceList{
 				v1.ResourceCPU:     *resource.NewMilliQuantity(1800, resource.DecimalSI),
 				v1.ResourceMemory:  *resource.NewQuantity(19900E6, resource.BinarySI),
+				v1.ResourceStorage: *resource.NewQuantity(500*mb, resource.BinarySI),
 				v1.ResourcePods:    *resource.NewQuantity(0, resource.DecimalSI),
 			},
 			Addresses: []v1.NodeAddress{
 				{Type: v1.NodeInternalIP, Address: "127.0.0.1"},
@@ -655,8 +661,9 @@ func TestUpdateNodeStatusWithRuntimeStateError(t *testing.T) {
 	kubelet.containerManager = &localCM{
 		ContainerManager: cm.NewStubContainerManager(),
 		allocatable: v1.ResourceList{
 			v1.ResourceCPU:     *resource.NewMilliQuantity(200, resource.DecimalSI),
 			v1.ResourceMemory:  *resource.NewQuantity(100E6, resource.BinarySI),
+			v1.ResourceStorage: *resource.NewQuantity(200*mb, resource.BinarySI),
 		},
 	}
@@ -727,14 +734,16 @@ func TestUpdateNodeStatusWithRuntimeStateError(t *testing.T) {
 				KubeProxyVersion: version.Get().String(),
 			},
 			Capacity: v1.ResourceList{
 				v1.ResourceCPU:     *resource.NewMilliQuantity(2000, resource.DecimalSI),
 				v1.ResourceMemory:  *resource.NewQuantity(10E9, resource.BinarySI),
+				v1.ResourceStorage: *resource.NewQuantity(500*mb, resource.BinarySI),
 				v1.ResourcePods:    *resource.NewQuantity(0, resource.DecimalSI),
 			},
 			Allocatable: v1.ResourceList{
 				v1.ResourceCPU:     *resource.NewMilliQuantity(1800, resource.DecimalSI),
 				v1.ResourceMemory:  *resource.NewQuantity(9900E6, resource.BinarySI),
+				v1.ResourceStorage: *resource.NewQuantity(300*mb, resource.BinarySI),
 				v1.ResourcePods:    *resource.NewQuantity(0, resource.DecimalSI),
 			},
 			Addresses: []v1.NodeAddress{
 				{Type: v1.NodeInternalIP, Address: "127.0.0.1"},
@@ -1141,14 +1150,16 @@ func TestUpdateNewNodeStatusTooLargeReservation(t *testing.T) {
 		Spec: v1.NodeSpec{},
 		Status: v1.NodeStatus{
 			Capacity: v1.ResourceList{
 				v1.ResourceCPU:     *resource.NewMilliQuantity(2000, resource.DecimalSI),
 				v1.ResourceMemory:  *resource.NewQuantity(10E9, resource.BinarySI),
+				v1.ResourceStorage: *resource.NewQuantity(500*mb, resource.BinarySI),
 				v1.ResourcePods:    *resource.NewQuantity(0, resource.DecimalSI),
 			},
 			Allocatable: v1.ResourceList{
 				v1.ResourceCPU:     *resource.NewMilliQuantity(0, resource.DecimalSI),
 				v1.ResourceMemory:  *resource.NewQuantity(10E9, resource.BinarySI),
+				v1.ResourceStorage: *resource.NewQuantity(500*mb, resource.BinarySI),
 				v1.ResourcePods:    *resource.NewQuantity(0, resource.DecimalSI),
 			},
 		},
 	}

View File

@@ -508,6 +508,8 @@ func GetResourceRequest(pod *v1.Pod) *schedulercache.Resource {
 				result.MilliCPU += rQuantity.MilliValue()
 			case v1.ResourceNvidiaGPU:
 				result.NvidiaGPU += rQuantity.Value()
+			case v1.ResourceStorageOverlay:
+				result.StorageOverlay += rQuantity.Value()
 			default:
 				if v1helper.IsOpaqueIntResourceName(rName) {
 					result.AddOpaque(rName, rQuantity.Value())
@@ -515,6 +517,15 @@ func GetResourceRequest(pod *v1.Pod) *schedulercache.Resource {
 			}
 		}
 	}
+
+	// Account for storage requested by emptydir volumes
+	// If the storage medium is memory, should exclude the size
+	for _, vol := range pod.Spec.Volumes {
+		if vol.EmptyDir != nil && vol.EmptyDir.Medium != v1.StorageMediumMemory {
+			result.StorageScratch += vol.EmptyDir.SizeLimit.Value()
+		}
+	}
+
 	// take max_resource(sum_pod, any_init_container)
 	for _, container := range pod.Spec.InitContainers {
 		for rName, rQuantity := range container.Resources.Requests {
@@ -531,6 +542,10 @@ func GetResourceRequest(pod *v1.Pod) *schedulercache.Resource {
 				if gpu := rQuantity.Value(); gpu > result.NvidiaGPU {
 					result.NvidiaGPU = gpu
 				}
+			case v1.ResourceStorageOverlay:
+				if overlay := rQuantity.Value(); overlay > result.StorageOverlay {
+					result.StorageOverlay = overlay
+				}
 			default:
 				if v1helper.IsOpaqueIntResourceName(rName) {
 					value := rQuantity.Value()
@@ -581,6 +596,23 @@ func PodFitsResources(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.No
 	if allocatable.NvidiaGPU < podRequest.NvidiaGPU+nodeInfo.RequestedResource().NvidiaGPU {
 		predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceNvidiaGPU, podRequest.NvidiaGPU, nodeInfo.RequestedResource().NvidiaGPU, allocatable.NvidiaGPU))
 	}
+
+	scratchSpaceRequest := podRequest.StorageScratch
+	if allocatable.StorageOverlay == 0 {
+		scratchSpaceRequest += podRequest.StorageOverlay
+		//scratchSpaceRequest += nodeInfo.RequestedResource().StorageOverlay
+		nodeScratchRequest := nodeInfo.RequestedResource().StorageOverlay + nodeInfo.RequestedResource().StorageScratch
+		if allocatable.StorageScratch < scratchSpaceRequest+nodeScratchRequest {
+			predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceStorageScratch, scratchSpaceRequest, nodeScratchRequest, allocatable.StorageScratch))
+		}
+	} else if allocatable.StorageScratch < scratchSpaceRequest+nodeInfo.RequestedResource().StorageScratch {
+		predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceStorageScratch, scratchSpaceRequest, nodeInfo.RequestedResource().StorageScratch, allocatable.StorageScratch))
+	}
+
+	if allocatable.StorageOverlay > 0 && allocatable.StorageOverlay < podRequest.StorageOverlay+nodeInfo.RequestedResource().StorageOverlay {
+		predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceStorageOverlay, podRequest.StorageOverlay, nodeInfo.RequestedResource().StorageOverlay, allocatable.StorageOverlay))
+	}
+
 	for rName, rQuant := range podRequest.OpaqueIntResources {
 		if allocatable.OpaqueIntResources[rName] < rQuant+nodeInfo.RequestedResource().OpaqueIntResources[rName] {
 			predicateFails = append(predicateFails, NewInsufficientResourceError(rName, podRequest.OpaqueIntResources[rName], nodeInfo.RequestedResource().OpaqueIntResources[rName], allocatable.OpaqueIntResources[rName]))

View File

@@ -76,7 +76,7 @@ var (
 	opaqueResourceB = v1helper.OpaqueIntResourceName("BBB")
 )
-func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) v1.NodeResources {
+func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.NodeResources {
 	return v1.NodeResources{
 		Capacity: v1.ResourceList{
 			v1.ResourceCPU:       *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
@@ -84,17 +84,19 @@ func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) v1.NodeRes
 			v1.ResourcePods:      *resource.NewQuantity(pods, resource.DecimalSI),
 			v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
 			opaqueResourceA:      *resource.NewQuantity(opaqueA, resource.DecimalSI),
+			v1.ResourceStorage:   *resource.NewQuantity(storage, resource.BinarySI),
 		},
 	}
 }
-func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) v1.ResourceList {
+func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.ResourceList {
 	return v1.ResourceList{
 		v1.ResourceCPU:       *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
 		v1.ResourceMemory:    *resource.NewQuantity(memory, resource.BinarySI),
 		v1.ResourcePods:      *resource.NewQuantity(pods, resource.DecimalSI),
 		v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
 		opaqueResourceA:      *resource.NewQuantity(opaqueA, resource.DecimalSI),
+		v1.ResourceStorage:   *resource.NewQuantity(storage, resource.BinarySI),
 	}
 }
@@ -112,6 +114,25 @@ func newResourcePod(usage ...schedulercache.Resource) *v1.Pod {
 	}
 }
+
+func addStorageLimit(pod *v1.Pod, sizeLimit int64, medium v1.StorageMedium) *v1.Pod {
+	return &v1.Pod{
+		Spec: v1.PodSpec{
+			Containers: pod.Spec.Containers,
+			Volumes: []v1.Volume{
+				{
+					Name: "emptyDirVolumeName",
+					VolumeSource: v1.VolumeSource{
+						EmptyDir: &v1.EmptyDirVolumeSource{
+							SizeLimit: *resource.NewQuantity(sizeLimit, resource.BinarySI),
+							Medium:    medium,
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
 func newResourceInitPod(pod *v1.Pod, usage ...schedulercache.Resource) *v1.Pod {
 	pod.Spec.InitContainers = newResourcePod(usage...).Spec.Containers
 	return pod
@@ -329,7 +350,7 @@ func TestPodFitsResources(t *testing.T) {
 	}
 	for _, test := range enoughPodsTests {
-		node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5)}}
+		node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
 		test.nodeInfo.SetNode(&node)
 		fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
 		if err != nil {
@@ -384,7 +405,7 @@ func TestPodFitsResources(t *testing.T) {
 		},
 	}
 	for _, test := range notEnoughPodsTests {
-		node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0)}}
+		node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0)}}
 		test.nodeInfo.SetNode(&node)
 		fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
 		if err != nil {
@@ -397,6 +418,86 @@ func TestPodFitsResources(t *testing.T) {
 			t.Errorf("%s: expected: %v got %v", test.test, test.fits, fits)
 		}
 	}
+
+	storagePodsTests := []struct {
+		pod           *v1.Pod
+		emptyDirLimit int64
+		storageMedium v1.StorageMedium
+		nodeInfo      *schedulercache.NodeInfo
+		fits          bool
+		test          string
+		reasons       []algorithm.PredicateFailureReason
+	}{
+		{
+			pod: newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 1}),
+			nodeInfo: schedulercache.NewNodeInfo(
+				newResourcePod(schedulercache.Resource{MilliCPU: 10, Memory: 10, StorageOverlay: 20})),
+			fits: false,
+			test: "due to init container scratch disk",
+			reasons: []algorithm.PredicateFailureReason{
+				NewInsufficientResourceError(v1.ResourceCPU, 1, 10, 10),
+				NewInsufficientResourceError(v1.ResourceStorageScratch, 1, 20, 20),
+			},
+		},
+		{
+			pod: newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 10}),
+			nodeInfo: schedulercache.NewNodeInfo(
+				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 10})),
+			fits: true,
+			test: "pod fit",
+		},
+		{
+			pod: newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 18}),
+			nodeInfo: schedulercache.NewNodeInfo(
+				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 2, StorageOverlay: 5})),
+			fits: false,
+			test: "request exceeds allocatable",
+			reasons: []algorithm.PredicateFailureReason{
+				NewInsufficientResourceError(v1.ResourceStorageScratch, 18, 5, 20),
+			},
+		},
+		{
+			pod:           newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 10}),
+			emptyDirLimit: 15,
+			storageMedium: v1.StorageMediumDefault,
+			nodeInfo: schedulercache.NewNodeInfo(
+				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 2, StorageOverlay: 5})),
+			fits: false,
+			test: "storage scratchrequest exceeds allocatable",
+			reasons: []algorithm.PredicateFailureReason{
+				NewInsufficientResourceError(v1.ResourceStorageScratch, 25, 5, 20),
+			},
+		},
+		{
+			pod:           newResourcePod(schedulercache.Resource{MilliCPU: 1, Memory: 1, StorageOverlay: 10}),
+			emptyDirLimit: 15,
+			storageMedium: v1.StorageMediumMemory,
+			nodeInfo: schedulercache.NewNodeInfo(
+				newResourcePod(schedulercache.Resource{MilliCPU: 2, Memory: 2, StorageOverlay: 5})),
+			fits: true,
+			test: "storage scratchrequest exceeds allocatable",
+			reasons: []algorithm.PredicateFailureReason{
+				NewInsufficientResourceError(v1.ResourceStorageScratch, 25, 5, 20),
+			},
+		},
+	}
+
+	for _, test := range storagePodsTests {
+		node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
+		test.nodeInfo.SetNode(&node)
+		pod := addStorageLimit(test.pod, test.emptyDirLimit, test.storageMedium)
+		fits, reasons, err := PodFitsResources(pod, PredicateMetadata(pod, nil), test.nodeInfo)
+		if err != nil {
+			t.Errorf("%s: unexpected error: %v", test.test, err)
+		}
+		if !fits && !reflect.DeepEqual(reasons, test.reasons) {
+			t.Errorf("%s: unexpected failure reasons: %v, want: %v", test.test, reasons, test.reasons)
+		}
+		if fits != test.fits {
+			t.Errorf("%s: expected: %v got %v", test.test, test.fits, fits)
+		}
+	}
 }
 
 func TestPodFitsHost(t *testing.T) {
@@ -1843,7 +1944,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 				newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
 			node: &v1.Node{
 				ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
-				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
+				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
 			},
 			fits: true,
 			wErr: nil,
@@ -1855,7 +1956,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 				newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 19})),
 			node: &v1.Node{
 				ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
-				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
+				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
 			},
 			fits: false,
 			wErr: nil,
@@ -1869,7 +1970,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 			pod: &v1.Pod{},
 			nodeInfo: schedulercache.NewNodeInfo(
 				newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
-			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
+			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
 			fits: true,
 			wErr: nil,
 			test: "no resources/port/host requested always fits on GPU machine",
@@ -1878,7 +1979,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 			pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
 			nodeInfo: schedulercache.NewNodeInfo(
 				newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 1})),
-			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
+			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
 			fits: false,
 			wErr: nil,
 			reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(v1.ResourceNvidiaGPU, 1, 1, 1)},
@@ -1888,7 +1989,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 			pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
 			nodeInfo: schedulercache.NewNodeInfo(
 				newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 0})),
-			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
+			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
 			fits: true,
 			wErr: nil,
 			test: "enough GPU resource",
@@ -1902,7 +2003,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 			nodeInfo: schedulercache.NewNodeInfo(),
 			node: &v1.Node{
 				ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
-				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
+				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
 			},
 			fits: false,
 			wErr: nil,
@@ -1914,7 +2015,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 			nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
 			node: &v1.Node{
 				ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
-				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
+				Status:     v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
 			},
 			fits: false,
 			wErr: nil,
@@ -3249,7 +3350,7 @@ func TestPodSchedulesOnNodeWithMemoryPressureCondition(t *testing.T) {
 					ImagePullPolicy: "Always",
 					// at least one requirement -> burstable pod
 					Resources: v1.ResourceRequirements{
-						Requests: makeAllocatableResources(100, 100, 100, 100, 0),
+						Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0),
 					},
 				},
 			},

View File

@@ -76,9 +76,10 @@ type Resource struct {
 func (r *Resource) ResourceList() v1.ResourceList {
 	result := v1.ResourceList{
 		v1.ResourceCPU:            *resource.NewMilliQuantity(r.MilliCPU, resource.DecimalSI),
 		v1.ResourceMemory:         *resource.NewQuantity(r.Memory, resource.BinarySI),
 		v1.ResourceNvidiaGPU:      *resource.NewQuantity(r.NvidiaGPU, resource.DecimalSI),
+		v1.ResourceStorageOverlay: *resource.NewQuantity(r.StorageOverlay, resource.BinarySI),
 	}
 	for rName, rQuant := range r.OpaqueIntResources {
 		result[rName] = *resource.NewQuantity(rQuant, resource.DecimalSI)
@@ -264,6 +265,8 @@ func (n *NodeInfo) addPod(pod *v1.Pod) {
 	n.requestedResource.MilliCPU += res.MilliCPU
 	n.requestedResource.Memory += res.Memory
 	n.requestedResource.NvidiaGPU += res.NvidiaGPU
+	n.requestedResource.StorageOverlay += res.StorageOverlay
+	n.requestedResource.StorageScratch += res.StorageScratch
 	if n.requestedResource.OpaqueIntResources == nil && len(res.OpaqueIntResources) > 0 {
 		n.requestedResource.OpaqueIntResources = map[v1.ResourceName]int64{}
 	}
@@ -349,6 +352,8 @@ func calculateResource(pod *v1.Pod) (res Resource, non0_cpu int64, non0_mem int6
 			res.Memory += rQuant.Value()
 		case v1.ResourceNvidiaGPU:
 			res.NvidiaGPU += rQuant.Value()
+		case v1.ResourceStorageOverlay:
+			res.StorageOverlay += rQuant.Value()
 		default:
 			if v1helper.IsOpaqueIntResourceName(rName) {
 				res.AddOpaque(rName, rQuant.Value())
@@ -361,6 +366,15 @@ func calculateResource(pod *v1.Pod) (res Resource, non0_cpu int64, non0_mem int6
 		non0_mem += non0_mem_req
 		// No non-zero resources for GPUs or opaque resources.
 	}
+
+	// Account for storage requested by emptydir volumes
+	// If the storage medium is memory, should exclude the size
+	for _, vol := range pod.Spec.Volumes {
+		if vol.EmptyDir != nil && vol.EmptyDir.Medium != v1.StorageMediumMemory {
+			res.StorageScratch += vol.EmptyDir.SizeLimit.Value()
+		}
+	}
+
 	return
 }
@@ -391,6 +405,10 @@ func (n *NodeInfo) SetNode(node *v1.Node) error {
 			n.allocatableResource.NvidiaGPU = rQuant.Value()
 		case v1.ResourcePods:
 			n.allowedPodNumber = int(rQuant.Value())
+		case v1.ResourceStorage:
+			n.allocatableResource.StorageScratch = rQuant.Value()
+		case v1.ResourceStorageOverlay:
+			n.allocatableResource.StorageOverlay = rQuant.Value()
 		default:
 			if v1helper.IsOpaqueIntResourceName(rName) {
 				n.allocatableResource.SetOpaque(rName, rQuant.Value())

View File

@@ -76,6 +76,7 @@ go_test(
         "inode_eviction_test.go",
         "kubelet_test.go",
         "lifecycle_hook_test.go",
+        "local_storage_allocatable_eviction_test.go",
        "log_path_test.go",
        "memory_eviction_test.go",
        "mirror_pod_test.go",