Support opaque integer resource accounting.

- Prevents kubelet from overwriting capacity during sync.
- Handles opaque integer resources in the scheduler.
  - Adds scheduler predicate tests for opaque resources.
- Validates opaque int resources:
  - Ensures supplied opaque int quantities in node capacity,
    node allocatable, pod request and pod limit are integers.
  - Adds tests for new validation logic (node update and pod spec).
- Added e2e tests for opaque integer resources.
This commit is contained in:
Connor Doyle
2016-09-26 08:11:31 -07:00
parent 1cba31af40
commit c93646e8da
11 changed files with 883 additions and 66 deletions

View File

@@ -430,19 +430,53 @@ func (c *VolumeZoneChecker) predicate(pod *api.Pod, meta interface{}, nodeInfo *
func GetResourceRequest(pod *api.Pod) *schedulercache.Resource {
result := schedulercache.Resource{}
for _, container := range pod.Spec.Containers {
requests := container.Resources.Requests
result.Memory += requests.Memory().Value()
result.MilliCPU += requests.Cpu().MilliValue()
result.NvidiaGPU += requests.NvidiaGPU().Value()
for rName, rQuantity := range container.Resources.Requests {
switch rName {
case api.ResourceMemory:
result.Memory += rQuantity.Value()
case api.ResourceCPU:
result.MilliCPU += rQuantity.MilliValue()
case api.ResourceNvidiaGPU:
result.NvidiaGPU += rQuantity.Value()
default:
if api.IsOpaqueIntResourceName(rName) {
// Lazily allocate this map only if required.
if result.OpaqueIntResources == nil {
result.OpaqueIntResources = map[api.ResourceName]int64{}
}
result.OpaqueIntResources[rName] += rQuantity.Value()
}
}
}
}
// take max_resource(sum_pod, any_init_container)
for _, container := range pod.Spec.InitContainers {
requests := container.Resources.Requests
if mem := requests.Memory().Value(); mem > result.Memory {
result.Memory = mem
}
if cpu := requests.Cpu().MilliValue(); cpu > result.MilliCPU {
result.MilliCPU = cpu
for rName, rQuantity := range container.Resources.Requests {
switch rName {
case api.ResourceMemory:
if mem := rQuantity.Value(); mem > result.Memory {
result.Memory = mem
}
case api.ResourceCPU:
if cpu := rQuantity.MilliValue(); cpu > result.MilliCPU {
result.MilliCPU = cpu
}
case api.ResourceNvidiaGPU:
if gpu := rQuantity.Value(); gpu > result.NvidiaGPU {
result.NvidiaGPU = gpu
}
default:
if api.IsOpaqueIntResourceName(rName) {
// Lazily allocate this map only if required.
if result.OpaqueIntResources == nil {
result.OpaqueIntResources = map[api.ResourceName]int64{}
}
value := rQuantity.Value()
if value > result.OpaqueIntResources[rName] {
result.OpaqueIntResources[rName] = value
}
}
}
}
}
return &result
@@ -471,7 +505,7 @@ func PodFitsResources(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.N
// We couldn't parse metadata - fallback to computing it.
podRequest = GetResourceRequest(pod)
}
if podRequest.MilliCPU == 0 && podRequest.Memory == 0 && podRequest.NvidiaGPU == 0 {
if podRequest.MilliCPU == 0 && podRequest.Memory == 0 && podRequest.NvidiaGPU == 0 && len(podRequest.OpaqueIntResources) == 0 {
return len(predicateFails) == 0, predicateFails, nil
}
@@ -485,6 +519,12 @@ func PodFitsResources(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.N
if allocatable.NvidiaGPU < podRequest.NvidiaGPU+nodeInfo.RequestedResource().NvidiaGPU {
predicateFails = append(predicateFails, NewInsufficientResourceError(api.ResourceNvidiaGPU, podRequest.NvidiaGPU, nodeInfo.RequestedResource().NvidiaGPU, allocatable.NvidiaGPU))
}
for rName, rQuant := range podRequest.OpaqueIntResources {
if allocatable.OpaqueIntResources[rName] < rQuant+nodeInfo.RequestedResource().OpaqueIntResources[rName] {
predicateFails = append(predicateFails, NewInsufficientResourceError(rName, podRequest.OpaqueIntResources[rName], nodeInfo.RequestedResource().OpaqueIntResources[rName], allocatable.OpaqueIntResources[rName]))
}
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.

View File

@@ -74,23 +74,30 @@ func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*api.P
return nil, fmt.Errorf("Unable to find persistent volume: %s", pvID)
}
func makeResources(milliCPU int64, memory int64, nvidiaGPUs int64, pods int64) api.NodeResources {
var (
opaqueResourceA = api.OpaqueIntResourceName("AAA")
opaqueResourceB = api.OpaqueIntResourceName("BBB")
)
func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) api.NodeResources {
return api.NodeResources{
Capacity: api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
api.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
},
}
}
func makeAllocatableResources(milliCPU int64, memory int64, nvidiaGPUs int64, pods int64) api.ResourceList {
func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA int64) api.ResourceList {
return api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
api.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
}
}
@@ -98,13 +105,7 @@ func newResourcePod(usage ...schedulercache.Resource) *api.Pod {
containers := []api.Container{}
for _, req := range usage {
containers = append(containers, api.Container{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(req.MilliCPU, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(req.Memory, resource.BinarySI),
api.ResourceNvidiaGPU: *resource.NewQuantity(req.NvidiaGPU, resource.DecimalSI),
},
},
Resources: api.ResourceRequirements{Requests: req.ResourceList()},
})
}
return &api.Pod{
@@ -233,10 +234,105 @@ func TestPodFitsResources(t *testing.T) {
fits: true,
test: "equal edge case for init container",
},
{
pod: newResourcePod(schedulercache.Resource{OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 1}}),
nodeInfo: schedulercache.NewNodeInfo(newResourcePod(schedulercache.Resource{})),
fits: true,
test: "opaque resource fits",
},
{
pod: newResourceInitPod(newResourcePod(schedulercache.Resource{}), schedulercache.Resource{OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 1}}),
nodeInfo: schedulercache.NewNodeInfo(newResourcePod(schedulercache.Resource{})),
fits: true,
test: "opaque resource fits for init container",
},
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 10}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 0}})),
fits: false,
test: "opaque resource capacity enforced",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(opaqueResourceA, 10, 0, 5)},
},
{
pod: newResourceInitPod(newResourcePod(schedulercache.Resource{}),
schedulercache.Resource{MilliCPU: 1, Memory: 1, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 10}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 0}})),
fits: false,
test: "opaque resource capacity enforced for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(opaqueResourceA, 10, 0, 5)},
},
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 1}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 5}})),
fits: false,
test: "opaque resource allocatable enforced",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(opaqueResourceA, 1, 5, 5)},
},
{
pod: newResourceInitPod(newResourcePod(schedulercache.Resource{}),
schedulercache.Resource{MilliCPU: 1, Memory: 1, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 1}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 5}})),
fits: false,
test: "opaque resource allocatable enforced for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(opaqueResourceA, 1, 5, 5)},
},
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 3}},
schedulercache.Resource{MilliCPU: 1, Memory: 1, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 3}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 2}})),
fits: false,
test: "opaque resource allocatable enforced for multiple containers",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(opaqueResourceA, 6, 2, 5)},
},
{
pod: newResourceInitPod(newResourcePod(schedulercache.Resource{}),
schedulercache.Resource{MilliCPU: 1, Memory: 1, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 3}},
schedulercache.Resource{MilliCPU: 1, Memory: 1, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 3}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 2}})),
fits: true,
test: "opaque resource allocatable admits multiple init containers",
},
{
pod: newResourceInitPod(newResourcePod(schedulercache.Resource{}),
schedulercache.Resource{MilliCPU: 1, Memory: 1, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 6}},
schedulercache.Resource{MilliCPU: 1, Memory: 1, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 3}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceA: 2}})),
fits: false,
test: "opaque resource allocatable enforced for multiple init containers",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(opaqueResourceA, 6, 2, 5)},
},
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceB: 1}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0})),
fits: false,
test: "opaque resource allocatable enforced for unknown resource",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(opaqueResourceB, 1, 0, 0)},
},
{
pod: newResourceInitPod(newResourcePod(schedulercache.Resource{}),
schedulercache.Resource{MilliCPU: 1, Memory: 1, OpaqueIntResources: map[api.ResourceName]int64{opaqueResourceB: 1}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0})),
fits: false,
test: "opaque resource allocatable enforced for unknown resource for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(opaqueResourceB, 1, 0, 0)},
},
}
for _, test := range enoughPodsTests {
node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)}}
node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@@ -291,7 +387,7 @@ func TestPodFitsResources(t *testing.T) {
},
}
for _, test := range notEnoughPodsTests {
node := api.Node{Status: api.NodeStatus{Capacity: api.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1)}}
node := api.Node{Status: api.NodeStatus{Capacity: api.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@@ -1739,7 +1835,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &api.Node{
ObjectMeta: api.ObjectMeta{Name: "machine1"},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
},
fits: true,
wErr: nil,
@@ -1751,7 +1847,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 19})),
node: &api.Node{
ObjectMeta: api.ObjectMeta{Name: "machine1"},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
},
fits: false,
wErr: nil,
@@ -1765,7 +1861,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: &api.Pod{},
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32)}},
node: &api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
fits: true,
wErr: nil,
test: "no resources/port/host requested always fits on GPU machine",
@@ -1774,7 +1870,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 1})),
node: &api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32)}},
node: &api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
fits: false,
wErr: nil,
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(api.ResourceNvidiaGPU, 1, 1, 1)},
@@ -1784,7 +1880,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 0})),
node: &api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32)}},
node: &api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0)}},
fits: true,
wErr: nil,
test: "enough GPU resource",
@@ -1798,7 +1894,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(),
node: &api.Node{
ObjectMeta: api.ObjectMeta{Name: "machine1"},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
},
fits: false,
wErr: nil,
@@ -1810,7 +1906,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
node: &api.Node{
ObjectMeta: api.ObjectMeta{Name: "machine1"},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0)},
},
fits: false,
wErr: nil,
@@ -2897,7 +2993,7 @@ func TestPodSchedulesOnNodeWithMemoryPressureCondition(t *testing.T) {
ImagePullPolicy: "Always",
// at least one requirement -> burstable pod
Resources: api.ResourceRequirements{
Requests: makeAllocatableResources(100, 100, 100, 100),
Requests: makeAllocatableResources(100, 100, 100, 100, 0),
},
},
},