WIP v0 NVIDIA GPU support
Implements part of #24071. I am not familiar enough with the scheduler to know what to do with the scores, so I am punting on that for now. Missing items from the implementation plan: LimitRanger, rkt support, kubectl support, and user docs.
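For context, a minimal sketch (not part of this commit) of how a pod would request a GPU under this change, using the api.ResourceNvidiaGPU resource name that the predicate below sums up. The helper name, container name, and image are hypothetical, and the import paths assume the pkg/api layout of this era:

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/resource"
)

// gpuPod is a hypothetical helper: it builds a pod whose single container
// requests CPU, memory, and one NVIDIA GPU. getResourceRequest sums exactly
// these Requests entries, and PodFitsResources then checks them against the
// node's allocatable resources.
func gpuPod() *api.Pod {
	return &api.Pod{
		Spec: api.PodSpec{
			Containers: []api.Container{{
				Name:  "cuda-worker", // hypothetical name
				Image: "nvidia/cuda", // hypothetical image
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						api.ResourceCPU:       *resource.NewMilliQuantity(500, resource.DecimalSI),
						api.ResourceMemory:    *resource.NewQuantity(512*1024*1024, resource.BinarySI),
						api.ResourceNvidiaGPU: *resource.NewQuantity(1, resource.DecimalSI),
					},
				},
			}},
		},
	}
}

func main() {
	req := gpuPod().Spec.Containers[0].Resources.Requests
	fmt.Printf("cpu=%v memory=%v nvidia-gpu=%v\n",
		req.Cpu().MilliValue(), req.Memory().Value(), req.NvidiaGPU().Value())
}

With the predicate change below, such a pod is rejected on any node whose allocatable NvidiaGPU count, minus what is already requested on that node, is less than 1.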
@@ -19,9 +19,10 @@ package predicates
 import "fmt"

 const (
-    podCountResourceName string = "PodCount"
-    cpuResourceName      string = "CPU"
-    memoryResoureceName  string = "Memory"
+    podCountResourceName  string = "PodCount"
+    cpuResourceName       string = "CPU"
+    memoryResoureceName   string = "Memory"
+    nvidiaGpuResourceName string = "NvidiaGpu"
 )

 var (
@@ -346,8 +346,9 @@ func (c *VolumeZoneChecker) predicate(pod *api.Pod, nodeInfo *schedulercache.Nod
 }

 type resourceRequest struct {
-    milliCPU int64
-    memory   int64
+    milliCPU  int64
+    memory    int64
+    nvidiaGPU int64
 }

 func getResourceRequest(pod *api.Pod) resourceRequest {
@@ -356,19 +357,23 @@ func getResourceRequest(pod *api.Pod) resourceRequest {
         requests := container.Resources.Requests
         result.memory += requests.Memory().Value()
         result.milliCPU += requests.Cpu().MilliValue()
+        result.nvidiaGPU += requests.NvidiaGPU().Value()
     }
     return result
 }

-func CheckPodsExceedingFreeResources(pods []*api.Pod, allocatable api.ResourceList) (fitting []*api.Pod, notFittingCPU, notFittingMemory []*api.Pod) {
+func CheckPodsExceedingFreeResources(pods []*api.Pod, allocatable api.ResourceList) (fitting []*api.Pod, notFittingCPU, notFittingMemory, notFittingNvidiaGPU []*api.Pod) {
     totalMilliCPU := allocatable.Cpu().MilliValue()
     totalMemory := allocatable.Memory().Value()
+    totalNvidiaGPU := allocatable.NvidiaGPU().Value()
     milliCPURequested := int64(0)
     memoryRequested := int64(0)
+    nvidiaGPURequested := int64(0)
     for _, pod := range pods {
         podRequest := getResourceRequest(pod)
         fitsCPU := (totalMilliCPU - milliCPURequested) >= podRequest.milliCPU
         fitsMemory := (totalMemory - memoryRequested) >= podRequest.memory
+        fitsNVidiaGPU := (totalNvidiaGPU - nvidiaGPURequested) >= podRequest.nvidiaGPU
         if !fitsCPU {
             // the pod doesn't fit due to CPU request
             notFittingCPU = append(notFittingCPU, pod)
@@ -379,9 +384,15 @@ func CheckPodsExceedingFreeResources(pods []*api.Pod, allocatable api.ResourceLi
             notFittingMemory = append(notFittingMemory, pod)
             continue
         }
+        if !fitsNVidiaGPU {
+            // the pod doesn't fit due to NvidiaGPU request
+            notFittingNvidiaGPU = append(notFittingNvidiaGPU, pod)
+            continue
+        }
         // the pod fits
         milliCPURequested += podRequest.milliCPU
         memoryRequested += podRequest.memory
+        nvidiaGPURequested += podRequest.nvidiaGPU
         fitting = append(fitting, pod)
     }
     return
@@ -403,12 +414,13 @@ func PodFitsResources(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, er
             newInsufficientResourceError(podCountResourceName, 1, int64(len(nodeInfo.Pods())), allowedPodNumber)
     }
     podRequest := getResourceRequest(pod)
-    if podRequest.milliCPU == 0 && podRequest.memory == 0 {
+    if podRequest.milliCPU == 0 && podRequest.memory == 0 && podRequest.nvidiaGPU == 0 {
         return true, nil
     }

     totalMilliCPU := allocatable.Cpu().MilliValue()
     totalMemory := allocatable.Memory().Value()
+    totalNvidiaGPU := allocatable.NvidiaGPU().Value()

     if totalMilliCPU < podRequest.milliCPU+nodeInfo.RequestedResource().MilliCPU {
         return false,
@@ -418,6 +430,10 @@ func PodFitsResources(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, er
         return false,
             newInsufficientResourceError(memoryResoureceName, podRequest.memory, nodeInfo.RequestedResource().Memory, totalMemory)
     }
+    if totalNvidiaGPU < podRequest.nvidiaGPU+nodeInfo.RequestedResource().NvidiaGPU {
+        return false,
+            newInsufficientResourceError(nvidiaGpuResourceName, podRequest.nvidiaGPU, nodeInfo.RequestedResource().NvidiaGPU, totalNvidiaGPU)
+    }
     glog.V(10).Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.",
         podName(pod), node.Name, len(nodeInfo.Pods()), allowedPodNumber)
     return true, nil
@@ -71,21 +71,23 @@ func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*api.P
     return nil, fmt.Errorf("Unable to find persistent volume: %s", pvID)
 }

-func makeResources(milliCPU int64, memory int64, pods int64) api.NodeResources {
+func makeResources(milliCPU int64, memory int64, nvidiaGPUs int64, pods int64) api.NodeResources {
     return api.NodeResources{
         Capacity: api.ResourceList{
-            api.ResourceCPU:    *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
-            api.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
-            api.ResourcePods:   *resource.NewQuantity(pods, resource.DecimalSI),
+            api.ResourceCPU:       *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
+            api.ResourceMemory:    *resource.NewQuantity(memory, resource.BinarySI),
+            api.ResourcePods:      *resource.NewQuantity(pods, resource.DecimalSI),
+            api.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
         },
     }
 }

-func makeAllocatableResources(milliCPU int64, memory int64, pods int64) api.ResourceList {
+func makeAllocatableResources(milliCPU int64, memory int64, nvidiaGPUs int64, pods int64) api.ResourceList {
     return api.ResourceList{
-        api.ResourceCPU:    *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
-        api.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
-        api.ResourcePods:   *resource.NewQuantity(pods, resource.DecimalSI),
+        api.ResourceCPU:       *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
+        api.ResourceMemory:    *resource.NewQuantity(memory, resource.BinarySI),
+        api.ResourcePods:      *resource.NewQuantity(pods, resource.DecimalSI),
+        api.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
     }
 }
@@ -95,8 +97,9 @@ func newResourcePod(usage ...resourceRequest) *api.Pod {
         containers = append(containers, api.Container{
             Resources: api.ResourceRequirements{
                 Requests: api.ResourceList{
-                    api.ResourceCPU:    *resource.NewMilliQuantity(req.milliCPU, resource.DecimalSI),
-                    api.ResourceMemory: *resource.NewQuantity(req.memory, resource.BinarySI),
+                    api.ResourceCPU:       *resource.NewMilliQuantity(req.milliCPU, resource.DecimalSI),
+                    api.ResourceMemory:    *resource.NewQuantity(req.memory, resource.BinarySI),
+                    api.ResourceNvidiaGPU: *resource.NewQuantity(req.nvidiaGPU, resource.DecimalSI),
                 },
             },
         })
@@ -159,7 +162,7 @@ func TestPodFitsResources(t *testing.T) {
     }

     for _, test := range enoughPodsTests {
-        node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)}}
+        node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)}}
         test.nodeInfo.SetNode(&node)

         fits, err := PodFitsResources(test.pod, test.nodeInfo)
@@ -204,7 +207,7 @@ func TestPodFitsResources(t *testing.T) {
         },
     }
     for _, test := range notEnoughPodsTests {
-        node := api.Node{Status: api.NodeStatus{Capacity: api.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 1)}}
+        node := api.Node{Status: api.NodeStatus{Capacity: api.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1)}}
         test.nodeInfo.SetNode(&node)

         fits, err := PodFitsResources(test.pod, test.nodeInfo)
@@ -1529,7 +1532,7 @@ func TestRunGeneralPredicates(t *testing.T) {
                 newResourcePod(resourceRequest{milliCPU: 9, memory: 19})),
             node: &api.Node{
                 ObjectMeta: api.ObjectMeta{Name: "machine1"},
-                Status:     api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)},
+                Status:     api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)},
             },
             fits: true,
             wErr: nil,
@@ -1541,12 +1544,39 @@ func TestRunGeneralPredicates(t *testing.T) {
                 newResourcePod(resourceRequest{milliCPU: 5, memory: 19})),
             node: &api.Node{
                 ObjectMeta: api.ObjectMeta{Name: "machine1"},
-                Status:     api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)},
+                Status:     api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)},
             },
             fits: false,
             wErr: newInsufficientResourceError("CPU", 8, 5, 10),
             test: "not enough cpu resource",
         },
+        {
+            pod: &api.Pod{},
+            nodeInfo: schedulercache.NewNodeInfo(
+                newResourcePod(resourceRequest{milliCPU: 9, memory: 19})),
+            node: &api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32)}},
+            fits: true,
+            wErr: nil,
+            test: "no resources/port/host requested always fits on GPU machine",
+        },
+        {
+            pod: newResourcePod(resourceRequest{milliCPU: 3, memory: 1, nvidiaGPU: 1}),
+            nodeInfo: schedulercache.NewNodeInfo(
+                newResourcePod(resourceRequest{milliCPU: 5, memory: 10, nvidiaGPU: 1})),
+            node: &api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32)}},
+            fits: false,
+            wErr: newInsufficientResourceError("NvidiaGpu", 1, 1, 1),
+            test: "not enough GPU resource",
+        },
+        {
+            pod: newResourcePod(resourceRequest{milliCPU: 3, memory: 1, nvidiaGPU: 1}),
+            nodeInfo: schedulercache.NewNodeInfo(
+                newResourcePod(resourceRequest{milliCPU: 5, memory: 10, nvidiaGPU: 0})),
+            node: &api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32)}},
+            fits: true,
+            wErr: nil,
+            test: "enough GPU resource",
+        },
         {
             pod: &api.Pod{
                 Spec: api.PodSpec{
@@ -1556,7 +1586,7 @@ func TestRunGeneralPredicates(t *testing.T) {
             nodeInfo: schedulercache.NewNodeInfo(),
             node: &api.Node{
                 ObjectMeta: api.ObjectMeta{Name: "machine1"},
-                Status:     api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)},
+                Status:     api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)},
             },
             fits: false,
             wErr: ErrPodNotMatchHostName,
@@ -1567,7 +1597,7 @@ func TestRunGeneralPredicates(t *testing.T) {
             nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
             node: &api.Node{
                 ObjectMeta: api.ObjectMeta{Name: "machine1"},
-                Status:     api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)},
+                Status:     api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)},
             },
             fits: false,
             wErr: ErrPodNotFitsHostPorts,