From e0f89a322b5fb6e9a8b4d8b66ada79d17e021651 Mon Sep 17 00:00:00 2001
From: Pengfei Ni <feiskyer@gmail.com>
Date: Mon, 31 Oct 2016 16:05:02 +0800
Subject: [PATCH] CRI: Add devices implementation and move GPU to devices

---
 pkg/kubelet/dockershim/docker_container.go    | 12 ++++++-
 pkg/kubelet/dockertools/docker_manager.go     | 18 +++++-----
 pkg/kubelet/kubelet_pods.go                   | 18 ++++++++++
 pkg/kubelet/kubelet_pods_test.go              | 34 +++++++++++++++++++
 .../kuberuntime/kuberuntime_container.go      | 17 ++++++++++
 5 files changed, 89 insertions(+), 10 deletions(-)

diff --git a/pkg/kubelet/dockershim/docker_container.go b/pkg/kubelet/dockershim/docker_container.go
index 682b44d5273..2921975b802 100644
--- a/pkg/kubelet/dockershim/docker_container.go
+++ b/pkg/kubelet/dockershim/docker_container.go
@@ -162,13 +162,23 @@ func (ds *dockerService) CreateContainer(podSandboxID string, config *runtimeApi
 				CPUShares:  rOpts.GetCpuShares(),
 				CPUQuota:   rOpts.GetCpuQuota(),
 				CPUPeriod:  rOpts.GetCpuPeriod(),
-				// TODO: Need to set devices.
 			}
 			hc.OomScoreAdj = int(rOpts.GetOomScoreAdj())
 		}
 		// Note: ShmSize is handled in kube_docker_client.go
 	}
 
+	// Set devices for container.
+	devices := make([]dockercontainer.DeviceMapping, len(config.Devices))
+	for i, device := range config.Devices {
+		devices[i] = dockercontainer.DeviceMapping{
+			PathOnHost:        device.GetHostPath(),
+			PathInContainer:   device.GetContainerPath(),
+			CgroupPermissions: device.GetPermissions(),
+		}
+	}
+	hc.Resources.Devices = devices
+
 	var err error
 	hc.SecurityOpt, err = getContainerSecurityOpts(config.Metadata.GetName(), sandboxConfig, ds.seccompProfileRoot)
 	if err != nil {
diff --git a/pkg/kubelet/dockertools/docker_manager.go b/pkg/kubelet/dockertools/docker_manager.go
index fc316758bca..9443106c835 100644
--- a/pkg/kubelet/dockertools/docker_manager.go
+++ b/pkg/kubelet/dockertools/docker_manager.go
@@ -619,7 +619,6 @@ func (dm *DockerManager) runContainer(
 	memoryLimit := container.Resources.Limits.Memory().Value()
 	cpuRequest := container.Resources.Requests.Cpu()
 	cpuLimit := container.Resources.Limits.Cpu()
-	nvidiaGPULimit := container.Resources.Limits.NvidiaGPU()
 	var cpuShares int64
 	// If request is not specified, but limit is, we want request to default to limit.
 	// API server does this for new containers, but we repeat this logic in Kubelet
@@ -631,17 +630,18 @@ func (dm *DockerManager) runContainer(
 		// of CPU shares.
 		cpuShares = milliCPUToShares(cpuRequest.MilliValue())
 	}
-	var devices []dockercontainer.DeviceMapping
-	if nvidiaGPULimit.Value() != 0 {
-		// Experimental. For now, we hardcode /dev/nvidia0 no matter what the user asks for
-		// (we only support one device per node).
-		devices = []dockercontainer.DeviceMapping{
-			{PathOnHost: "/dev/nvidia0", PathInContainer: "/dev/nvidia0", CgroupPermissions: "mrw"},
-			{PathOnHost: "/dev/nvidiactl", PathInContainer: "/dev/nvidiactl", CgroupPermissions: "mrw"},
-			{PathOnHost: "/dev/nvidia-uvm", PathInContainer: "/dev/nvidia-uvm", CgroupPermissions: "mrw"},
+
+	// Set devices for container.
+	devices := make([]dockercontainer.DeviceMapping, len(opts.Devices))
+	for i, device := range opts.Devices {
+		devices[i] = dockercontainer.DeviceMapping{
+			PathOnHost:        device.PathOnHost,
+			PathInContainer:   device.PathInContainer,
+			CgroupPermissions: device.Permissions,
 		}
 	}
 	binds := makeMountBindings(opts.Mounts)
+
 	// The reason we create and mount the log file in here (not in kubelet) is because
 	// the file's location depends on the ID of the container, and we need to create and
 	// mount the file before actually starting the container.
diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go
index 02f5be6ef3f..a6746607f50 100644
--- a/pkg/kubelet/kubelet_pods.go
+++ b/pkg/kubelet/kubelet_pods.go
@@ -74,6 +74,23 @@ func (kl *Kubelet) getActivePods() []*api.Pod {
 	return activePods
 }
 
+// makeDevices returns the device mappings to expose to the given container.
+// Experimental: whenever the container's NVIDIA GPU limit is non-zero, the
+// same fixed device nodes are returned regardless of the amount requested
+// (only one device per node is supported); otherwise the result is nil.
+// TODO: add support for more than 1 GPU after #28216.
+func makeDevices(container *api.Container) []kubecontainer.DeviceInfo {
+	if gpus := container.Resources.Limits.NvidiaGPU(); gpus.Value() == 0 {
+		return nil
+	}
+
+	return []kubecontainer.DeviceInfo{
+		{PathOnHost: "/dev/nvidia0", PathInContainer: "/dev/nvidia0", Permissions: "mrw"},
+		{PathOnHost: "/dev/nvidiactl", PathInContainer: "/dev/nvidiactl", Permissions: "mrw"},
+		{PathOnHost: "/dev/nvidia-uvm", PathInContainer: "/dev/nvidia-uvm", Permissions: "mrw"},
+	}
+}
+
 // makeMounts determines the mount points for the given container.
 func makeMounts(pod *api.Pod, podDir string, container *api.Container, hostName, hostDomain, podIP string, podVolumes kubecontainer.VolumeMap) ([]kubecontainer.Mount, error) {
 	// Kubernetes only mounts on /etc/hosts if :
@@ -252,6 +269,7 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *api.Pod, container *api.Cont
 	volumes := kl.volumeManager.GetMountedVolumesForPod(podName)
 
 	opts.PortMappings = makePortMappings(container)
+	opts.Devices = makeDevices(container)
 
 	opts.Mounts, err = makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIP, volumes)
 	if err != nil {
diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go
index 159e9c0c356..add58b6f3c4 100644
--- a/pkg/kubelet/kubelet_pods_test.go
+++ b/pkg/kubelet/kubelet_pods_test.go
@@ -28,6 +28,7 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/api/resource"
 	"k8s.io/kubernetes/pkg/apimachinery/registered"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
@@ -1262,3 +1263,36 @@ func TestGetHostPortConflicts(t *testing.T) {
 	pods = append(pods, expected)
 	assert.True(t, hasHostPortConflicts(pods), "Should have port conflicts")
 }
+
+// TestMakeDevices verifies makeDevices both without and with a GPU limit.
+func TestMakeDevices(t *testing.T) {
+	gpuLimits := map[api.ResourceName]resource.Quantity{
+		api.ResourceNvidiaGPU: resource.MustParse("1000"),
+	}
+
+	cases := []struct {
+		name      string
+		container *api.Container
+		expected  []kubecontainer.DeviceInfo
+	}{
+		{
+			name:      "no device",
+			container: &api.Container{},
+			expected:  nil,
+		},
+		{
+			name:      "gpu",
+			container: &api.Container{Resources: api.ResourceRequirements{Limits: gpuLimits}},
+			expected: []kubecontainer.DeviceInfo{
+				{PathOnHost: "/dev/nvidia0", PathInContainer: "/dev/nvidia0", Permissions: "mrw"},
+				{PathOnHost: "/dev/nvidiactl", PathInContainer: "/dev/nvidiactl", Permissions: "mrw"},
+				{PathOnHost: "/dev/nvidia-uvm", PathInContainer: "/dev/nvidia-uvm", Permissions: "mrw"},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		actual := makeDevices(tc.container)
+		assert.Equal(t, tc.expected, actual, "[test %q]", tc.name)
+	}
+}
diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go
index 6b4566848e2..3b867387e2d 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_container.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go
@@ -151,6 +151,7 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *api.Conta
 		Labels:      newContainerLabels(container, pod),
 		Annotations: newContainerAnnotations(container, pod, restartCount),
 		Mounts:      m.makeMounts(opts, container, podHasSELinuxLabel),
+		Devices:     makeDevices(opts),
 		LogPath:     &containerLogsPath,
 		Stdin:       &container.Stdin,
 		StdinOnce:   &container.StdinOnce,
@@ -251,6 +252,22 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *api.
 	return linuxConfig
 }
 
+// makeDevices converts the devices in opts into runtime API Device messages.
+func makeDevices(opts *kubecontainer.RunContainerOptions) []*runtimeApi.Device {
+	result := make([]*runtimeApi.Device, 0, len(opts.Devices))
+	for i := range opts.Devices {
+		// Copy the element so each message points at its own storage.
+		info := opts.Devices[i]
+		result = append(result, &runtimeApi.Device{
+			HostPath:      &info.PathOnHost,
+			ContainerPath: &info.PathInContainer,
+			Permissions:   &info.Permissions,
+		})
+	}
+
+	return result
+}
+
 // makeMounts generates container volume mounts for kubelet runtime api.
 func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerOptions, container *api.Container, podHasSELinuxLabel bool) []*runtimeApi.Mount {
 	volumeMounts := []*runtimeApi.Mount{}