Map nvidia devices one to one.

Signed-off-by: Vishnu kannan <vishnuk@google.com>
Vishnu kannan 2017-02-27 15:16:30 -08:00
parent 318f4e102a
commit 2554b95994
5 changed files with 61 additions and 54 deletions

View File

@@ -78,7 +78,8 @@ const (
     // alpha: v1.6
     //
     // Enables support for GPUs as a schedulable resource.
-    // Only Nvidia GPUs are supported as of v1.6
+    // Only Nvidia GPUs are supported as of v1.6.
+    // Works only with Docker Container Runtime.
     Accelerators utilfeature.Feature = "Accelerators"
 )

View File

@@ -23,7 +23,7 @@ type podGPUs struct {
     podGPUMapping map[string]sets.String
 }
 
-func newPodGpus() *podGPUs {
+func newPodGPUs() *podGPUs {
     return &podGPUs{
         podGPUMapping: map[string]sets.String{},
     }
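
Editor's note: for context on the rename above, podGPUs is the kubelet's in-memory bookkeeping of which pod owns which GPU device paths. Below is a minimal sketch of that idea, using a plain map in place of k8s.io's sets.String so it stays self-contained; the type and method names here are illustrative, not the actual kubelet code.

    package main

    import "fmt"

    // podGPUBook is a simplified stand-in for the kubelet's podGPUs type:
    // it maps a pod UID to the set of GPU device paths assigned to it.
    type podGPUBook struct {
        podGPUMapping map[string]map[string]bool
    }

    func newPodGPUBook() *podGPUBook {
        return &podGPUBook{podGPUMapping: map[string]map[string]bool{}}
    }

    // insert records that the given pod owns the given device path.
    func (p *podGPUBook) insert(podUID, device string) {
        if p.podGPUMapping[podUID] == nil {
            p.podGPUMapping[podUID] = map[string]bool{}
        }
        p.podGPUMapping[podUID][device] = true
    }

    // devices returns every device path currently assigned to any pod.
    func (p *podGPUBook) devices() []string {
        var all []string
        for _, devs := range p.podGPUMapping {
            for d := range devs {
                all = append(all, d)
            }
        }
        return all
    }

    func main() {
        book := newPodGPUBook()
        book.insert("pod-a", "/dev/nvidia0")
        book.insert("pod-b", "/dev/nvidia1")
        fmt.Println(book.devices()) // order not guaranteed: [/dev/nvidia0 /dev/nvidia1]
    }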

View File

@@ -33,17 +33,17 @@ import (
     "k8s.io/kubernetes/pkg/kubelet/gpu"
 )
 
-// TODO: If use NVML in the future, the implementation could be more complex,
-// but also more powerful!
+// TODO: rework to use Nvidia's NVML, which is more complex, but also provides more fine-grained information and stats.
 const (
     // All NVIDIA GPUs cards should be mounted with nvidiactl and nvidia-uvm
-    // If the driver installed correctly, the 2 devices must be there.
-    NvidiaCtlDevice string = "/dev/nvidiactl"
-    NvidiaUVMDevice string = "/dev/nvidia-uvm"
-    devDirectory     = "/dev"
-    nvidiaDeviceRE   = `^nvidia[0-9]*$`
-    nvidiaFullpathRE = `^/dev/nvidia[0-9]*$`
+    // If the driver installed correctly, the 2 devices will be there.
+    nvidiaCtlDevice string = "/dev/nvidiactl"
+    nvidiaUVMDevice string = "/dev/nvidia-uvm"
+    // Optional device.
+    nvidiaUVMToolsDevice string = "/dev/nvidia-uvm-tools"
+    devDirectory         = "/dev"
+    nvidiaDeviceRE       = `^nvidia[0-9]*$`
+    nvidiaFullpathRE     = `^/dev/nvidia[0-9]*$`
 )
 
 type activePodsLister interface {
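
Editor's note: discoverGPUs itself is not part of this diff, so here is a minimal sketch of how the nvidiaDeviceRE and devDirectory constants could drive discovery of GPU device nodes. The helper name and error handling are assumptions for illustration, not the kubelet's actual implementation.

    package main

    import (
        "fmt"
        "io/ioutil"
        "path"
        "regexp"
    )

    // discoverGPUPaths is a hypothetical helper: it scans devDirectory and
    // returns the paths of entries matching nvidiaDeviceRE (e.g. /dev/nvidia0).
    func discoverGPUPaths(devDirectory, nvidiaDeviceRE string) ([]string, error) {
        re := regexp.MustCompile(nvidiaDeviceRE)
        files, err := ioutil.ReadDir(devDirectory)
        if err != nil {
            return nil, err
        }
        var gpus []string
        for _, f := range files {
            if f.IsDir() {
                continue
            }
            if re.MatchString(f.Name()) {
                gpus = append(gpus, path.Join(devDirectory, f.Name()))
            }
        }
        return gpus, nil
    }

    func main() {
        gpus, err := discoverGPUPaths("/dev", `^nvidia[0-9]*$`)
        if err != nil {
            fmt.Println("scan failed:", err)
            return
        }
        fmt.Println("GPU device nodes:", gpus) // e.g. [/dev/nvidia0 /dev/nvidia1]
    }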
@@ -55,8 +55,9 @@ type activePodsLister interface {
 
 type nvidiaGPUManager struct {
     sync.Mutex
     // All gpus available on the Node
     allGPUs   sets.String
     allocated *podGPUs
+    defaultDevices []string
     // The interface which could get GPU mapping from all the containers.
     // TODO: Should make this independent of Docker in the future.
     dockerClient dockertools.DockerInterface
@@ -65,35 +66,47 @@ type nvidiaGPUManager struct {
 // NewNvidiaGPUManager returns a GPUManager that manages local Nvidia GPUs.
 // TODO: Migrate to use pod level cgroups and make it generic to all runtimes.
-func NewNvidiaGPUManager(activePodsLister activePodsLister, dockerClient dockertools.DockerInterface) gpu.GPUManager {
+func NewNvidiaGPUManager(activePodsLister activePodsLister, dockerClient dockertools.DockerInterface) (gpu.GPUManager, error) {
+    if dockerClient == nil {
+        return nil, fmt.Errorf("invalid docker client specified")
+    }
     return &nvidiaGPUManager{
         allGPUs:          sets.NewString(),
         dockerClient:     dockerClient,
         activePodsLister: activePodsLister,
-    }
+    }, nil
 }
 
 // Initialize the GPU devices, so far only needed to discover the GPU paths.
 func (ngm *nvidiaGPUManager) Start() error {
-    if _, err := os.Stat(NvidiaCtlDevice); err != nil {
-        return err
-    }
-
-    if _, err := os.Stat(NvidiaUVMDevice); err != nil {
-        return err
+    if ngm.dockerClient == nil {
+        return fmt.Errorf("invalid docker client specified")
     }
     ngm.Lock()
     defer ngm.Unlock()
 
+    if _, err := os.Stat(nvidiaCtlDevice); err != nil {
+        return err
+    }
+
+    if _, err := os.Stat(nvidiaUVMDevice); err != nil {
+        return err
+    }
+    ngm.defaultDevices = []string{nvidiaCtlDevice, nvidiaUVMDevice}
+    _, err := os.Stat(nvidiaUVMToolsDevice)
+    if !os.IsNotExist(err) {
+        ngm.defaultDevices = append(ngm.defaultDevices, nvidiaUVMToolsDevice)
+    }
     if err := ngm.discoverGPUs(); err != nil {
         return err
     }
-    // Its possible that the runtime isn't available now.
+    // It's possible that the runtime isn't available now.
     allocatedGPUs, err := ngm.gpusInUse()
     if err == nil {
         ngm.allocated = allocatedGPUs
     }
-    // We ignore errors with identifying allocated GPUs because it is possible that the runtime interfaces may be not be logically up.
+    // We ignore errors when identifying allocated GPUs because it is possible that the runtime interfaces may not be logically up.
     return nil
 }
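
Editor's note: the os.Stat/os.IsNotExist pattern above treats /dev/nvidia-uvm-tools as optional: the path is kept unless Stat definitively reports that it does not exist (any other outcome, including a permission error, keeps it). A hedged, standalone illustration of that check in isolation; the helper name is hypothetical, not part of the kubelet:

    package main

    import (
        "fmt"
        "os"
    )

    // appendIfPresent mirrors the optional-device check: it appends path to
    // devices unless os.Stat reports the path does not exist.
    func appendIfPresent(devices []string, path string) []string {
        if _, err := os.Stat(path); os.IsNotExist(err) {
            return devices
        }
        return append(devices, path)
    }

    func main() {
        defaults := []string{"/dev/nvidiactl", "/dev/nvidia-uvm"}
        defaults = appendIfPresent(defaults, "/dev/nvidia-uvm-tools")
        fmt.Println(defaults)
    }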
@@ -130,13 +143,13 @@ func (ngm *nvidiaGPUManager) AllocateGPU(pod *v1.Pod, container *v1.Container) (
         // Initialization is not complete. Try now. Failures can no longer be tolerated.
         allocated, err := ngm.gpusInUse()
         if err != nil {
-            return nil, fmt.Errorf("failed to allocate GPUs because of issues identifying GPUs in use: %v", err)
+            return nil, fmt.Errorf("Failed to allocate GPUs because of issues identifying GPUs in use: %v", err)
         }
         ngm.allocated = allocated
     } else {
         // update internal list of GPUs in use prior to allocating new GPUs.
         if err := ngm.updateAllocatedGPUs(); err != nil {
-            return nil, fmt.Errorf("failed to allocate GPUs because of issues with updating GPUs in use: %v", err)
+            return nil, fmt.Errorf("Failed to allocate GPUs because of issues with updating GPUs in use: %v", err)
         }
     }
     // Get GPU devices in use.
@@ -146,23 +159,24 @@ func (ngm *nvidiaGPUManager) AllocateGPU(pod *v1.Pod, container *v1.Container) (
     if int64(available.Len()) < gpusNeeded {
         return nil, fmt.Errorf("requested number of GPUs unavailable. Requested: %d, Available: %d", gpusNeeded, available.Len())
     }
-    var ret []string
-    for _, device := range available.List() {
-        if gpusNeeded > 0 {
-            ret = append(ret, device)
-            // Update internal allocated GPU cache.
-            ngm.allocated.insert(string(pod.UID), device)
-        }
-        gpusNeeded--
+    ret := available.List()[:gpusNeeded]
+    for _, device := range ret {
+        // Update internal allocated GPU cache.
+        ngm.allocated.insert(string(pod.UID), device)
     }
+    // Add standard device files that need to be exposed.
+    ret = append(ret, ngm.defaultDevices...)
     return ret, nil
 }
 
+// updateAllocatedGPUs updates the list of GPUs in use.
+// It gets a list of running pods and then frees any GPUs that are bound to terminated pods.
+// Returns error on failure.
 func (ngm *nvidiaGPUManager) updateAllocatedGPUs() error {
     activePods, err := ngm.activePodsLister.GetRunningPods()
     if err != nil {
-        return fmt.Errorf("failed to list active pods: %v", err)
+        return fmt.Errorf("Failed to list active pods: %v", err)
     }
     activePodUids := sets.NewString()
     for _, pod := range activePods {
@@ -232,12 +246,12 @@ func (ngm *nvidiaGPUManager) gpusInUse() (*podGPUs, error) {
         // add the pod and its containers that need to be inspected.
         podContainersToInspect = append(podContainersToInspect, podContainers{string(pod.UID), containerIDs})
     }
-    ret := newPodGpus()
+    ret := newPodGPUs()
     for _, podContainer := range podContainersToInspect {
         for _, containerId := range podContainer.containerIDs.List() {
             containerJSON, err := ngm.dockerClient.InspectContainer(containerId)
             if err != nil {
-                glog.V(3).Infof("failed to inspect container %q in pod %q while attempting to reconcile nvidia gpus in use", containerId, podContainer.uid)
+                glog.V(3).Infof("Failed to inspect container %q in pod %q while attempting to reconcile nvidia gpus in use", containerId, podContainer.uid)
                 continue
             }
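
Editor's note: to make the reworked allocation path concrete, here is a hedged, standalone sketch (not the kubelet code): take the first gpusNeeded entries of the sorted free-GPU list, then append the default control devices before returning.

    package main

    import "fmt"

    // allocate sketches the reworked AllocateGPU flow. available is assumed to
    // be a freshly built, sorted list of free GPU device paths (in the kubelet,
    // sets.String.List() returns a new slice); defaultDevices are the control
    // nodes every GPU container needs.
    func allocate(available []string, gpusNeeded int, defaultDevices []string) []string {
        ret := available[:gpusNeeded]
        // Add the standard device files that need to be exposed alongside the GPUs.
        return append(ret, defaultDevices...)
    }

    func main() {
        available := []string{"/dev/nvidia0", "/dev/nvidia1", "/dev/nvidia2"}
        defaults := []string{"/dev/nvidiactl", "/dev/nvidia-uvm"}
        fmt.Println(allocate(available, 2, defaults))
        // Prints: [/dev/nvidia0 /dev/nvidia1 /dev/nvidiactl /dev/nvidia-uvm]
    }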

View File

@@ -788,7 +788,12 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
     klet.appArmorValidator = apparmor.NewValidator(kubeCfg.ContainerRuntime)
     klet.softAdmitHandlers.AddPodAdmitHandler(lifecycle.NewAppArmorAdmitHandler(klet.appArmorValidator))
     if utilfeature.DefaultFeatureGate.Enabled(features.Accelerators) {
-        klet.gpuManager = nvidia.NewNvidiaGPUManager(klet, klet.dockerClient)
+        if kubeCfg.ContainerRuntime != "docker" {
+            return nil, fmt.Errorf("Accelerators feature is supported with docker runtime only.")
+        }
+        if klet.gpuManager, err = nvidia.NewNvidiaGPUManager(klet, klet.dockerClient); err != nil {
+            return nil, err
+        }
     } else {
         klet.gpuManager = gpu.NewGPUManagerStub()
     }

View File

@@ -28,7 +28,6 @@ import (
     "path/filepath"
     "runtime"
     "sort"
-    "strconv"
     "strings"
     "sync"
     "time"
@@ -49,7 +48,6 @@ import (
     "k8s.io/kubernetes/pkg/kubelet/cm"
     kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
     "k8s.io/kubernetes/pkg/kubelet/envvars"
-    "k8s.io/kubernetes/pkg/kubelet/gpu/nvidia"
     "k8s.io/kubernetes/pkg/kubelet/images"
     "k8s.io/kubernetes/pkg/kubelet/qos"
     "k8s.io/kubernetes/pkg/kubelet/server/portforward"
@@ -96,21 +94,10 @@ func (kl *Kubelet) makeDevices(pod *v1.Pod, container *v1.Container) ([]kubecont
     if err != nil {
         return nil, err
     }
-    devices := []kubecontainer.DeviceInfo{
-        {
-            PathOnHost:      nvidia.NvidiaCtlDevice,
-            PathInContainer: nvidia.NvidiaCtlDevice,
-            Permissions:     "mrw",
-        },
-        {
-            PathOnHost:      nvidia.NvidiaUVMDevice,
-            PathInContainer: nvidia.NvidiaUVMDevice,
-            Permissions:     "mrw",
-        },
-    }
-    for i, path := range nvidiaGPUPaths {
-        devices = append(devices, kubecontainer.DeviceInfo{PathOnHost: path, PathInContainer: "/dev/nvidia" + strconv.Itoa(i), Permissions: "mrw"})
+    var devices []kubecontainer.DeviceInfo
+    for _, path := range nvidiaGPUPaths {
+        // Devices have to be mapped one to one because of nvidia CUDA library requirements.
+        devices = append(devices, kubecontainer.DeviceInfo{PathOnHost: path, PathInContainer: path, Permissions: "mrw"})
     }
     return devices, nil
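
Editor's note: this is the change the commit title describes. Previously the kubelet renumbered GPU nodes inside the container (/dev/nvidia1 on the host could appear as /dev/nvidia0 in the container) and hard-coded the control devices here; now each allocated path is exposed at the identical path, and the control devices come from the GPU manager's allocation. A hedged standalone sketch of the resulting mapping; DeviceInfo below only mirrors the shape of kubecontainer.DeviceInfo for illustration:

    package main

    import "fmt"

    // DeviceInfo mirrors the shape of kubecontainer.DeviceInfo for illustration only.
    type DeviceInfo struct {
        PathOnHost      string
        PathInContainer string
        Permissions     string
    }

    // makeGPUDevices sketches the new one-to-one mapping: the container sees
    // every allocated device node under the same path it has on the host.
    func makeGPUDevices(nvidiaGPUPaths []string) []DeviceInfo {
        var devices []DeviceInfo
        for _, path := range nvidiaGPUPaths {
            devices = append(devices, DeviceInfo{PathOnHost: path, PathInContainer: path, Permissions: "mrw"})
        }
        return devices
    }

    func main() {
        // A typical allocation for one GPU now also carries the default devices.
        paths := []string{"/dev/nvidia1", "/dev/nvidiactl", "/dev/nvidia-uvm"}
        for _, d := range makeGPUDevices(paths) {
            fmt.Printf("%s -> %s (%s)\n", d.PathOnHost, d.PathInContainer, d.Permissions)
        }
    }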