Map nvidia devices one to one.

Signed-off-by: Vishnu kannan <vishnuk@google.com>

commit 2554b95994
parent 318f4e102a
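
In short: instead of remapping GPUs to renumbered /dev/nvidia<i> paths inside the container, the kubelet now exposes each discovered /dev/nvidia[0-9]* device in the container at the same path it has on the host, which the nvidia CUDA libraries require. The Nvidia GPU manager takes over the standard control devices (/dev/nvidiactl, /dev/nvidia-uvm, and the optional /dev/nvidia-uvm-tools) as defaultDevices and appends them to every allocation, NewNvidiaGPUManager now returns an error instead of silently accepting a nil docker client, and the Accelerators feature gate is rejected on non-docker runtimes.
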
@@ -78,7 +78,8 @@ const (
 	// alpha: v1.6
 	//
 	// Enables support for GPUs as a schedulable resource.
-	// Only Nvidia GPUs are supported as of v1.6
+	// Only Nvidia GPUs are supported as of v1.6.
+	// Works only with Docker Container Runtime.
 	Accelerators utilfeature.Feature = "Accelerators"
 )
 
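
For reference, this alpha gate is switched on through the kubelet's feature-gates flag; an illustrative invocation (and, per the kubelet change further down, the docker runtime is required):

	kubelet --feature-gates=Accelerators=true --container-runtime=docker
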
@@ -23,7 +23,7 @@ type podGPUs struct {
 	podGPUMapping map[string]sets.String
 }
 
-func newPodGpus() *podGPUs {
+func newPodGPUs() *podGPUs {
 	return &podGPUs{
 		podGPUMapping: map[string]sets.String{},
 	}
@@ -33,17 +33,17 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/gpu"
 )
 
-// TODO: If use NVML in the future, the implementation could be more complex,
-// but also more powerful!
-
+// TODO: rework to use Nvidia's NVML, which is more complex, but also provides more fine-grained information and stats.
 const (
 	// All NVIDIA GPUs cards should be mounted with nvidiactl and nvidia-uvm
-	// If the driver installed correctly, the 2 devices must be there.
-	NvidiaCtlDevice string = "/dev/nvidiactl"
-	NvidiaUVMDevice string = "/dev/nvidia-uvm"
-	devDirectory     = "/dev"
-	nvidiaDeviceRE   = `^nvidia[0-9]*$`
-	nvidiaFullpathRE = `^/dev/nvidia[0-9]*$`
+	// If the driver installed correctly, the 2 devices will be there.
+	nvidiaCtlDevice string = "/dev/nvidiactl"
+	nvidiaUVMDevice string = "/dev/nvidia-uvm"
+	// Optional device.
+	nvidiaUVMToolsDevice string = "/dev/nvidia-uvm-tools"
+	devDirectory         = "/dev"
+	nvidiaDeviceRE       = `^nvidia[0-9]*$`
+	nvidiaFullpathRE     = `^/dev/nvidia[0-9]*$`
 )
 
 type activePodsLister interface {
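
The two regular expressions above drive device discovery: GPU device nodes are matched by name under devDirectory. A small standalone check of the pattern, with illustrative names (only the regex literal is taken from the diff):

	package main

	import (
		"fmt"
		"regexp"
	)

	func main() {
		// Same pattern as nvidiaDeviceRE above.
		nvidiaDeviceRE := regexp.MustCompile(`^nvidia[0-9]*$`)
		for _, name := range []string{"nvidia0", "nvidia1", "nvidiactl", "nvidia-uvm"} {
			fmt.Printf("%-10s matches: %v\n", name, nvidiaDeviceRE.MatchString(name))
		}
		// nvidia0 and nvidia1 match; nvidiactl and nvidia-uvm do not, which is
		// why the control devices are tracked separately as default devices.
	}
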
@@ -55,8 +55,9 @@ type activePodsLister interface {
 type nvidiaGPUManager struct {
 	sync.Mutex
 	// All gpus available on the Node
 	allGPUs   sets.String
 	allocated *podGPUs
+	defaultDevices []string
 	// The interface which could get GPU mapping from all the containers.
 	// TODO: Should make this independent of Docker in the future.
 	dockerClient dockertools.DockerInterface
@@ -65,35 +66,47 @@ type nvidiaGPUManager struct {
 
 // NewNvidiaGPUManager returns a GPUManager that manages local Nvidia GPUs.
 // TODO: Migrate to use pod level cgroups and make it generic to all runtimes.
-func NewNvidiaGPUManager(activePodsLister activePodsLister, dockerClient dockertools.DockerInterface) gpu.GPUManager {
+func NewNvidiaGPUManager(activePodsLister activePodsLister, dockerClient dockertools.DockerInterface) (gpu.GPUManager, error) {
+	if dockerClient == nil {
+		return nil, fmt.Errorf("invalid docker client specified")
+	}
 	return &nvidiaGPUManager{
 		allGPUs:          sets.NewString(),
 		dockerClient:     dockerClient,
 		activePodsLister: activePodsLister,
-	}
+	}, nil
 }
 
 // Initialize the GPU devices, so far only needed to discover the GPU paths.
 func (ngm *nvidiaGPUManager) Start() error {
-	if _, err := os.Stat(NvidiaCtlDevice); err != nil {
-		return err
-	}
-
-	if _, err := os.Stat(NvidiaUVMDevice); err != nil {
-		return err
+	if ngm.dockerClient == nil {
+		return fmt.Errorf("invalid docker client specified")
 	}
 	ngm.Lock()
 	defer ngm.Unlock()
 
+	if _, err := os.Stat(nvidiaCtlDevice); err != nil {
+		return err
+	}
+
+	if _, err := os.Stat(nvidiaUVMDevice); err != nil {
+		return err
+	}
+	ngm.defaultDevices = []string{nvidiaCtlDevice, nvidiaUVMDevice}
+	_, err := os.Stat(nvidiaUVMToolsDevice)
+	if os.IsNotExist(err) {
+		ngm.defaultDevices = append(ngm.defaultDevices, nvidiaUVMToolsDevice)
+	}
+
 	if err := ngm.discoverGPUs(); err != nil {
 		return err
 	}
-	// Its possible that the runtime isn't available now.
+	// It's possible that the runtime isn't available now.
 	allocatedGPUs, err := ngm.gpusInUse()
 	if err == nil {
 		ngm.allocated = allocatedGPUs
 	}
-	// We ignore errors with identifying allocated GPUs because it is possible that the runtime interfaces may be not be logically up.
+	// We ignore errors when identifying allocated GPUs because it is possible that the runtime interfaces may be not be logically up.
 	return nil
 }
 
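
A quick illustration of what the new constructor signature means for callers: a missing docker client now fails at construction time rather than surfacing later. This is a self-contained sketch with stand-in types, not the real kubelet wiring (that change appears further down in this diff):

	package main

	import (
		"errors"
		"fmt"
	)

	// Stand-ins for gpu.GPUManager and dockertools.DockerInterface.
	type gpuManager struct{ dockerClient interface{} }

	func newNvidiaGPUManager(dockerClient interface{}) (*gpuManager, error) {
		if dockerClient == nil {
			return nil, errors.New("invalid docker client specified")
		}
		return &gpuManager{dockerClient: dockerClient}, nil
	}

	func main() {
		if _, err := newNvidiaGPUManager(nil); err != nil {
			fmt.Println("constructor rejects it up front:", err)
		}
	}
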
@@ -130,13 +143,13 @@ func (ngm *nvidiaGPUManager) AllocateGPU(pod *v1.Pod, container *v1.Container) (
 		// Initialization is not complete. Try now. Failures can no longer be tolerated.
 		allocated, err := ngm.gpusInUse()
 		if err != nil {
-			return nil, fmt.Errorf("failed to allocate GPUs because of issues identifying GPUs in use: %v", err)
+			return nil, fmt.Errorf("Failed to allocate GPUs because of issues identifying GPUs in use: %v", err)
 		}
 		ngm.allocated = allocated
 	} else {
 		// update internal list of GPUs in use prior to allocating new GPUs.
 		if err := ngm.updateAllocatedGPUs(); err != nil {
-			return nil, fmt.Errorf("failed to allocate GPUs because of issues with updating GPUs in use: %v", err)
+			return nil, fmt.Errorf("Failed to allocate GPUs because of issues with updating GPUs in use: %v", err)
 		}
 	}
 	// Get GPU devices in use.
@@ -146,23 +159,24 @@ func (ngm *nvidiaGPUManager) AllocateGPU(pod *v1.Pod, container *v1.Container) (
 	if int64(available.Len()) < gpusNeeded {
 		return nil, fmt.Errorf("requested number of GPUs unavailable. Requested: %d, Available: %d", gpusNeeded, available.Len())
 	}
-	var ret []string
-	for _, device := range available.List() {
-		if gpusNeeded > 0 {
-			ret = append(ret, device)
-			// Update internal allocated GPU cache.
-			ngm.allocated.insert(string(pod.UID), device)
-		}
-		gpusNeeded--
+	ret := available.List()[:gpusNeeded]
+	for _, device := range ret {
+		// Update internal allocated GPU cache.
+		ngm.allocated.insert(string(pod.UID), device)
 	}
+	// Add standard device files that need to be exposed.
+	ret = append(ret, ngm.defaultDevices...)
 
 	return ret, nil
 }
 
+// updateAllocatedGPUs updates the list of GPUs in use.
+// It gets a list of running pods and then frees any GPUs that are bound to terminated pods.
+// Returns error on failure.
 func (ngm *nvidiaGPUManager) updateAllocatedGPUs() error {
 	activePods, err := ngm.activePodsLister.GetRunningPods()
 	if err != nil {
-		return fmt.Errorf("failed to list active pods: %v", err)
+		return fmt.Errorf("Failed to list active pods: %v", err)
 	}
 	activePodUids := sets.NewString()
 	for _, pod := range activePods {
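
The allocation path above now simply takes the first N free GPU paths and appends the manager's default devices to the returned list. A standalone illustration with made-up device paths (the real code pulls the sorted list out of a sets.String of discovered paths):

	package main

	import "fmt"

	func main() {
		// Hypothetical free GPUs on the node, already sorted.
		available := []string{"/dev/nvidia0", "/dev/nvidia1", "/dev/nvidia2"}
		defaultDevices := []string{"/dev/nvidiactl", "/dev/nvidia-uvm"}
		gpusNeeded := 2

		// Mirrors the new AllocateGPU: slice off what the container asked for,
		// then expose the standard control devices alongside the GPUs.
		ret := available[:gpusNeeded]
		ret = append(ret, defaultDevices...)

		fmt.Println(ret) // [/dev/nvidia0 /dev/nvidia1 /dev/nvidiactl /dev/nvidia-uvm]
	}
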
@@ -232,12 +246,12 @@ func (ngm *nvidiaGPUManager) gpusInUse() (*podGPUs, error) {
 		// add the pod and its containers that need to be inspected.
 		podContainersToInspect = append(podContainersToInspect, podContainers{string(pod.UID), containerIDs})
 	}
-	ret := newPodGpus()
+	ret := newPodGPUs()
 	for _, podContainer := range podContainersToInspect {
 		for _, containerId := range podContainer.containerIDs.List() {
 			containerJSON, err := ngm.dockerClient.InspectContainer(containerId)
 			if err != nil {
-				glog.V(3).Infof("failed to inspect container %q in pod %q while attempting to reconcile nvidia gpus in use", containerId, podContainer.uid)
+				glog.V(3).Infof("Failed to inspect container %q in pod %q while attempting to reconcile nvidia gpus in use", containerId, podContainer.uid)
 				continue
 			}
 
@@ -788,7 +788,12 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
 	klet.appArmorValidator = apparmor.NewValidator(kubeCfg.ContainerRuntime)
 	klet.softAdmitHandlers.AddPodAdmitHandler(lifecycle.NewAppArmorAdmitHandler(klet.appArmorValidator))
 	if utilfeature.DefaultFeatureGate.Enabled(features.Accelerators) {
-		klet.gpuManager = nvidia.NewNvidiaGPUManager(klet, klet.dockerClient)
+		if kubeCfg.ContainerRuntime != "docker" {
+			return nil, fmt.Errorf("Accelerators feature is supported with docker runtime only.")
+		}
+		if klet.gpuManager, err = nvidia.NewNvidiaGPUManager(klet, klet.dockerClient); err != nil {
+			return nil, err
+		}
 	} else {
 		klet.gpuManager = gpu.NewGPUManagerStub()
 	}
@@ -28,7 +28,6 @@ import (
 	"path/filepath"
 	"runtime"
 	"sort"
-	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -49,7 +48,6 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/cm"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	"k8s.io/kubernetes/pkg/kubelet/envvars"
-	"k8s.io/kubernetes/pkg/kubelet/gpu/nvidia"
 	"k8s.io/kubernetes/pkg/kubelet/images"
 	"k8s.io/kubernetes/pkg/kubelet/qos"
 	"k8s.io/kubernetes/pkg/kubelet/server/portforward"
@@ -96,21 +94,10 @@ func (kl *Kubelet) makeDevices(pod *v1.Pod, container *v1.Container) ([]kubecont
 	if err != nil {
 		return nil, err
 	}
-	devices := []kubecontainer.DeviceInfo{
-		{
-			PathOnHost:      nvidia.NvidiaCtlDevice,
-			PathInContainer: nvidia.NvidiaCtlDevice,
-			Permissions:     "mrw",
-		},
-		{
-			PathOnHost:      nvidia.NvidiaUVMDevice,
-			PathInContainer: nvidia.NvidiaUVMDevice,
-			Permissions:     "mrw",
-		},
-	}
-
-	for i, path := range nvidiaGPUPaths {
-		devices = append(devices, kubecontainer.DeviceInfo{PathOnHost: path, PathInContainer: "/dev/nvidia" + strconv.Itoa(i), Permissions: "mrw"})
+	var devices []kubecontainer.DeviceInfo
+	for _, path := range nvidiaGPUPaths {
+		// Devices have to be mapped one to one because of nvidia CUDA library requirements.
+		devices = append(devices, kubecontainer.DeviceInfo{PathOnHost: path, PathInContainer: path, Permissions: "mrw"})
 	}
 
 	return devices, nil
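
Together with the comment in the hunk above, this is the behavioral core of the commit: a GPU that is /dev/nvidia3 on the host is now also /dev/nvidia3 inside the container, where the old code renumbered devices from zero (and the control devices it used to add here now arrive via the GPU manager's default devices, as in the AllocateGPU hunk). A self-contained before/after sketch, where DeviceInfo stands in for kubecontainer.DeviceInfo and the host paths are made up:

	package main

	import (
		"fmt"
		"strconv"
	)

	// Stand-in for kubecontainer.DeviceInfo.
	type DeviceInfo struct {
		PathOnHost, PathInContainer, Permissions string
	}

	func main() {
		// Suppose AllocateGPU handed this container the second and fourth GPUs.
		nvidiaGPUPaths := []string{"/dev/nvidia1", "/dev/nvidia3"}

		// Old behavior: renumber inside the container starting at /dev/nvidia0.
		var old []DeviceInfo
		for i, path := range nvidiaGPUPaths {
			old = append(old, DeviceInfo{path, "/dev/nvidia" + strconv.Itoa(i), "mrw"})
		}

		// New behavior: map one to one, as the CUDA libraries expect.
		var devices []DeviceInfo
		for _, path := range nvidiaGPUPaths {
			devices = append(devices, DeviceInfo{path, path, "mrw"})
		}

		fmt.Println(old)     // [{/dev/nvidia1 /dev/nvidia0 mrw} {/dev/nvidia3 /dev/nvidia1 mrw}]
		fmt.Println(devices) // [{/dev/nvidia1 /dev/nvidia1 mrw} {/dev/nvidia3 /dev/nvidia3 mrw}]
	}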