212 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			212 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
/*
 | 
						|
   Copyright The containerd Authors.
 | 
						|
 | 
						|
   Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
   you may not use this file except in compliance with the License.
 | 
						|
   You may obtain a copy of the License at
 | 
						|
 | 
						|
       http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 | 
						|
   Unless required by applicable law or agreed to in writing, software
 | 
						|
   distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
   See the License for the specific language governing permissions and
 | 
						|
   limitations under the License.
 | 
						|
*/
 | 
						|
 | 
						|
package nvidia
 | 
						|
 | 
						|
import (
 | 
						|
	"context"
 | 
						|
	"fmt"
 | 
						|
	"os"
 | 
						|
	"os/exec"
 | 
						|
	"strconv"
 | 
						|
	"strings"
 | 
						|
 | 
						|
	"github.com/containerd/containerd/containers"
 | 
						|
	"github.com/containerd/containerd/oci"
 | 
						|
	specs "github.com/opencontainers/runtime-spec/specs-go"
 | 
						|
)
 | 
						|
 | 
						|
// NvidiaCLI is the path to the Nvidia helper binary
 | 
						|
const NvidiaCLI = "nvidia-container-cli"
 | 
						|
 | 
						|
// Capability specifies capabilities for the gpu inside the container
 | 
						|
// Detailed explanation of options can be found:
 | 
						|
// https://github.com/nvidia/nvidia-container-runtime#supported-driver-capabilities
 | 
						|
type Capability string
 | 
						|
 | 
						|
const (
 | 
						|
	// Compute capability
 | 
						|
	Compute Capability = "compute"
 | 
						|
	// Compat32 capability
 | 
						|
	Compat32 Capability = "compat32"
 | 
						|
	// Graphics capability
 | 
						|
	Graphics Capability = "graphics"
 | 
						|
	// Utility capability
 | 
						|
	Utility Capability = "utility"
 | 
						|
	// Video capability
 | 
						|
	Video Capability = "video"
 | 
						|
	// Display capability
 | 
						|
	Display Capability = "display"
 | 
						|
)
 | 
						|
 | 
						|
// AllCaps returns the complete list of supported Nvidia capabilities.
 | 
						|
func AllCaps() []Capability {
 | 
						|
	return []Capability{
 | 
						|
		Compute,
 | 
						|
		Compat32,
 | 
						|
		Graphics,
 | 
						|
		Utility,
 | 
						|
		Video,
 | 
						|
		Display,
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// WithGPUs adds NVIDIA gpu support to a container
 | 
						|
func WithGPUs(opts ...Opts) oci.SpecOpts {
 | 
						|
	return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error {
 | 
						|
		c := &config{}
 | 
						|
		for _, o := range opts {
 | 
						|
			if err := o(c); err != nil {
 | 
						|
				return err
 | 
						|
			}
 | 
						|
		}
 | 
						|
		if c.OCIHookPath == "" {
 | 
						|
			path, err := exec.LookPath("containerd")
 | 
						|
			if err != nil {
 | 
						|
				return err
 | 
						|
			}
 | 
						|
			c.OCIHookPath = path
 | 
						|
		}
 | 
						|
		nvidiaPath, err := exec.LookPath(NvidiaCLI)
 | 
						|
		if err != nil {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
		if s.Hooks == nil {
 | 
						|
			s.Hooks = &specs.Hooks{}
 | 
						|
		}
 | 
						|
		s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
 | 
						|
			Path: c.OCIHookPath,
 | 
						|
			Args: append([]string{
 | 
						|
				"containerd",
 | 
						|
				"oci-hook",
 | 
						|
				"--",
 | 
						|
				nvidiaPath,
 | 
						|
				// ensures the required kernel modules are properly loaded
 | 
						|
				"--load-kmods",
 | 
						|
			}, c.args()...),
 | 
						|
			Env: os.Environ(),
 | 
						|
		})
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
type config struct {
 | 
						|
	Devices      []string
 | 
						|
	Capabilities []Capability
 | 
						|
	LoadKmods    bool
 | 
						|
	LDCache      string
 | 
						|
	LDConfig     string
 | 
						|
	Requirements []string
 | 
						|
	OCIHookPath  string
 | 
						|
}
 | 
						|
 | 
						|
func (c *config) args() []string {
 | 
						|
	var args []string
 | 
						|
 | 
						|
	if c.LoadKmods {
 | 
						|
		args = append(args, "--load-kmods")
 | 
						|
	}
 | 
						|
	if c.LDCache != "" {
 | 
						|
		args = append(args, fmt.Sprintf("--ldcache=%s", c.LDCache))
 | 
						|
	}
 | 
						|
	args = append(args,
 | 
						|
		"configure",
 | 
						|
	)
 | 
						|
	if len(c.Devices) > 0 {
 | 
						|
		args = append(args, fmt.Sprintf("--device=%s", strings.Join(c.Devices, ",")))
 | 
						|
	}
 | 
						|
	for _, c := range c.Capabilities {
 | 
						|
		args = append(args, fmt.Sprintf("--%s", c))
 | 
						|
	}
 | 
						|
	if c.LDConfig != "" {
 | 
						|
		args = append(args, fmt.Sprintf("--ldconfig=%s", c.LDConfig))
 | 
						|
	}
 | 
						|
	for _, r := range c.Requirements {
 | 
						|
		args = append(args, fmt.Sprintf("--require=%s", r))
 | 
						|
	}
 | 
						|
	args = append(args, "--pid={{pid}}", "{{rootfs}}")
 | 
						|
	return args
 | 
						|
}
 | 
						|
 | 
						|
// Opts are options for configuring gpu support
 | 
						|
type Opts func(*config) error
 | 
						|
 | 
						|
// WithDevices adds the provided device indexes to the container
 | 
						|
func WithDevices(ids ...int) Opts {
 | 
						|
	return func(c *config) error {
 | 
						|
		for _, i := range ids {
 | 
						|
			c.Devices = append(c.Devices, strconv.Itoa(i))
 | 
						|
		}
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// WithDeviceUUIDs adds the specific device UUID to the container
 | 
						|
func WithDeviceUUIDs(uuids ...string) Opts {
 | 
						|
	return func(c *config) error {
 | 
						|
		c.Devices = append(c.Devices, uuids...)
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// WithAllDevices adds all gpus to the container
 | 
						|
func WithAllDevices(c *config) error {
 | 
						|
	c.Devices = []string{"all"}
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
// WithAllCapabilities adds all capabilities to the container for the gpus
 | 
						|
func WithAllCapabilities(c *config) error {
 | 
						|
	c.Capabilities = AllCaps()
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
// WithCapabilities adds the specified capabilities to the container for the gpus
 | 
						|
func WithCapabilities(caps ...Capability) Opts {
 | 
						|
	return func(c *config) error {
 | 
						|
		c.Capabilities = append(c.Capabilities, caps...)
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// WithRequiredCUDAVersion sets the required cuda version
 | 
						|
func WithRequiredCUDAVersion(major, minor int) Opts {
 | 
						|
	return func(c *config) error {
 | 
						|
		c.Requirements = append(c.Requirements, fmt.Sprintf("cuda>=%d.%d", major, minor))
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// WithOCIHookPath sets the hook path for the binary
 | 
						|
func WithOCIHookPath(path string) Opts {
 | 
						|
	return func(c *config) error {
 | 
						|
		c.OCIHookPath = path
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// WithLookupOCIHookPath sets the hook path for the binary via a binary name
 | 
						|
func WithLookupOCIHookPath(name string) Opts {
 | 
						|
	return func(c *config) error {
 | 
						|
		path, err := exec.LookPath(name)
 | 
						|
		if err != nil {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
		c.OCIHookPath = path
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
}
 |