 0693b936d2
			
		
	
	0693b936d2
	
	
	
		
			
			Prestart Hook is deprecated and can be replaced with CreateRuntime hook Signed-off-by: Akhil Mohan <akhilerm@gmail.com>
		
			
				
	
	
		
			222 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			222 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| /*
 | |
|    Copyright The containerd Authors.
 | |
| 
 | |
|    Licensed under the Apache License, Version 2.0 (the "License");
 | |
|    you may not use this file except in compliance with the License.
 | |
|    You may obtain a copy of the License at
 | |
| 
 | |
|        http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
|    Unless required by applicable law or agreed to in writing, software
 | |
|    distributed under the License is distributed on an "AS IS" BASIS,
 | |
|    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|    See the License for the specific language governing permissions and
 | |
|    limitations under the License.
 | |
| */
 | |
| 
 | |
| package nvidia
 | |
| 
 | |
| import (
 | |
| 	"context"
 | |
| 	"fmt"
 | |
| 	"os"
 | |
| 	"os/exec"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 
 | |
| 	"github.com/containerd/containerd/v2/core/containers"
 | |
| 	"github.com/containerd/containerd/v2/pkg/oci"
 | |
| 	specs "github.com/opencontainers/runtime-spec/specs-go"
 | |
| )
 | |
| 
 | |
// NvidiaCLI is the name of the NVIDIA helper binary. WithGPUs resolves it to
// a full path with exec.LookPath when the spec option is applied.
const NvidiaCLI = "nvidia-container-cli"
 | |
| 
 | |
// Capability names a driver capability to enable for the GPUs inside the
// container. Each configured capability is passed to the NVIDIA helper binary
// as its own "--<capability>" flag.
// A detailed explanation of the options can be found at:
// https://github.com/nvidia/nvidia-container-runtime#supported-driver-capabilities
type Capability string
 | |
| 
 | |
// Capability values understood by this package; AllCaps returns all of them.
const (
	// Compute is the "compute" driver capability.
	Compute Capability = "compute"
	// Compat32 is the "compat32" driver capability.
	Compat32 Capability = "compat32"
	// Graphics is the "graphics" driver capability.
	Graphics Capability = "graphics"
	// Utility is the "utility" driver capability.
	Utility Capability = "utility"
	// Video is the "video" driver capability.
	Video Capability = "video"
	// Display is the "display" driver capability.
	Display Capability = "display"
)
 | |
| 
 | |
| // AllCaps returns the complete list of supported Nvidia capabilities.
 | |
| func AllCaps() []Capability {
 | |
| 	return []Capability{
 | |
| 		Compute,
 | |
| 		Compat32,
 | |
| 		Graphics,
 | |
| 		Utility,
 | |
| 		Video,
 | |
| 		Display,
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // WithGPUs adds NVIDIA gpu support to a container
 | |
| func WithGPUs(opts ...Opts) oci.SpecOpts {
 | |
| 	return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error {
 | |
| 		c := &config{}
 | |
| 		for _, o := range opts {
 | |
| 			if err := o(c); err != nil {
 | |
| 				return err
 | |
| 			}
 | |
| 		}
 | |
| 		if c.OCIHookPath == "" {
 | |
| 			path, err := exec.LookPath("containerd")
 | |
| 			if err != nil {
 | |
| 				return err
 | |
| 			}
 | |
| 			c.OCIHookPath = path
 | |
| 		}
 | |
| 		nvidiaPath, err := exec.LookPath(NvidiaCLI)
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 		if s.Hooks == nil {
 | |
| 			s.Hooks = &specs.Hooks{}
 | |
| 		}
 | |
| 		s.Hooks.CreateRuntime = append(s.Hooks.CreateRuntime, specs.Hook{
 | |
| 			Path: c.OCIHookPath,
 | |
| 			Args: append([]string{
 | |
| 				"containerd",
 | |
| 				"oci-hook",
 | |
| 				"--",
 | |
| 				nvidiaPath,
 | |
| 				// ensures the required kernel modules are properly loaded
 | |
| 				"--load-kmods",
 | |
| 			}, c.args()...),
 | |
| 			Env: os.Environ(),
 | |
| 		})
 | |
| 		return nil
 | |
| 	}
 | |
| }
 | |
| 
 | |
// config collects the settings gathered from Opts. Its args method renders
// them as command-line arguments for the NVIDIA helper binary.
type config struct {
	Devices      []string     // GPUs to expose, joined into one --device flag
	Capabilities []Capability // each rendered as its own --<capability> flag
	LoadKmods    bool         // adds --load-kmods ahead of "configure"
	LDCache      string       // when non-empty, adds --ldcache=<value>
	LDConfig     string       // when non-empty, adds --ldconfig=<value>
	Requirements []string     // each rendered as --require=<value>
	OCIHookPath  string       // binary used as the hook's Path by WithGPUs
	NoCgroups    bool         // adds --no-cgroups
}
 | |
| 
 | |
| func (c *config) args() []string {
 | |
| 	var args []string
 | |
| 
 | |
| 	if c.LoadKmods {
 | |
| 		args = append(args, "--load-kmods")
 | |
| 	}
 | |
| 	if c.LDCache != "" {
 | |
| 		args = append(args, fmt.Sprintf("--ldcache=%s", c.LDCache))
 | |
| 	}
 | |
| 	args = append(args,
 | |
| 		"configure",
 | |
| 	)
 | |
| 	if len(c.Devices) > 0 {
 | |
| 		args = append(args, fmt.Sprintf("--device=%s", strings.Join(c.Devices, ",")))
 | |
| 	}
 | |
| 	for _, c := range c.Capabilities {
 | |
| 		args = append(args, fmt.Sprintf("--%s", c))
 | |
| 	}
 | |
| 	if c.LDConfig != "" {
 | |
| 		args = append(args, fmt.Sprintf("--ldconfig=%s", c.LDConfig))
 | |
| 	}
 | |
| 	for _, r := range c.Requirements {
 | |
| 		args = append(args, fmt.Sprintf("--require=%s", r))
 | |
| 	}
 | |
| 	if c.NoCgroups {
 | |
| 		args = append(args, "--no-cgroups")
 | |
| 	}
 | |
| 	args = append(args, "--pid={{pid}}", "{{rootfs}}")
 | |
| 	return args
 | |
| }
 | |
| 
 | |
// Opts is a functional option for configuring GPU support. WithGPUs applies
// each option in order to its configuration and stops at, and returns, the
// first non-nil error.
type Opts func(*config) error
 | |
| 
 | |
| // WithDevices adds the provided device indexes to the container
 | |
| func WithDevices(ids ...int) Opts {
 | |
| 	return func(c *config) error {
 | |
| 		for _, i := range ids {
 | |
| 			c.Devices = append(c.Devices, strconv.Itoa(i))
 | |
| 		}
 | |
| 		return nil
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // WithDeviceUUIDs adds the specific device UUID to the container
 | |
| func WithDeviceUUIDs(uuids ...string) Opts {
 | |
| 	return func(c *config) error {
 | |
| 		c.Devices = append(c.Devices, uuids...)
 | |
| 		return nil
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // WithAllDevices adds all gpus to the container
 | |
| func WithAllDevices(c *config) error {
 | |
| 	c.Devices = []string{"all"}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // WithAllCapabilities adds all capabilities to the container for the gpus
 | |
| func WithAllCapabilities(c *config) error {
 | |
| 	c.Capabilities = AllCaps()
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // WithCapabilities adds the specified capabilities to the container for the gpus
 | |
| func WithCapabilities(caps ...Capability) Opts {
 | |
| 	return func(c *config) error {
 | |
| 		c.Capabilities = append(c.Capabilities, caps...)
 | |
| 		return nil
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // WithRequiredCUDAVersion sets the required cuda version
 | |
| func WithRequiredCUDAVersion(major, minor int) Opts {
 | |
| 	return func(c *config) error {
 | |
| 		c.Requirements = append(c.Requirements, fmt.Sprintf("cuda>=%d.%d", major, minor))
 | |
| 		return nil
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // WithOCIHookPath sets the hook path for the binary
 | |
| func WithOCIHookPath(path string) Opts {
 | |
| 	return func(c *config) error {
 | |
| 		c.OCIHookPath = path
 | |
| 		return nil
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // WithLookupOCIHookPath sets the hook path for the binary via a binary name
 | |
| func WithLookupOCIHookPath(name string) Opts {
 | |
| 	return func(c *config) error {
 | |
| 		path, err := exec.LookPath(name)
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 		c.OCIHookPath = path
 | |
| 		return nil
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // WithNoCgroups passes --no-cgroups option to nvidia-container-cli.
 | |
| func WithNoCgroups(c *config) error {
 | |
| 	c.NoCgroups = true
 | |
| 	return nil
 | |
| }
 |