containerd/contrib/nvidia/nvidia.go
Sebastiaan van Stijn 2af6db672e
switch back from golang.org/x/sys/execabs to os/exec (go1.19)
This is effectively a revert of 2ac9968401, which
switched from os/exec to the golang.org/x/sys/execabs package to mitigate
security issues (mainly on Windows) with lookups resolving to binaries in the
current directory.

from the go1.19 release notes https://go.dev/doc/go1.19#os-exec-path

> ## PATH lookups
>
> Command and LookPath no longer allow results from a PATH search to be found
> relative to the current directory. This removes a common source of security
> problems but may also break existing programs that depend on using, say,
> exec.Command("prog") to run a binary named prog (or, on Windows, prog.exe) in
> the current directory. See the os/exec package documentation for information
> about how best to update such programs.
>
> On Windows, Command and LookPath now respect the NoDefaultCurrentDirectoryInExePath
> environment variable, making it possible to disable the default implicit search
> of “.” in PATH lookups on Windows systems.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2023-11-02 21:15:40 +01:00

222 lines
5.3 KiB
Go

/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nvidia
import (
"context"
"fmt"
"os"
"os/exec"
"strconv"
"strings"
"github.com/containerd/containerd/v2/containers"
"github.com/containerd/containerd/v2/oci"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// NvidiaCLI is the name of the Nvidia helper binary. WithGPUs resolves it to
// a full path with exec.LookPath each time the returned spec option runs.
const NvidiaCLI = "nvidia-container-cli"
// Capability specifies capabilities for the gpu inside the container.
// Each configured value is handed to the Nvidia helper binary as a
// "--<capability>" argument.
// Detailed explanation of options can be found:
// https://github.com/nvidia/nvidia-container-runtime#supported-driver-capabilities
type Capability string
// Capabilities that can be requested for the gpu. The string value of each
// constant is the flag name (without the leading "--") that is passed to the
// Nvidia helper binary.
const (
	// Compute is the "compute" capability.
	Compute Capability = "compute"

	// Compat32 is the "compat32" capability.
	Compat32 Capability = "compat32"

	// Graphics is the "graphics" capability.
	Graphics Capability = "graphics"

	// Utility is the "utility" capability.
	Utility Capability = "utility"

	// Video is the "video" capability.
	Video Capability = "video"

	// Display is the "display" capability.
	Display Capability = "display"
)
// AllCaps returns every Nvidia capability declared by this package, in
// declaration order. A new slice is built on each call, so callers are free
// to modify the result.
func AllCaps() []Capability {
	all := make([]Capability, 0, 6)
	all = append(all, Compute, Compat32, Graphics, Utility, Video, Display)
	return all
}
// WithGPUs returns a spec option that adds NVIDIA gpu support to a container
// by appending a prestart hook to the spec.
//
// When the option runs, opts are applied in order to a fresh config; the
// first option that fails aborts with its error. The hook binary is the
// configured OCIHookPath, or "containerd" looked up in PATH when none was
// configured. NvidiaCLI is looked up in PATH as well, and any lookup error is
// returned unchanged. The hook receives the environment of the current
// process.
func WithGPUs(opts ...Opts) oci.SpecOpts {
	return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error {
		var cfg config
		for _, apply := range opts {
			if err := apply(&cfg); err != nil {
				return err
			}
		}

		// Fall back to the containerd binary found in PATH when no hook
		// path was supplied through the options.
		hookPath := cfg.OCIHookPath
		if hookPath == "" {
			found, err := exec.LookPath("containerd")
			if err != nil {
				return err
			}
			hookPath = found
		}

		cliPath, err := exec.LookPath(NvidiaCLI)
		if err != nil {
			return err
		}

		hookArgs := []string{
			"containerd",
			"oci-hook",
			"--",
			cliPath,
			// ensures the required kernel modules are properly loaded
			"--load-kmods",
		}
		hookArgs = append(hookArgs, cfg.args()...)

		if s.Hooks == nil {
			s.Hooks = &specs.Hooks{}
		}
		s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
			Path: hookPath,
			Args: hookArgs,
			Env:  os.Environ(),
		})
		return nil
	}
}
// config collects the settings gathered from Opts. WithGPUs reads OCIHookPath
// directly; the remaining fields are turned into command-line arguments by
// the args method.
type config struct {
	// Devices lists the devices to expose; joined with "," into --device=.
	Devices []string
	// Capabilities are emitted one by one as "--<capability>".
	Capabilities []Capability
	// LoadKmods adds --load-kmods ahead of the "configure" argument.
	LoadKmods bool
	// LDCache, when non-empty, is passed as --ldcache=.
	LDCache string
	// LDConfig, when non-empty, is passed as --ldconfig=.
	LDConfig string
	// Requirements are emitted one by one as --require=.
	Requirements []string
	// OCIHookPath is the binary used as the Path of the prestart hook.
	OCIHookPath string
	// NoCgroups adds --no-cgroups.
	NoCgroups bool
}
// args renders the config as the argument list that follows the helper
// binary in the hook command line.
//
// The order is fixed: --load-kmods and --ldcache (when set), the literal
// "configure", then --device, one flag per capability, --ldconfig, one
// --require per requirement, --no-cgroups, and finally the "--pid={{pid}}"
// and "{{rootfs}}" placeholders, which are always present.
func (c *config) args() []string {
	// option renders a single "--name=value" flag.
	option := func(name, value string) string {
		return fmt.Sprintf("--%s=%s", name, value)
	}

	var out []string

	// Flags placed ahead of the "configure" argument.
	if c.LoadKmods {
		out = append(out, "--load-kmods")
	}
	if c.LDCache != "" {
		out = append(out, option("ldcache", c.LDCache))
	}
	out = append(out, "configure")

	// Flags placed after the "configure" argument.
	if len(c.Devices) != 0 {
		out = append(out, option("device", strings.Join(c.Devices, ",")))
	}
	for _, capability := range c.Capabilities {
		out = append(out, fmt.Sprintf("--%s", capability))
	}
	if c.LDConfig != "" {
		out = append(out, option("ldconfig", c.LDConfig))
	}
	for _, requirement := range c.Requirements {
		out = append(out, option("require", requirement))
	}
	if c.NoCgroups {
		out = append(out, "--no-cgroups")
	}

	return append(out, "--pid={{pid}}", "{{rootfs}}")
}
// Opts are options for configuring gpu support. Each option mutates the
// config built by WithGPUs; a non-nil error makes WithGPUs stop and return
// that error.
type Opts func(*config) error
// WithDevices returns an option that appends the given device indexes, in
// decimal form, to the devices exposed to the container.
func WithDevices(ids ...int) Opts {
	return func(cfg *config) error {
		indexes := make([]string, len(ids))
		for n, id := range ids {
			indexes[n] = strconv.Itoa(id)
		}
		cfg.Devices = append(cfg.Devices, indexes...)
		return nil
	}
}
// WithDeviceUUIDs returns an option that appends the given device UUIDs,
// unmodified, to the devices exposed to the container.
func WithDeviceUUIDs(uuids ...string) Opts {
	return func(cfg *config) error {
		cfg.Devices = append(cfg.Devices, uuids...)
		return nil
	}
}
// WithAllDevices exposes all gpus to the container. It replaces any devices
// configured so far with the single entry "all".
func WithAllDevices(c *config) error {
	const everyDevice = "all"
	c.Devices = []string{everyDevice}
	return nil
}
// WithAllCapabilities requests every capability returned by AllCaps for the
// gpus, replacing any capabilities configured so far.
func WithAllCapabilities(c *config) error {
	everything := AllCaps()
	c.Capabilities = everything
	return nil
}
// WithCapabilities returns an option that appends the given capabilities to
// those already requested for the gpus.
func WithCapabilities(caps ...Capability) Opts {
	return func(cfg *config) error {
		cfg.Capabilities = append(cfg.Capabilities, caps...)
		return nil
	}
}
// WithRequiredCUDAVersion returns an option that appends a
// "cuda>=<major>.<minor>" entry to the configured requirements.
func WithRequiredCUDAVersion(major, minor int) Opts {
	requirement := fmt.Sprintf("cuda>=%d.%d", major, minor)
	return func(cfg *config) error {
		cfg.Requirements = append(cfg.Requirements, requirement)
		return nil
	}
}
// WithOCIHookPath returns an option that uses path, as given, as the binary
// for the hook. No lookup or validation is performed.
func WithOCIHookPath(path string) Opts {
	return func(cfg *config) error {
		cfg.OCIHookPath = path
		return nil
	}
}
// WithLookupOCIHookPath returns an option that resolves name with
// exec.LookPath and uses the result as the binary for the hook. If the lookup
// fails, the error is returned and the configured hook path is left as is.
func WithLookupOCIHookPath(name string) Opts {
	return func(cfg *config) error {
		resolved, err := exec.LookPath(name)
		if err != nil {
			return err
		}
		cfg.OCIHookPath = resolved
		return nil
	}
}
// WithNoCgroups makes the generated command line include the --no-cgroups
// option for nvidia-container-cli.
func WithNoCgroups(c *config) error {
	const enabled = true
	c.NoCgroups = enabled
	return nil
}