
Prestart Hook is deprecated and can be replaced with CreateRuntime hook Signed-off-by: Akhil Mohan <akhilerm@gmail.com>
222 lines
5.3 KiB
Go
222 lines
5.3 KiB
Go
/*
|
|
Copyright The containerd Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package nvidia
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/containerd/containerd/v2/core/containers"
|
|
"github.com/containerd/containerd/v2/pkg/oci"
|
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
|
)
|
|
|
|
// NvidiaCLI is the path to the Nvidia helper binary
|
|
const NvidiaCLI = "nvidia-container-cli"
|
|
|
|
// Capability specifies capabilities for the gpu inside the container
|
|
// Detailed explanation of options can be found:
|
|
// https://github.com/nvidia/nvidia-container-runtime#supported-driver-capabilities
|
|
type Capability string
|
|
|
|
const (
|
|
// Compute capability
|
|
Compute Capability = "compute"
|
|
// Compat32 capability
|
|
Compat32 Capability = "compat32"
|
|
// Graphics capability
|
|
Graphics Capability = "graphics"
|
|
// Utility capability
|
|
Utility Capability = "utility"
|
|
// Video capability
|
|
Video Capability = "video"
|
|
// Display capability
|
|
Display Capability = "display"
|
|
)
|
|
|
|
// AllCaps returns the complete list of supported Nvidia capabilities.
|
|
func AllCaps() []Capability {
|
|
return []Capability{
|
|
Compute,
|
|
Compat32,
|
|
Graphics,
|
|
Utility,
|
|
Video,
|
|
Display,
|
|
}
|
|
}
|
|
|
|
// WithGPUs adds NVIDIA gpu support to a container
|
|
func WithGPUs(opts ...Opts) oci.SpecOpts {
|
|
return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error {
|
|
c := &config{}
|
|
for _, o := range opts {
|
|
if err := o(c); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if c.OCIHookPath == "" {
|
|
path, err := exec.LookPath("containerd")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
c.OCIHookPath = path
|
|
}
|
|
nvidiaPath, err := exec.LookPath(NvidiaCLI)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if s.Hooks == nil {
|
|
s.Hooks = &specs.Hooks{}
|
|
}
|
|
s.Hooks.CreateRuntime = append(s.Hooks.CreateRuntime, specs.Hook{
|
|
Path: c.OCIHookPath,
|
|
Args: append([]string{
|
|
"containerd",
|
|
"oci-hook",
|
|
"--",
|
|
nvidiaPath,
|
|
// ensures the required kernel modules are properly loaded
|
|
"--load-kmods",
|
|
}, c.args()...),
|
|
Env: os.Environ(),
|
|
})
|
|
return nil
|
|
}
|
|
}
|
|
|
|
type config struct {
|
|
Devices []string
|
|
Capabilities []Capability
|
|
LoadKmods bool
|
|
LDCache string
|
|
LDConfig string
|
|
Requirements []string
|
|
OCIHookPath string
|
|
NoCgroups bool
|
|
}
|
|
|
|
func (c *config) args() []string {
|
|
var args []string
|
|
|
|
if c.LoadKmods {
|
|
args = append(args, "--load-kmods")
|
|
}
|
|
if c.LDCache != "" {
|
|
args = append(args, fmt.Sprintf("--ldcache=%s", c.LDCache))
|
|
}
|
|
args = append(args,
|
|
"configure",
|
|
)
|
|
if len(c.Devices) > 0 {
|
|
args = append(args, fmt.Sprintf("--device=%s", strings.Join(c.Devices, ",")))
|
|
}
|
|
for _, c := range c.Capabilities {
|
|
args = append(args, fmt.Sprintf("--%s", c))
|
|
}
|
|
if c.LDConfig != "" {
|
|
args = append(args, fmt.Sprintf("--ldconfig=%s", c.LDConfig))
|
|
}
|
|
for _, r := range c.Requirements {
|
|
args = append(args, fmt.Sprintf("--require=%s", r))
|
|
}
|
|
if c.NoCgroups {
|
|
args = append(args, "--no-cgroups")
|
|
}
|
|
args = append(args, "--pid={{pid}}", "{{rootfs}}")
|
|
return args
|
|
}
|
|
|
|
// Opts are options for configuring gpu support
|
|
type Opts func(*config) error
|
|
|
|
// WithDevices adds the provided device indexes to the container
|
|
func WithDevices(ids ...int) Opts {
|
|
return func(c *config) error {
|
|
for _, i := range ids {
|
|
c.Devices = append(c.Devices, strconv.Itoa(i))
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithDeviceUUIDs adds the specific device UUID to the container
|
|
func WithDeviceUUIDs(uuids ...string) Opts {
|
|
return func(c *config) error {
|
|
c.Devices = append(c.Devices, uuids...)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithAllDevices adds all gpus to the container
|
|
func WithAllDevices(c *config) error {
|
|
c.Devices = []string{"all"}
|
|
return nil
|
|
}
|
|
|
|
// WithAllCapabilities adds all capabilities to the container for the gpus
|
|
func WithAllCapabilities(c *config) error {
|
|
c.Capabilities = AllCaps()
|
|
return nil
|
|
}
|
|
|
|
// WithCapabilities adds the specified capabilities to the container for the gpus
|
|
func WithCapabilities(caps ...Capability) Opts {
|
|
return func(c *config) error {
|
|
c.Capabilities = append(c.Capabilities, caps...)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithRequiredCUDAVersion sets the required cuda version
|
|
func WithRequiredCUDAVersion(major, minor int) Opts {
|
|
return func(c *config) error {
|
|
c.Requirements = append(c.Requirements, fmt.Sprintf("cuda>=%d.%d", major, minor))
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithOCIHookPath sets the hook path for the binary
|
|
func WithOCIHookPath(path string) Opts {
|
|
return func(c *config) error {
|
|
c.OCIHookPath = path
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithLookupOCIHookPath sets the hook path for the binary via a binary name
|
|
func WithLookupOCIHookPath(name string) Opts {
|
|
return func(c *config) error {
|
|
path, err := exec.LookPath(name)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
c.OCIHookPath = path
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithNoCgroups passes --no-cgroups option to nvidia-container-cli.
|
|
func WithNoCgroups(c *config) error {
|
|
c.NoCgroups = true
|
|
return nil
|
|
}
|