Add nvidia gpu support via libnvidia-container
This adds nvidia gpu support via the libnvidia-container project and `nvidia-container-cli`. Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
parent
544557289a
commit
b949697a9c
@ -31,7 +31,7 @@ import (
|
|||||||
|
|
||||||
var ociHook = cli.Command{
|
var ociHook = cli.Command{
|
||||||
Name: "oci-hook",
|
Name: "oci-hook",
|
||||||
Usage: "provides a base for OCI runtime hooks that allow arguements to be templated.",
|
Usage: "provides a base for OCI runtime hooks to allow arguments to be injected.",
|
||||||
Action: func(context *cli.Context) error {
|
Action: func(context *cli.Context) error {
|
||||||
state, err := loadHookState(os.Stdin)
|
state, err := loadHookState(os.Stdin)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -70,6 +70,7 @@ func newTemplateContext(state *specs.State) *templateContext {
|
|||||||
"rootfs": t.rootfs,
|
"rootfs": t.rootfs,
|
||||||
"pid": t.pid,
|
"pid": t.pid,
|
||||||
"annotation": t.annotation,
|
"annotation": t.annotation,
|
||||||
|
"status": t.status,
|
||||||
}
|
}
|
||||||
return t
|
return t
|
||||||
}
|
}
|
||||||
@ -99,6 +100,10 @@ func (t *templateContext) annotation(k string) string {
|
|||||||
return t.state.Annotations[k]
|
return t.state.Annotations[k]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// status returns the Status field of the state held by the template context.
// It backs the "status" entry registered in the context's template funcs.
func (t *templateContext) status() string {
|
||||||
|
return t.state.Status
|
||||||
|
}
|
||||||
|
|
||||||
func render(ctx *templateContext, source string, out io.Writer) error {
|
func render(ctx *templateContext, source string, out io.Writer) error {
|
||||||
t, err := template.New("oci-hook").Funcs(ctx.funcs).Parse(source)
|
t, err := template.New("oci-hook").Funcs(ctx.funcs).Parse(source)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -93,6 +93,10 @@ var ContainerFlags = []cli.Flag{
|
|||||||
Name: "pid-file",
|
Name: "pid-file",
|
||||||
Usage: "file path to write the task's pid",
|
Usage: "file path to write the task's pid",
|
||||||
},
|
},
|
||||||
|
cli.IntFlag{
|
||||||
|
Name: "gpus",
|
||||||
|
Usage: "add gpus to the container",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadSpec(path string, s *specs.Spec) error {
|
func loadSpec(path string, s *specs.Spec) error {
|
||||||
|
@ -24,6 +24,7 @@ import (
|
|||||||
|
|
||||||
"github.com/containerd/containerd"
|
"github.com/containerd/containerd"
|
||||||
"github.com/containerd/containerd/cmd/ctr/commands"
|
"github.com/containerd/containerd/cmd/ctr/commands"
|
||||||
|
"github.com/containerd/containerd/contrib/nvidia"
|
||||||
"github.com/containerd/containerd/oci"
|
"github.com/containerd/containerd/oci"
|
||||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
@ -123,6 +124,9 @@ func NewContainer(ctx gocontext.Context, client *containerd.Client, context *cli
|
|||||||
Path: parts[1],
|
Path: parts[1],
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
if context.IsSet("gpus") {
|
||||||
|
opts = append(opts, nvidia.WithGPUs(nvidia.WithDevices(context.Int("gpus")), nvidia.WithAllCapabilities))
|
||||||
|
}
|
||||||
if context.IsSet("config") {
|
if context.IsSet("config") {
|
||||||
var s specs.Spec
|
var s specs.Spec
|
||||||
if err := loadSpec(context.String("config"), &s); err != nil {
|
if err := loadSpec(context.String("config"), &s); err != nil {
|
||||||
|
185
contrib/nvidia/nvidia.go
Normal file
185
contrib/nvidia/nvidia.go
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package nvidia
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/containerd/containerd/containers"
|
||||||
|
"github.com/containerd/containerd/oci"
|
||||||
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||||
|
)
|
||||||
|
|
||||||
|
const nvidiaCLI = "nvidia-container-cli"
|
||||||
|
|
||||||
|
// Capability specifies capabilities for the gpu inside the container.
|
||||||
|
// A detailed explanation of the options can be found at:
|
||||||
|
// https://github.com/nvidia/nvidia-container-runtime#supported-driver-capabilities
|
||||||
|
type Capability int
|
||||||
|
|
||||||
|
// Capability values start at 1 (iota + 1), leaving the zero value unassigned.
// Each value is mapped to its command-line flag name by capFlags.
const (
|
||||||
|
// Compute capability (flag name "compute")
|
||||||
|
Compute Capability = iota + 1
|
||||||
|
// Compat32 capability (flag name "compat32")
|
||||||
|
Compat32
|
||||||
|
// Graphics capability (flag name "graphics")
|
||||||
|
Graphics
|
||||||
|
// Utility capability (flag name "utility")
|
||||||
|
Utility
|
||||||
|
// Video capability (flag name "video")
|
||||||
|
Video
|
||||||
|
// Display capability (flag name "display")
|
||||||
|
Display
|
||||||
|
)
|
||||||
|
|
||||||
|
// WithGPUs adds NVIDIA gpu support to a container
|
||||||
|
func WithGPUs(opts ...Opts) oci.SpecOpts {
|
||||||
|
return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error {
|
||||||
|
c := &config{}
|
||||||
|
for _, o := range opts {
|
||||||
|
if err := o(c); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
path, err := exec.LookPath("containerd")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
nvidiaPath, err := exec.LookPath(nvidiaCLI)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if s.Hooks == nil {
|
||||||
|
s.Hooks = &specs.Hooks{}
|
||||||
|
}
|
||||||
|
s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
|
||||||
|
Path: path,
|
||||||
|
Args: append([]string{
|
||||||
|
"containerd",
|
||||||
|
"oci-hook",
|
||||||
|
"--",
|
||||||
|
nvidiaPath,
|
||||||
|
}, c.args()...),
|
||||||
|
Env: os.Environ(),
|
||||||
|
})
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// config holds the settings gathered from Opts. Its args method turns them
// into nvidia-container-cli command-line arguments.
type config struct {
|
||||||
|
// Devices lists gpu indexes; when non-empty they are joined with commas
// into a single --device flag.
Devices []int
|
||||||
|
// DeviceUUID, when non-empty, is passed verbatim as a --device value
// (WithAllDevices sets it to "all").
DeviceUUID string
|
||||||
|
// Capabilities are emitted as one "--<name>" flag each, using capFlags.
Capabilities []Capability
|
||||||
|
// LoadKmods adds --load-kmods ahead of the configure subcommand.
LoadKmods bool
|
||||||
|
// LDCache, when non-empty, adds --ldcache=<value> ahead of configure.
LDCache string
|
||||||
|
// LDConfig, when non-empty, adds --ldconfig=<value> after configure.
LDConfig string
|
||||||
|
// Requirements are emitted as one --require=<value> flag each.
Requirements []string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *config) args() []string {
|
||||||
|
var args []string
|
||||||
|
|
||||||
|
if c.LoadKmods {
|
||||||
|
args = append(args, "--load-kmods")
|
||||||
|
}
|
||||||
|
if c.LDCache != "" {
|
||||||
|
args = append(args, fmt.Sprintf("--ldcache=%s", c.LDCache))
|
||||||
|
}
|
||||||
|
args = append(args,
|
||||||
|
"configure",
|
||||||
|
)
|
||||||
|
if len(c.Devices) > 0 {
|
||||||
|
args = append(args, fmt.Sprintf("--device=%s", strings.Join(toStrings(c.Devices), ",")))
|
||||||
|
}
|
||||||
|
if c.DeviceUUID != "" {
|
||||||
|
args = append(args, fmt.Sprintf("--device=%s", c.DeviceUUID))
|
||||||
|
}
|
||||||
|
for _, c := range c.Capabilities {
|
||||||
|
args = append(args, fmt.Sprintf("--%s", capFlags[c]))
|
||||||
|
}
|
||||||
|
if c.LDConfig != "" {
|
||||||
|
args = append(args, fmt.Sprintf("--ldconfig=%s", c.LDConfig))
|
||||||
|
}
|
||||||
|
for _, r := range c.Requirements {
|
||||||
|
args = append(args, fmt.Sprintf("--require=%s", r))
|
||||||
|
}
|
||||||
|
args = append(args, "--pid={{pid}}", "{{rootfs}}")
|
||||||
|
return args
|
||||||
|
}
|
||||||
|
|
||||||
|
// capFlags maps each Capability to the flag name (without the leading "--")
// that config.args emits for it. WithAllCapabilities also uses its keys as
// the set of all capabilities.
var capFlags = map[Capability]string{
|
||||||
|
Compute: "compute",
|
||||||
|
Compat32: "compat32",
|
||||||
|
Graphics: "graphics",
|
||||||
|
Utility: "utility",
|
||||||
|
Video: "video",
|
||||||
|
Display: "display",
|
||||||
|
}
|
||||||
|
|
||||||
|
// toStrings formats every value in ints as a base-10 string, preserving
// order. It returns nil when ints is empty.
func toStrings(ints []int) []string {
	if len(ints) == 0 {
		return nil
	}
	// The result length is known up front, so allocate once instead of
	// growing the slice through repeated appends.
	s := make([]string, 0, len(ints))
	for _, i := range ints {
		s = append(s, strconv.Itoa(i))
	}
	return s
}
|
||||||
|
|
||||||
|
// Opts are options for configuring gpu support. Each one mutates the config
// that WithGPUs builds; a non-nil error makes WithGPUs' spec option return
// that error.
|
||||||
|
type Opts func(*config) error
|
||||||
|
|
||||||
|
// WithDevices adds the provided device indexes to the container
|
||||||
|
func WithDevices(ids ...int) Opts {
|
||||||
|
return func(c *config) error {
|
||||||
|
c.Devices = ids
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithDeviceUUID adds the specific device UUID to the container
|
||||||
|
func WithDeviceUUID(guid string) Opts {
|
||||||
|
return func(c *config) error {
|
||||||
|
c.DeviceUUID = guid
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithAllDevices adds all gpus to the container
|
||||||
|
func WithAllDevices(c *config) error {
|
||||||
|
c.DeviceUUID = "all"
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithAllCapabilities adds all capabilities to the container for the gpus
|
||||||
|
func WithAllCapabilities(c *config) error {
|
||||||
|
for k := range capFlags {
|
||||||
|
c.Capabilities = append(c.Capabilities, k)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithRequiredCUDAVersion sets the required cuda version
|
||||||
|
func WithRequiredCUDAVersion(major, minor int) Opts {
|
||||||
|
return func(c *config) error {
|
||||||
|
c.Requirements = append(c.Requirements, fmt.Sprintf("cuda>=%d.%d", major, minor))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user