
Test suite: ```yaml --- apiVersion: v1 kind: Pod metadata: name: test-no-option annotations: description: "Equivalent of `docker run` (no option)" spec: restartPolicy: Never containers: - name: main image: ghcr.io/containerd/busybox:1.28 args: ['sh', '-euxc', '[ "$(id)" = "uid=0(root) gid=0(root) groups=0(root),10(wheel)" ]'] --- apiVersion: v1 kind: Pod metadata: name: test-group-add-1-group-add-1234 annotations: description: "Equivalent of `docker run --group-add 1 --group-add 1234`" spec: restartPolicy: Never containers: - name: main image: ghcr.io/containerd/busybox:1.28 args: ['sh', '-euxc', '[ "$(id)" = "uid=0(root) gid=0(root) groups=0(root),1(daemon),10(wheel),1234" ]'] securityContext: supplementalGroups: [1, 1234] --- apiVersion: v1 kind: Pod metadata: name: test-user-1234 annotations: description: "Equivalent of `docker run --user 1234`" spec: restartPolicy: Never containers: - name: main image: ghcr.io/containerd/busybox:1.28 args: ['sh', '-euxc', '[ "$(id)" = "uid=1234 gid=0(root) groups=0(root)" ]'] securityContext: runAsUser: 1234 --- apiVersion: v1 kind: Pod metadata: name: test-user-1234-1234 annotations: description: "Equivalent of `docker run --user 1234:1234`" spec: restartPolicy: Never containers: - name: main image: ghcr.io/containerd/busybox:1.28 args: ['sh', '-euxc', '[ "$(id)" = "uid=1234 gid=1234 groups=1234" ]'] securityContext: runAsUser: 1234 runAsGroup: 1234 --- apiVersion: v1 kind: Pod metadata: name: test-user-1234-group-add-1234 annotations: description: "Equivalent of `docker run --user 1234 --group-add 1234`" spec: restartPolicy: Never containers: - name: main image: ghcr.io/containerd/busybox:1.28 args: ['sh', '-euxc', '[ "$(id)" = "uid=1234 gid=0(root) groups=0(root),1234" ]'] securityContext: runAsUser: 1234 supplementalGroups: [1234] ``` Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
1625 lines
46 KiB
Go
1625 lines
46 KiB
Go
/*
|
|
Copyright The containerd Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package oci
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/containerd/containerd/containers"
|
|
"github.com/containerd/containerd/content"
|
|
"github.com/containerd/containerd/images"
|
|
"github.com/containerd/containerd/mount"
|
|
"github.com/containerd/containerd/namespaces"
|
|
"github.com/containerd/containerd/platforms"
|
|
"github.com/containerd/continuity/fs"
|
|
v1 "github.com/opencontainers/image-spec/specs-go/v1"
|
|
"github.com/opencontainers/runc/libcontainer/user"
|
|
"github.com/opencontainers/runtime-spec/specs-go"
|
|
)
|
|
|
|
// SpecOpts sets spec specific information to a newly generated OCI spec
|
|
type SpecOpts func(context.Context, Client, *containers.Container, *Spec) error
|
|
|
|
// Compose converts a sequence of spec operations into a single operation
|
|
func Compose(opts ...SpecOpts) SpecOpts {
|
|
return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
|
|
for _, o := range opts {
|
|
if err := o(ctx, client, c, s); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// setProcess sets Process to empty if unset
|
|
func setProcess(s *Spec) {
|
|
if s.Process == nil {
|
|
s.Process = &specs.Process{}
|
|
}
|
|
}
|
|
|
|
// setRoot sets Root to empty if unset
|
|
func setRoot(s *Spec) {
|
|
if s.Root == nil {
|
|
s.Root = &specs.Root{}
|
|
}
|
|
}
|
|
|
|
// setLinux sets Linux to empty if unset
|
|
func setLinux(s *Spec) {
|
|
if s.Linux == nil {
|
|
s.Linux = &specs.Linux{}
|
|
}
|
|
}
|
|
|
|
func setResources(s *Spec) {
|
|
if s.Linux != nil {
|
|
if s.Linux.Resources == nil {
|
|
s.Linux.Resources = &specs.LinuxResources{}
|
|
}
|
|
}
|
|
}
|
|
|
|
//nolint:nolintlint,unused // not used on all platforms
|
|
func setResourcesWindows(s *Spec) {
|
|
if s.Windows != nil {
|
|
if s.Windows.Resources == nil {
|
|
s.Windows.Resources = &specs.WindowsResources{}
|
|
}
|
|
}
|
|
}
|
|
|
|
//nolint:nolintlint,unused // not used on all platforms
|
|
func setCPU(s *Spec) {
|
|
setResources(s)
|
|
if s.Linux != nil {
|
|
if s.Linux.Resources.CPU == nil {
|
|
s.Linux.Resources.CPU = &specs.LinuxCPU{}
|
|
}
|
|
}
|
|
}
|
|
|
|
//nolint:nolintlint,unused // not used on all platforms
|
|
func setCPUWindows(s *Spec) {
|
|
setResourcesWindows(s)
|
|
if s.Windows != nil {
|
|
if s.Windows.Resources.CPU == nil {
|
|
s.Windows.Resources.CPU = &specs.WindowsCPUResources{}
|
|
}
|
|
}
|
|
}
|
|
|
|
// setCapabilities sets Linux Capabilities to empty if unset
|
|
func setCapabilities(s *Spec) {
|
|
setProcess(s)
|
|
if s.Process.Capabilities == nil {
|
|
s.Process.Capabilities = &specs.LinuxCapabilities{}
|
|
}
|
|
}
|
|
|
|
// ensureAdditionalGids ensures that the primary GID is also included in the additional GID list.
|
|
func ensureAdditionalGids(s *Spec) {
|
|
setProcess(s)
|
|
for _, f := range s.Process.User.AdditionalGids {
|
|
if f == s.Process.User.GID {
|
|
return
|
|
}
|
|
}
|
|
s.Process.User.AdditionalGids = append([]uint32{s.Process.User.GID}, s.Process.User.AdditionalGids...)
|
|
}
|
|
|
|
// WithDefaultSpec returns a SpecOpts that will populate the spec with default
|
|
// values.
|
|
//
|
|
// Use as the first option to clear the spec, then apply options afterwards.
|
|
func WithDefaultSpec() SpecOpts {
|
|
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
|
|
return generateDefaultSpecWithPlatform(ctx, platforms.DefaultString(), c.ID, s)
|
|
}
|
|
}
|
|
|
|
// WithDefaultSpecForPlatform returns a SpecOpts that will populate the spec
|
|
// with default values for a given platform.
|
|
//
|
|
// Use as the first option to clear the spec, then apply options afterwards.
|
|
func WithDefaultSpecForPlatform(platform string) SpecOpts {
|
|
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
|
|
return generateDefaultSpecWithPlatform(ctx, platform, c.ID, s)
|
|
}
|
|
}
|
|
|
|
// WithSpecFromBytes loads the spec from the provided byte slice.
|
|
func WithSpecFromBytes(p []byte) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
*s = Spec{} // make sure spec is cleared.
|
|
if err := json.Unmarshal(p, s); err != nil {
|
|
return fmt.Errorf("decoding spec config file failed, current supported OCI runtime-spec : v%s: %w", specs.Version, err)
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithSpecFromFile loads the specification from the provided filename.
|
|
func WithSpecFromFile(filename string) SpecOpts {
|
|
return func(ctx context.Context, c Client, container *containers.Container, s *Spec) error {
|
|
p, err := os.ReadFile(filename)
|
|
if err != nil {
|
|
return fmt.Errorf("cannot load spec config file: %w", err)
|
|
}
|
|
return WithSpecFromBytes(p)(ctx, c, container, s)
|
|
}
|
|
}
|
|
|
|
// WithEnv appends environment variables
|
|
func WithEnv(environmentVariables []string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
if len(environmentVariables) > 0 {
|
|
setProcess(s)
|
|
s.Process.Env = replaceOrAppendEnvValues(s.Process.Env, environmentVariables)
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithDefaultPathEnv sets the $PATH environment variable to the
|
|
// default PATH defined in this package.
|
|
func WithDefaultPathEnv(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
s.Process.Env = replaceOrAppendEnvValues(s.Process.Env, defaultUnixEnv)
|
|
return nil
|
|
}
|
|
|
|
// replaceOrAppendEnvValues returns the defaults with the overrides either
|
|
// replaced by env key or appended to the list
|
|
func replaceOrAppendEnvValues(defaults, overrides []string) []string {
|
|
cache := make(map[string]int, len(defaults))
|
|
results := make([]string, 0, len(defaults))
|
|
for i, e := range defaults {
|
|
k, _, _ := strings.Cut(e, "=")
|
|
results = append(results, e)
|
|
cache[k] = i
|
|
}
|
|
|
|
for _, value := range overrides {
|
|
// Values w/o = means they want this env to be removed/unset.
|
|
k, _, ok := strings.Cut(value, "=")
|
|
if !ok {
|
|
if i, exists := cache[k]; exists {
|
|
results[i] = "" // Used to indicate it should be removed
|
|
}
|
|
continue
|
|
}
|
|
|
|
// Just do a normal set/update
|
|
if i, exists := cache[k]; exists {
|
|
results[i] = value
|
|
} else {
|
|
results = append(results, value)
|
|
}
|
|
}
|
|
|
|
// Now remove all entries that we want to "unset"
|
|
for i := 0; i < len(results); i++ {
|
|
if results[i] == "" {
|
|
results = append(results[:i], results[i+1:]...)
|
|
i--
|
|
}
|
|
}
|
|
|
|
return results
|
|
}
|
|
|
|
// WithProcessArgs replaces the args on the generated spec
|
|
func WithProcessArgs(args ...string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setProcess(s)
|
|
s.Process.Args = args
|
|
s.Process.CommandLine = ""
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithProcessCwd replaces the current working directory on the generated spec
|
|
func WithProcessCwd(cwd string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setProcess(s)
|
|
s.Process.Cwd = cwd
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithTTY sets the information on the spec as well as the environment variables for
|
|
// using a TTY
|
|
func WithTTY(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setProcess(s)
|
|
s.Process.Terminal = true
|
|
if s.Linux != nil {
|
|
s.Process.Env = append(s.Process.Env, "TERM=xterm")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// WithTTYSize sets the information on the spec as well as the environment variables for
|
|
// using a TTY
|
|
func WithTTYSize(width, height int) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setProcess(s)
|
|
if s.Process.ConsoleSize == nil {
|
|
s.Process.ConsoleSize = &specs.Box{}
|
|
}
|
|
s.Process.ConsoleSize.Width = uint(width)
|
|
s.Process.ConsoleSize.Height = uint(height)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithHostname sets the container's hostname
|
|
func WithHostname(name string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
s.Hostname = name
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithDomainname sets the container's NIS domain name
|
|
func WithDomainname(name string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
s.Domainname = name
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithMounts appends mounts
|
|
func WithMounts(mounts []specs.Mount) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
s.Mounts = append(s.Mounts, mounts...)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithoutMounts removes mounts
|
|
func WithoutMounts(dests ...string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
var (
|
|
mounts []specs.Mount
|
|
current = s.Mounts
|
|
)
|
|
mLoop:
|
|
for _, m := range current {
|
|
mDestination := filepath.Clean(m.Destination)
|
|
for _, dest := range dests {
|
|
if mDestination == dest {
|
|
continue mLoop
|
|
}
|
|
}
|
|
mounts = append(mounts, m)
|
|
}
|
|
s.Mounts = mounts
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithHostNamespace allows a task to run inside the host's linux namespace
|
|
func WithHostNamespace(ns specs.LinuxNamespaceType) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setLinux(s)
|
|
for i, n := range s.Linux.Namespaces {
|
|
if n.Type == ns {
|
|
s.Linux.Namespaces = append(s.Linux.Namespaces[:i], s.Linux.Namespaces[i+1:]...)
|
|
return nil
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithLinuxNamespace uses the passed in namespace for the spec. If a namespace of the same type already exists in the
|
|
// spec, the existing namespace is replaced by the one provided.
|
|
func WithLinuxNamespace(ns specs.LinuxNamespace) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setLinux(s)
|
|
for i, n := range s.Linux.Namespaces {
|
|
if n.Type == ns.Type {
|
|
s.Linux.Namespaces[i] = ns
|
|
return nil
|
|
}
|
|
}
|
|
s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithNewPrivileges turns off the NoNewPrivileges feature flag in the spec
|
|
func WithNewPrivileges(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setProcess(s)
|
|
s.Process.NoNewPrivileges = false
|
|
|
|
return nil
|
|
}
|
|
|
|
// WithImageConfig configures the spec to from the configuration of an Image
|
|
func WithImageConfig(image Image) SpecOpts {
|
|
return WithImageConfigArgs(image, nil)
|
|
}
|
|
|
|
// WithImageConfigArgs configures the spec to from the configuration of an Image with additional args that
|
|
// replaces the CMD of the image
|
|
func WithImageConfigArgs(image Image, args []string) SpecOpts {
|
|
return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
|
|
ic, err := image.Config(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var (
|
|
imageConfigBytes []byte
|
|
ociimage v1.Image
|
|
config v1.ImageConfig
|
|
)
|
|
switch ic.MediaType {
|
|
case v1.MediaTypeImageConfig, images.MediaTypeDockerSchema2Config:
|
|
var err error
|
|
imageConfigBytes, err = content.ReadBlob(ctx, image.ContentStore(), ic)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := json.Unmarshal(imageConfigBytes, &ociimage); err != nil {
|
|
return err
|
|
}
|
|
config = ociimage.Config
|
|
default:
|
|
return fmt.Errorf("unknown image config media type %s", ic.MediaType)
|
|
}
|
|
|
|
appendOSMounts(s, ociimage.OS)
|
|
setProcess(s)
|
|
if s.Linux != nil {
|
|
defaults := config.Env
|
|
if len(defaults) == 0 {
|
|
defaults = defaultUnixEnv
|
|
}
|
|
s.Process.Env = replaceOrAppendEnvValues(defaults, s.Process.Env)
|
|
cmd := config.Cmd
|
|
if len(args) > 0 {
|
|
cmd = args
|
|
}
|
|
s.Process.Args = append(config.Entrypoint, cmd...)
|
|
|
|
cwd := config.WorkingDir
|
|
if cwd == "" {
|
|
cwd = "/"
|
|
}
|
|
s.Process.Cwd = cwd
|
|
if config.User != "" {
|
|
if err := WithUser(config.User)(ctx, client, c, s); err != nil {
|
|
return err
|
|
}
|
|
return WithAdditionalGIDs(fmt.Sprintf("%d", s.Process.User.UID))(ctx, client, c, s)
|
|
}
|
|
// we should query the image's /etc/group for additional GIDs
|
|
// even if there is no specified user in the image config
|
|
return WithAdditionalGIDs("root")(ctx, client, c, s)
|
|
} else if s.Windows != nil {
|
|
s.Process.Env = replaceOrAppendEnvValues(config.Env, s.Process.Env)
|
|
|
|
// To support Docker ArgsEscaped on Windows we need to combine the
|
|
// image Entrypoint & (Cmd Or User Args) while taking into account
|
|
// if Docker has already escaped them in the image config. When
|
|
// Docker sets `ArgsEscaped==true` in the config it has pre-escaped
|
|
// either Entrypoint or Cmd or both. Cmd should always be treated as
|
|
// arguments appended to Entrypoint unless:
|
|
//
|
|
// 1. Entrypoint does not exist, in which case Cmd[0] is the
|
|
// executable.
|
|
//
|
|
// 2. The user overrides the Cmd with User Args when activating the
|
|
// container in which case those args should be appended to the
|
|
// Entrypoint if it exists.
|
|
//
|
|
// To effectively do this we need to know if the arguments came from
|
|
// the user or if the arguments came from the image config when
|
|
// ArgsEscaped==true. In this case we only want to escape the
|
|
// additional user args when forming the complete CommandLine. This
|
|
// is safe in both cases of Entrypoint or Cmd being set because
|
|
// Docker will always escape them to an array of length one. Thus in
|
|
// both cases it is the "executable" portion of the command.
|
|
//
|
|
// In the case ArgsEscaped==false, Entrypoint or Cmd will contain
|
|
// any number of entries that are all unescaped and can simply be
|
|
// combined (potentially overwriting Cmd with User Args if present)
|
|
// and forwarded the container start as an Args array.
|
|
cmd := config.Cmd
|
|
cmdFromImage := true
|
|
if len(args) > 0 {
|
|
cmd = args
|
|
cmdFromImage = false
|
|
}
|
|
|
|
cmd = append(config.Entrypoint, cmd...)
|
|
if len(cmd) == 0 {
|
|
return errors.New("no arguments specified")
|
|
}
|
|
|
|
if config.ArgsEscaped && (len(config.Entrypoint) > 0 || cmdFromImage) {
|
|
s.Process.Args = nil
|
|
s.Process.CommandLine = cmd[0]
|
|
if len(cmd) > 1 {
|
|
s.Process.CommandLine += " " + escapeAndCombineArgs(cmd[1:])
|
|
}
|
|
} else {
|
|
s.Process.Args = cmd
|
|
s.Process.CommandLine = ""
|
|
}
|
|
|
|
s.Process.Cwd = config.WorkingDir
|
|
s.Process.User = specs.User{
|
|
Username: config.User,
|
|
}
|
|
} else {
|
|
return errors.New("spec does not contain Linux or Windows section")
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithRootFSPath specifies unmanaged rootfs path.
|
|
func WithRootFSPath(path string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setRoot(s)
|
|
s.Root.Path = path
|
|
// Entrypoint is not set here (it's up to caller)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithRootFSReadonly sets specs.Root.Readonly to true
|
|
func WithRootFSReadonly() SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setRoot(s)
|
|
s.Root.Readonly = true
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithNoNewPrivileges sets no_new_privileges on the process for the container
|
|
func WithNoNewPrivileges(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setProcess(s)
|
|
s.Process.NoNewPrivileges = true
|
|
return nil
|
|
}
|
|
|
|
// WithHostHostsFile bind-mounts the host's /etc/hosts into the container as readonly
|
|
func WithHostHostsFile(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
s.Mounts = append(s.Mounts, specs.Mount{
|
|
Destination: "/etc/hosts",
|
|
Type: "bind",
|
|
Source: "/etc/hosts",
|
|
Options: []string{"rbind", "ro"},
|
|
})
|
|
return nil
|
|
}
|
|
|
|
// WithHostResolvconf bind-mounts the host's /etc/resolv.conf into the container as readonly
|
|
func WithHostResolvconf(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
s.Mounts = append(s.Mounts, specs.Mount{
|
|
Destination: "/etc/resolv.conf",
|
|
Type: "bind",
|
|
Source: "/etc/resolv.conf",
|
|
Options: []string{"rbind", "ro"},
|
|
})
|
|
return nil
|
|
}
|
|
|
|
// WithHostLocaltime bind-mounts the host's /etc/localtime into the container as readonly
|
|
func WithHostLocaltime(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
s.Mounts = append(s.Mounts, specs.Mount{
|
|
Destination: "/etc/localtime",
|
|
Type: "bind",
|
|
Source: "/etc/localtime",
|
|
Options: []string{"rbind", "ro"},
|
|
})
|
|
return nil
|
|
}
|
|
|
|
// WithUserNamespace sets the uid and gid mappings for the task
|
|
// this can be called multiple times to add more mappings to the generated spec
|
|
func WithUserNamespace(uidMap, gidMap []specs.LinuxIDMapping) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
var hasUserns bool
|
|
setLinux(s)
|
|
for _, ns := range s.Linux.Namespaces {
|
|
if ns.Type == specs.UserNamespace {
|
|
hasUserns = true
|
|
break
|
|
}
|
|
}
|
|
if !hasUserns {
|
|
s.Linux.Namespaces = append(s.Linux.Namespaces, specs.LinuxNamespace{
|
|
Type: specs.UserNamespace,
|
|
})
|
|
}
|
|
s.Linux.UIDMappings = append(s.Linux.UIDMappings, uidMap...)
|
|
s.Linux.GIDMappings = append(s.Linux.GIDMappings, gidMap...)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithCgroup sets the container's cgroup path
|
|
func WithCgroup(path string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setLinux(s)
|
|
s.Linux.CgroupsPath = path
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithNamespacedCgroup uses the namespace set on the context to create a
|
|
// root directory for containers in the cgroup with the id as the subcgroup
|
|
func WithNamespacedCgroup() SpecOpts {
|
|
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
|
|
namespace, err := namespaces.NamespaceRequired(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
setLinux(s)
|
|
s.Linux.CgroupsPath = filepath.Join("/", namespace, c.ID)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithUser sets the user to be used within the container.
|
|
// It accepts a valid user string in OCI Image Spec v1.0.0:
|
|
//
|
|
// user, uid, user:group, uid:gid, uid:group, user:gid
|
|
func WithUser(userstr string) SpecOpts {
|
|
return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
|
|
defer ensureAdditionalGids(s)
|
|
setProcess(s)
|
|
s.Process.User.AdditionalGids = nil
|
|
|
|
// For LCOW it's a bit harder to confirm that the user actually exists on the host as a rootfs isn't
|
|
// mounted on the host and shared into the guest, but rather the rootfs is constructed entirely in the
|
|
// guest itself. To accommodate this, a spot to place the user string provided by a client as-is is needed.
|
|
// The `Username` field on the runtime spec is marked by Platform as only for Windows, and in this case it
|
|
// *is* being set on a Windows host at least, but will be used as a temporary holding spot until the guest
|
|
// can use the string to perform these same operations to grab the uid:gid inside.
|
|
if s.Windows != nil && s.Linux != nil {
|
|
s.Process.User.Username = userstr
|
|
return nil
|
|
}
|
|
|
|
parts := strings.Split(userstr, ":")
|
|
switch len(parts) {
|
|
case 1:
|
|
v, err := strconv.Atoi(parts[0])
|
|
if err != nil {
|
|
// if we cannot parse as a uint they try to see if it is a username
|
|
return WithUsername(userstr)(ctx, client, c, s)
|
|
}
|
|
return WithUserID(uint32(v))(ctx, client, c, s)
|
|
case 2:
|
|
var (
|
|
username string
|
|
groupname string
|
|
)
|
|
var uid, gid uint32
|
|
v, err := strconv.Atoi(parts[0])
|
|
if err != nil {
|
|
username = parts[0]
|
|
} else {
|
|
uid = uint32(v)
|
|
}
|
|
if v, err = strconv.Atoi(parts[1]); err != nil {
|
|
groupname = parts[1]
|
|
} else {
|
|
gid = uint32(v)
|
|
}
|
|
if username == "" && groupname == "" {
|
|
s.Process.User.UID, s.Process.User.GID = uid, gid
|
|
return nil
|
|
}
|
|
f := func(root string) error {
|
|
if username != "" {
|
|
user, err := UserFromPath(root, func(u user.User) bool {
|
|
return u.Name == username
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
uid = uint32(user.Uid)
|
|
}
|
|
if groupname != "" {
|
|
gid, err = GIDFromPath(root, func(g user.Group) bool {
|
|
return g.Name == groupname
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
s.Process.User.UID, s.Process.User.GID = uid, gid
|
|
return nil
|
|
}
|
|
if c.Snapshotter == "" && c.SnapshotKey == "" {
|
|
if !isRootfsAbs(s.Root.Path) {
|
|
return errors.New("rootfs absolute path is required")
|
|
}
|
|
return f(s.Root.Path)
|
|
}
|
|
if c.Snapshotter == "" {
|
|
return errors.New("no snapshotter set for container")
|
|
}
|
|
if c.SnapshotKey == "" {
|
|
return errors.New("rootfs snapshot not created for container")
|
|
}
|
|
snapshotter := client.SnapshotService(c.Snapshotter)
|
|
mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
mounts = tryReadonlyMounts(mounts)
|
|
return mount.WithTempMount(ctx, mounts, f)
|
|
default:
|
|
return fmt.Errorf("invalid USER value %s", userstr)
|
|
}
|
|
}
|
|
}
|
|
|
|
// WithUIDGID allows the UID and GID for the Process to be set
|
|
func WithUIDGID(uid, gid uint32) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
defer ensureAdditionalGids(s)
|
|
setProcess(s)
|
|
s.Process.User.AdditionalGids = nil
|
|
s.Process.User.UID = uid
|
|
s.Process.User.GID = gid
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithUserID sets the correct UID and GID for the container based
|
|
// on the image's /etc/passwd contents. If /etc/passwd does not exist,
|
|
// or uid is not found in /etc/passwd, it sets the requested uid,
|
|
// additionally sets the gid to 0, and does not return an error.
|
|
func WithUserID(uid uint32) SpecOpts {
|
|
return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) {
|
|
defer ensureAdditionalGids(s)
|
|
setProcess(s)
|
|
s.Process.User.AdditionalGids = nil
|
|
setUser := func(root string) error {
|
|
user, err := UserFromPath(root, func(u user.User) bool {
|
|
return u.Uid == int(uid)
|
|
})
|
|
if err != nil {
|
|
if os.IsNotExist(err) || err == ErrNoUsersFound {
|
|
s.Process.User.UID, s.Process.User.GID = uid, 0
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
|
|
return nil
|
|
}
|
|
if c.Snapshotter == "" && c.SnapshotKey == "" {
|
|
if !isRootfsAbs(s.Root.Path) {
|
|
return errors.New("rootfs absolute path is required")
|
|
}
|
|
return setUser(s.Root.Path)
|
|
}
|
|
if c.Snapshotter == "" {
|
|
return errors.New("no snapshotter set for container")
|
|
}
|
|
if c.SnapshotKey == "" {
|
|
return errors.New("rootfs snapshot not created for container")
|
|
}
|
|
snapshotter := client.SnapshotService(c.Snapshotter)
|
|
mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
mounts = tryReadonlyMounts(mounts)
|
|
return mount.WithTempMount(ctx, mounts, setUser)
|
|
}
|
|
}
|
|
|
|
// WithUsername sets the correct UID and GID for the container
|
|
// based on the image's /etc/passwd contents. If /etc/passwd
|
|
// does not exist, or the username is not found in /etc/passwd,
|
|
// it returns error. On Windows this sets the username as provided,
|
|
// the operating system will validate the user when going to run
|
|
// the container.
|
|
func WithUsername(username string) SpecOpts {
|
|
return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) {
|
|
defer ensureAdditionalGids(s)
|
|
setProcess(s)
|
|
s.Process.User.AdditionalGids = nil
|
|
if s.Linux != nil {
|
|
setUser := func(root string) error {
|
|
user, err := UserFromPath(root, func(u user.User) bool {
|
|
return u.Name == username
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
|
|
return nil
|
|
}
|
|
if c.Snapshotter == "" && c.SnapshotKey == "" {
|
|
if !isRootfsAbs(s.Root.Path) {
|
|
return errors.New("rootfs absolute path is required")
|
|
}
|
|
return setUser(s.Root.Path)
|
|
}
|
|
if c.Snapshotter == "" {
|
|
return errors.New("no snapshotter set for container")
|
|
}
|
|
if c.SnapshotKey == "" {
|
|
return errors.New("rootfs snapshot not created for container")
|
|
}
|
|
snapshotter := client.SnapshotService(c.Snapshotter)
|
|
mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
mounts = tryReadonlyMounts(mounts)
|
|
return mount.WithTempMount(ctx, mounts, setUser)
|
|
} else if s.Windows != nil {
|
|
s.Process.User.Username = username
|
|
} else {
|
|
return errors.New("spec does not contain Linux or Windows section")
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithAdditionalGIDs sets the OCI spec's additionalGids array to any additional groups listed
|
|
// for a particular user in the /etc/group file of the image's root filesystem
|
|
// The passed in user can be either a uid or a username.
|
|
func WithAdditionalGIDs(userstr string) SpecOpts {
|
|
return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) {
|
|
// For LCOW or on Darwin additional GID's not supported
|
|
if s.Windows != nil || runtime.GOOS == "darwin" {
|
|
return nil
|
|
}
|
|
setProcess(s)
|
|
s.Process.User.AdditionalGids = nil
|
|
setAdditionalGids := func(root string) error {
|
|
defer ensureAdditionalGids(s)
|
|
var username string
|
|
uid, err := strconv.Atoi(userstr)
|
|
if err == nil {
|
|
user, err := UserFromPath(root, func(u user.User) bool {
|
|
return u.Uid == uid
|
|
})
|
|
if err != nil {
|
|
if os.IsNotExist(err) || err == ErrNoUsersFound {
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
username = user.Name
|
|
} else {
|
|
username = userstr
|
|
}
|
|
gids, err := getSupplementalGroupsFromPath(root, func(g user.Group) bool {
|
|
// we only want supplemental groups
|
|
if g.Name == username {
|
|
return false
|
|
}
|
|
for _, entry := range g.List {
|
|
if entry == username {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
})
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
s.Process.User.AdditionalGids = gids
|
|
return nil
|
|
}
|
|
if c.Snapshotter == "" && c.SnapshotKey == "" {
|
|
if !isRootfsAbs(s.Root.Path) {
|
|
return errors.New("rootfs absolute path is required")
|
|
}
|
|
return setAdditionalGids(s.Root.Path)
|
|
}
|
|
if c.Snapshotter == "" {
|
|
return errors.New("no snapshotter set for container")
|
|
}
|
|
if c.SnapshotKey == "" {
|
|
return errors.New("rootfs snapshot not created for container")
|
|
}
|
|
snapshotter := client.SnapshotService(c.Snapshotter)
|
|
mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
mounts = tryReadonlyMounts(mounts)
|
|
return mount.WithTempMount(ctx, mounts, setAdditionalGids)
|
|
}
|
|
}
|
|
|
|
// WithAppendAdditionalGroups append additional groups within the container.
|
|
// The passed in groups can be either a gid or a groupname.
|
|
func WithAppendAdditionalGroups(groups ...string) SpecOpts {
|
|
return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) {
|
|
// For LCOW or on Darwin additional GID's are not supported
|
|
if s.Windows != nil || runtime.GOOS == "darwin" {
|
|
return nil
|
|
}
|
|
setProcess(s)
|
|
setAdditionalGids := func(root string) error {
|
|
defer ensureAdditionalGids(s)
|
|
gpath, err := fs.RootPath(root, "/etc/group")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ugroups, err := user.ParseGroupFile(gpath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
groupMap := make(map[string]user.Group)
|
|
for _, group := range ugroups {
|
|
groupMap[group.Name] = group
|
|
}
|
|
var gids []uint32
|
|
for _, group := range groups {
|
|
gid, err := strconv.ParseUint(group, 10, 32)
|
|
if err == nil {
|
|
gids = append(gids, uint32(gid))
|
|
} else {
|
|
g, ok := groupMap[group]
|
|
if !ok {
|
|
return fmt.Errorf("unable to find group %s", group)
|
|
}
|
|
gids = append(gids, uint32(g.Gid))
|
|
}
|
|
}
|
|
s.Process.User.AdditionalGids = append(s.Process.User.AdditionalGids, gids...)
|
|
return nil
|
|
}
|
|
if c.Snapshotter == "" && c.SnapshotKey == "" {
|
|
if !filepath.IsAbs(s.Root.Path) {
|
|
return errors.New("rootfs absolute path is required")
|
|
}
|
|
return setAdditionalGids(s.Root.Path)
|
|
}
|
|
if c.Snapshotter == "" {
|
|
return errors.New("no snapshotter set for container")
|
|
}
|
|
if c.SnapshotKey == "" {
|
|
return errors.New("rootfs snapshot not created for container")
|
|
}
|
|
snapshotter := client.SnapshotService(c.Snapshotter)
|
|
mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
mounts = tryReadonlyMounts(mounts)
|
|
return mount.WithTempMount(ctx, mounts, setAdditionalGids)
|
|
}
|
|
}
|
|
|
|
// WithCapabilities sets Linux capabilities on the process
|
|
func WithCapabilities(caps []string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setCapabilities(s)
|
|
|
|
s.Process.Capabilities.Bounding = caps
|
|
s.Process.Capabilities.Effective = caps
|
|
s.Process.Capabilities.Permitted = caps
|
|
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func capsContain(caps []string, s string) bool {
|
|
for _, c := range caps {
|
|
if c == s {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func removeCap(caps *[]string, s string) {
|
|
var newcaps []string
|
|
for _, c := range *caps {
|
|
if c == s {
|
|
continue
|
|
}
|
|
newcaps = append(newcaps, c)
|
|
}
|
|
*caps = newcaps
|
|
}
|
|
|
|
// WithAddedCapabilities adds the provided capabilities
|
|
func WithAddedCapabilities(caps []string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setCapabilities(s)
|
|
for _, c := range caps {
|
|
for _, cl := range []*[]string{
|
|
&s.Process.Capabilities.Bounding,
|
|
&s.Process.Capabilities.Effective,
|
|
&s.Process.Capabilities.Permitted,
|
|
} {
|
|
if !capsContain(*cl, c) {
|
|
*cl = append(*cl, c)
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithDroppedCapabilities removes the provided capabilities
|
|
func WithDroppedCapabilities(caps []string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setCapabilities(s)
|
|
for _, c := range caps {
|
|
for _, cl := range []*[]string{
|
|
&s.Process.Capabilities.Bounding,
|
|
&s.Process.Capabilities.Effective,
|
|
&s.Process.Capabilities.Permitted,
|
|
} {
|
|
removeCap(cl, c)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithAmbientCapabilities set the Linux ambient capabilities for the process
|
|
// Ambient capabilities should only be set for non-root users or the caller should
|
|
// understand how these capabilities are used and set
|
|
func WithAmbientCapabilities(caps []string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setCapabilities(s)
|
|
s.Process.Capabilities.Inheritable = caps
|
|
s.Process.Capabilities.Ambient = caps
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// ErrNoUsersFound can be returned from UserFromPath
|
|
var ErrNoUsersFound = errors.New("no users found")
|
|
|
|
// UserFromPath inspects the user object using /etc/passwd in the specified rootfs.
|
|
// filter can be nil.
|
|
func UserFromPath(root string, filter func(user.User) bool) (user.User, error) {
|
|
ppath, err := fs.RootPath(root, "/etc/passwd")
|
|
if err != nil {
|
|
return user.User{}, err
|
|
}
|
|
users, err := user.ParsePasswdFileFilter(ppath, filter)
|
|
if err != nil {
|
|
return user.User{}, err
|
|
}
|
|
if len(users) == 0 {
|
|
return user.User{}, ErrNoUsersFound
|
|
}
|
|
return users[0], nil
|
|
}
|
|
|
|
// ErrNoGroupsFound can be returned from GIDFromPath
|
|
var ErrNoGroupsFound = errors.New("no groups found")
|
|
|
|
// GIDFromPath inspects the GID using /etc/group in the specified rootfs.
|
|
// filter can be nil.
|
|
func GIDFromPath(root string, filter func(user.Group) bool) (gid uint32, err error) {
|
|
gpath, err := fs.RootPath(root, "/etc/group")
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
groups, err := user.ParseGroupFileFilter(gpath, filter)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
if len(groups) == 0 {
|
|
return 0, ErrNoGroupsFound
|
|
}
|
|
g := groups[0]
|
|
return uint32(g.Gid), nil
|
|
}
|
|
|
|
func getSupplementalGroupsFromPath(root string, filter func(user.Group) bool) ([]uint32, error) {
|
|
gpath, err := fs.RootPath(root, "/etc/group")
|
|
if err != nil {
|
|
return []uint32{}, err
|
|
}
|
|
groups, err := user.ParseGroupFileFilter(gpath, filter)
|
|
if err != nil {
|
|
return []uint32{}, err
|
|
}
|
|
if len(groups) == 0 {
|
|
// if there are no additional groups; just return an empty set
|
|
return []uint32{}, nil
|
|
}
|
|
addlGids := []uint32{}
|
|
for _, grp := range groups {
|
|
addlGids = append(addlGids, uint32(grp.Gid))
|
|
}
|
|
return addlGids, nil
|
|
}
|
|
|
|
func isRootfsAbs(root string) bool {
|
|
return filepath.IsAbs(root)
|
|
}
|
|
|
|
// WithMaskedPaths sets the masked paths option
|
|
func WithMaskedPaths(paths []string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setLinux(s)
|
|
s.Linux.MaskedPaths = paths
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithReadonlyPaths sets the read only paths option
|
|
func WithReadonlyPaths(paths []string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setLinux(s)
|
|
s.Linux.ReadonlyPaths = paths
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithWriteableSysfs makes any sysfs mounts writeable
|
|
func WithWriteableSysfs(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
for _, m := range s.Mounts {
|
|
if m.Type == "sysfs" {
|
|
for i, o := range m.Options {
|
|
if o == "ro" {
|
|
m.Options[i] = "rw"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// WithWriteableCgroupfs makes any cgroup mounts writeable
|
|
func WithWriteableCgroupfs(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
for _, m := range s.Mounts {
|
|
if m.Type == "cgroup" {
|
|
for i, o := range m.Options {
|
|
if o == "ro" {
|
|
m.Options[i] = "rw"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// WithSelinuxLabel sets the process SELinux label
|
|
func WithSelinuxLabel(label string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setProcess(s)
|
|
s.Process.SelinuxLabel = label
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithApparmorProfile sets the Apparmor profile for the process
|
|
func WithApparmorProfile(profile string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setProcess(s)
|
|
s.Process.ApparmorProfile = profile
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithSeccompUnconfined clears the seccomp profile
|
|
func WithSeccompUnconfined(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setLinux(s)
|
|
s.Linux.Seccomp = nil
|
|
return nil
|
|
}
|
|
|
|
// WithParentCgroupDevices uses the default cgroup setup to inherit the container's parent cgroup's
|
|
// allowed and denied devices
|
|
func WithParentCgroupDevices(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setLinux(s)
|
|
if s.Linux.Resources == nil {
|
|
s.Linux.Resources = &specs.LinuxResources{}
|
|
}
|
|
s.Linux.Resources.Devices = nil
|
|
return nil
|
|
}
|
|
|
|
// WithAllDevicesAllowed permits READ WRITE MKNOD on all devices nodes for the container
|
|
func WithAllDevicesAllowed(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setLinux(s)
|
|
if s.Linux.Resources == nil {
|
|
s.Linux.Resources = &specs.LinuxResources{}
|
|
}
|
|
s.Linux.Resources.Devices = []specs.LinuxDeviceCgroup{
|
|
{
|
|
Allow: true,
|
|
Access: rwm,
|
|
},
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// WithDefaultUnixDevices adds the default devices for unix such as /dev/null, /dev/random to
|
|
// the container's resource cgroup spec
|
|
func WithDefaultUnixDevices(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setLinux(s)
|
|
if s.Linux.Resources == nil {
|
|
s.Linux.Resources = &specs.LinuxResources{}
|
|
}
|
|
intptr := func(i int64) *int64 {
|
|
return &i
|
|
}
|
|
s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, []specs.LinuxDeviceCgroup{
|
|
{
|
|
// "/dev/null",
|
|
Type: "c",
|
|
Major: intptr(1),
|
|
Minor: intptr(3),
|
|
Access: rwm,
|
|
Allow: true,
|
|
},
|
|
{
|
|
// "/dev/random",
|
|
Type: "c",
|
|
Major: intptr(1),
|
|
Minor: intptr(8),
|
|
Access: rwm,
|
|
Allow: true,
|
|
},
|
|
{
|
|
// "/dev/full",
|
|
Type: "c",
|
|
Major: intptr(1),
|
|
Minor: intptr(7),
|
|
Access: rwm,
|
|
Allow: true,
|
|
},
|
|
{
|
|
// "/dev/tty",
|
|
Type: "c",
|
|
Major: intptr(5),
|
|
Minor: intptr(0),
|
|
Access: rwm,
|
|
Allow: true,
|
|
},
|
|
{
|
|
// "/dev/zero",
|
|
Type: "c",
|
|
Major: intptr(1),
|
|
Minor: intptr(5),
|
|
Access: rwm,
|
|
Allow: true,
|
|
},
|
|
{
|
|
// "/dev/urandom",
|
|
Type: "c",
|
|
Major: intptr(1),
|
|
Minor: intptr(9),
|
|
Access: rwm,
|
|
Allow: true,
|
|
},
|
|
{
|
|
// "/dev/console",
|
|
Type: "c",
|
|
Major: intptr(5),
|
|
Minor: intptr(1),
|
|
Access: rwm,
|
|
Allow: true,
|
|
},
|
|
// /dev/pts/ - pts namespaces are "coming soon"
|
|
{
|
|
Type: "c",
|
|
Major: intptr(136),
|
|
Access: rwm,
|
|
Allow: true,
|
|
},
|
|
{
|
|
// "dev/ptmx"
|
|
Type: "c",
|
|
Major: intptr(5),
|
|
Minor: intptr(2),
|
|
Access: rwm,
|
|
Allow: true,
|
|
},
|
|
}...)
|
|
return nil
|
|
}
|
|
|
|
// WithPrivileged sets up options for a privileged container
|
|
var WithPrivileged = Compose(
|
|
WithAllCurrentCapabilities,
|
|
WithMaskedPaths(nil),
|
|
WithReadonlyPaths(nil),
|
|
WithWriteableSysfs,
|
|
WithWriteableCgroupfs,
|
|
WithSelinuxLabel(""),
|
|
WithApparmorProfile(""),
|
|
WithSeccompUnconfined,
|
|
)
|
|
|
|
// WithWindowsHyperV sets the Windows.HyperV section for HyperV isolation of containers.
|
|
func WithWindowsHyperV(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
if s.Windows == nil {
|
|
s.Windows = &specs.Windows{}
|
|
}
|
|
if s.Windows.HyperV == nil {
|
|
s.Windows.HyperV = &specs.WindowsHyperV{}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// WithMemoryLimit sets the `Linux.LinuxResources.Memory.Limit` section to the
|
|
// `limit` specified if the `Linux` section is not `nil`. Additionally sets the
|
|
// `Windows.WindowsResources.Memory.Limit` section if the `Windows` section is
|
|
// not `nil`.
|
|
func WithMemoryLimit(limit uint64) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
if s.Linux != nil {
|
|
if s.Linux.Resources == nil {
|
|
s.Linux.Resources = &specs.LinuxResources{}
|
|
}
|
|
if s.Linux.Resources.Memory == nil {
|
|
s.Linux.Resources.Memory = &specs.LinuxMemory{}
|
|
}
|
|
l := int64(limit)
|
|
s.Linux.Resources.Memory.Limit = &l
|
|
}
|
|
if s.Windows != nil {
|
|
if s.Windows.Resources == nil {
|
|
s.Windows.Resources = &specs.WindowsResources{}
|
|
}
|
|
if s.Windows.Resources.Memory == nil {
|
|
s.Windows.Resources.Memory = &specs.WindowsMemoryResources{}
|
|
}
|
|
s.Windows.Resources.Memory.Limit = &limit
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithAnnotations appends or replaces the annotations on the spec with the
|
|
// provided annotations
|
|
func WithAnnotations(annotations map[string]string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
if s.Annotations == nil {
|
|
s.Annotations = make(map[string]string)
|
|
}
|
|
for k, v := range annotations {
|
|
s.Annotations[k] = v
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithLinuxDevices adds the provided linux devices to the spec
|
|
func WithLinuxDevices(devices []specs.LinuxDevice) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setLinux(s)
|
|
s.Linux.Devices = append(s.Linux.Devices, devices...)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func WithLinuxDeviceFollowSymlinks(path, permissions string) SpecOpts {
|
|
return withLinuxDevice(path, permissions, true)
|
|
}
|
|
|
|
// WithLinuxDevice adds the device specified by path to the spec
|
|
func WithLinuxDevice(path, permissions string) SpecOpts {
|
|
return withLinuxDevice(path, permissions, false)
|
|
}
|
|
|
|
func withLinuxDevice(path, permissions string, followSymlinks bool) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setLinux(s)
|
|
setResources(s)
|
|
|
|
if followSymlinks {
|
|
resolvedPath, err := filepath.EvalSymlinks(path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
path = resolvedPath
|
|
}
|
|
|
|
dev, err := DeviceFromPath(path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
s.Linux.Devices = append(s.Linux.Devices, *dev)
|
|
|
|
s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, specs.LinuxDeviceCgroup{
|
|
Type: dev.Type,
|
|
Allow: true,
|
|
Major: &dev.Major,
|
|
Minor: &dev.Minor,
|
|
Access: permissions,
|
|
})
|
|
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithEnvFile adds environment variables from a file to the container's spec
|
|
func WithEnvFile(path string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
var vars []string
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
sc := bufio.NewScanner(f)
|
|
for sc.Scan() {
|
|
vars = append(vars, sc.Text())
|
|
}
|
|
if err = sc.Err(); err != nil {
|
|
return err
|
|
}
|
|
return WithEnv(vars)(nil, nil, nil, s)
|
|
}
|
|
}
|
|
|
|
// ErrNoShmMount is returned when there is no /dev/shm mount specified in the config
|
|
// and an Opts was trying to set a configuration value on the mount.
|
|
var ErrNoShmMount = errors.New("no /dev/shm mount specified")
|
|
|
|
// WithDevShmSize sets the size of the /dev/shm mount for the container.
|
|
//
|
|
// The size value is specified in kb, kilobytes.
|
|
func WithDevShmSize(kb int64) SpecOpts {
|
|
return func(ctx context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
for i, m := range s.Mounts {
|
|
if filepath.Clean(m.Destination) == "/dev/shm" && m.Source == "shm" && m.Type == "tmpfs" {
|
|
for i := 0; i < len(m.Options); i++ {
|
|
if strings.HasPrefix(m.Options[i], "size=") {
|
|
m.Options = append(m.Options[:i], m.Options[i+1:]...)
|
|
i--
|
|
}
|
|
}
|
|
s.Mounts[i].Options = append(m.Options, fmt.Sprintf("size=%dk", kb))
|
|
return nil
|
|
}
|
|
}
|
|
return ErrNoShmMount
|
|
}
|
|
}
|
|
|
|
// tryReadonlyMounts is used by the options which are trying to get user/group
|
|
// information from container's rootfs. Since the option does read operation
|
|
// only, this helper will append ReadOnly mount option to prevent linux kernel
|
|
// from syncing whole filesystem in umount syscall.
|
|
//
|
|
// TODO(fuweid):
|
|
//
|
|
// Currently, it only works for overlayfs. I think we can apply it to other
|
|
// kinds of filesystem. Maybe we can return `ro` option by `snapshotter.Mount`
|
|
// API, when the caller passes that experimental annotation
|
|
// `containerd.io/snapshot/readonly.mount` something like that.
|
|
func tryReadonlyMounts(mounts []mount.Mount) []mount.Mount {
|
|
if len(mounts) == 1 && mounts[0].Type == "overlay" {
|
|
mounts[0].Options = append(mounts[0].Options, "ro")
|
|
}
|
|
return mounts
|
|
}
|
|
|
|
// WithWindowsDevice adds a device exposed to a Windows (WCOW or LCOW) Container
|
|
func WithWindowsDevice(idType, id string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
if idType == "" {
|
|
return errors.New("missing idType")
|
|
}
|
|
if s.Windows == nil {
|
|
s.Windows = &specs.Windows{}
|
|
}
|
|
s.Windows.Devices = append(s.Windows.Devices, specs.WindowsDevice{IDType: idType, ID: id})
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithMemorySwap sets the container's swap in bytes
|
|
func WithMemorySwap(swap int64) SpecOpts {
|
|
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
|
|
setResources(s)
|
|
if s.Linux.Resources.Memory == nil {
|
|
s.Linux.Resources.Memory = &specs.LinuxMemory{}
|
|
}
|
|
s.Linux.Resources.Memory.Swap = &swap
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithPidsLimit sets the container's pid limit or maximum
|
|
func WithPidsLimit(limit int64) SpecOpts {
|
|
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
|
|
setResources(s)
|
|
if s.Linux.Resources.Pids == nil {
|
|
s.Linux.Resources.Pids = &specs.LinuxPids{}
|
|
}
|
|
s.Linux.Resources.Pids.Limit = limit
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithBlockIO sets the container's blkio parameters
|
|
func WithBlockIO(blockio *specs.LinuxBlockIO) SpecOpts {
|
|
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
|
|
setResources(s)
|
|
s.Linux.Resources.BlockIO = blockio
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithCPUShares sets the container's cpu shares
|
|
func WithCPUShares(shares uint64) SpecOpts {
|
|
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
|
|
setCPU(s)
|
|
s.Linux.Resources.CPU.Shares = &shares
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithCPUs sets the container's cpus/cores for use by the container
|
|
func WithCPUs(cpus string) SpecOpts {
|
|
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
|
|
setCPU(s)
|
|
s.Linux.Resources.CPU.Cpus = cpus
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithCPUsMems sets the container's cpu mems for use by the container
|
|
func WithCPUsMems(mems string) SpecOpts {
|
|
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
|
|
setCPU(s)
|
|
s.Linux.Resources.CPU.Mems = mems
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithCPUCFS sets the container's Completely fair scheduling (CFS) quota and period
|
|
func WithCPUCFS(quota int64, period uint64) SpecOpts {
|
|
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
|
|
setCPU(s)
|
|
s.Linux.Resources.CPU.Quota = "a
|
|
s.Linux.Resources.CPU.Period = &period
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithCPURT sets the container's realtime scheduling (RT) runtime and period.
|
|
func WithCPURT(runtime int64, period uint64) SpecOpts {
|
|
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
|
|
setCPU(s)
|
|
s.Linux.Resources.CPU.RealtimeRuntime = &runtime
|
|
s.Linux.Resources.CPU.RealtimePeriod = &period
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithoutRunMount removes the `/run` inside the spec
|
|
func WithoutRunMount(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
|
|
return WithoutMounts("/run")(ctx, client, c, s)
|
|
}
|
|
|
|
// WithRdt sets the container's RDT parameters
|
|
func WithRdt(closID, l3CacheSchema, memBwSchema string) SpecOpts {
|
|
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
|
|
s.Linux.IntelRdt = &specs.LinuxIntelRdt{
|
|
ClosID: closID,
|
|
L3CacheSchema: l3CacheSchema,
|
|
MemBwSchema: memBwSchema,
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithWindowsCPUCount sets the `Windows.Resources.CPU.Count` section to the
|
|
// `count` specified.
|
|
func WithWindowsCPUCount(count uint64) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setCPUWindows(s)
|
|
s.Windows.Resources.CPU.Count = &count
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithWindowsCPUShares sets the `Windows.Resources.CPU.Shares` section to the
|
|
// `shares` specified.
|
|
func WithWindowsCPUShares(shares uint16) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setCPUWindows(s)
|
|
s.Windows.Resources.CPU.Shares = &shares
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithWindowsCPUMaximum sets the `Windows.Resources.CPU.Maximum` section to the
|
|
// `max` specified.
|
|
func WithWindowsCPUMaximum(max uint16) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
setCPUWindows(s)
|
|
s.Windows.Resources.CPU.Maximum = &max
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithWindowsIgnoreFlushesDuringBoot sets `Windows.IgnoreFlushesDuringBoot`.
|
|
func WithWindowsIgnoreFlushesDuringBoot() SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
if s.Windows == nil {
|
|
s.Windows = &specs.Windows{}
|
|
}
|
|
s.Windows.IgnoreFlushesDuringBoot = true
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithWindowNetworksAllowUnqualifiedDNSQuery sets `Windows.Network.AllowUnqualifiedDNSQuery`.
|
|
func WithWindowNetworksAllowUnqualifiedDNSQuery() SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
if s.Windows == nil {
|
|
s.Windows = &specs.Windows{}
|
|
}
|
|
if s.Windows.Network == nil {
|
|
s.Windows.Network = &specs.WindowsNetwork{}
|
|
}
|
|
|
|
s.Windows.Network.AllowUnqualifiedDNSQuery = true
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithWindowsNetworkNamespace sets the network namespace for a Windows container.
|
|
func WithWindowsNetworkNamespace(ns string) SpecOpts {
|
|
return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
|
|
if s.Windows == nil {
|
|
s.Windows = &specs.Windows{}
|
|
}
|
|
if s.Windows.Network == nil {
|
|
s.Windows.Network = &specs.WindowsNetwork{}
|
|
}
|
|
s.Windows.Network.NetworkNamespace = ns
|
|
return nil
|
|
}
|
|
}
|