Move CRI from pkg/ to internal/
Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
This commit is contained in:
155
internal/cri/opts/container.go
Normal file
155
internal/cri/opts/container.go
Normal file
@@ -0,0 +1,155 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package opts
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/containerd/continuity/fs"
|
||||
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||
|
||||
containerd "github.com/containerd/containerd/v2/client"
|
||||
"github.com/containerd/containerd/v2/core/containers"
|
||||
"github.com/containerd/containerd/v2/core/mount"
|
||||
"github.com/containerd/containerd/v2/core/snapshots"
|
||||
"github.com/containerd/errdefs"
|
||||
"github.com/containerd/log"
|
||||
)
|
||||
|
||||
// WithNewSnapshot wraps `containerd.WithNewSnapshot` so that if creating the
|
||||
// snapshot fails we make sure the image is actually unpacked and retry.
|
||||
func WithNewSnapshot(id string, i containerd.Image, opts ...snapshots.Opt) containerd.NewContainerOpts {
|
||||
f := containerd.WithNewSnapshot(id, i, opts...)
|
||||
return func(ctx context.Context, client *containerd.Client, c *containers.Container) error {
|
||||
if err := f(ctx, client, c); err != nil {
|
||||
if !errdefs.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := i.Unpack(ctx, c.Snapshotter); err != nil {
|
||||
return fmt.Errorf("error unpacking image: %w", err)
|
||||
}
|
||||
return f(ctx, client, c)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithVolumes copies ownership of volume in rootfs to its corresponding host path.
|
||||
// It doesn't update runtime spec.
|
||||
// The passed in map is a host path to container path map for all volumes.
|
||||
func WithVolumes(volumeMounts map[string]string, platform imagespec.Platform) containerd.NewContainerOpts {
|
||||
return func(ctx context.Context, client *containerd.Client, c *containers.Container) (err error) {
|
||||
if c.Snapshotter == "" {
|
||||
return errors.New("no snapshotter set for container")
|
||||
}
|
||||
if c.SnapshotKey == "" {
|
||||
return errors.New("rootfs not created for container")
|
||||
}
|
||||
snapshotter := client.SnapshotService(c.Snapshotter)
|
||||
mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Since only read is needed, append ReadOnly mount option to prevent linux kernel
|
||||
// from syncing whole filesystem in umount syscall.
|
||||
if len(mounts) == 1 && mounts[0].Type == "overlay" {
|
||||
mounts[0].Options = append(mounts[0].Options, "ro")
|
||||
}
|
||||
|
||||
root, err := os.MkdirTemp("", "ctd-volume")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// We change RemoveAll to Remove so that we either leak a temp dir
|
||||
// if it fails but not RM snapshot data.
|
||||
// refer to https://github.com/containerd/containerd/pull/1868
|
||||
// https://github.com/containerd/containerd/pull/1785
|
||||
defer os.Remove(root)
|
||||
|
||||
if err := mount.All(mounts, root); err != nil {
|
||||
return fmt.Errorf("failed to mount: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if uerr := mount.Unmount(root, 0); uerr != nil {
|
||||
log.G(ctx).WithError(uerr).Errorf("Failed to unmount snapshot %q", root)
|
||||
if err == nil {
|
||||
err = uerr
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
for host, volume := range volumeMounts {
|
||||
if platform.OS == "windows" {
|
||||
// Windows allows volume mounts in subfolders under C: and as any other drive letter like D:, E:, etc.
|
||||
// An image may contain files inside a folder defined as a VOLUME in a Dockerfile. On Windows, images
|
||||
// can only contain pre-existing files for volumes situated on the root filesystem, which is C:.
|
||||
// For any other volumes, we need to skip attempting to copy existing contents.
|
||||
//
|
||||
// C:\some\volume --> \some\volume
|
||||
// D:\some\volume --> skip
|
||||
if len(volume) >= 2 && string(volume[1]) == ":" {
|
||||
// Perform a case insensitive comparison to "C", and skip non-C mounted volumes.
|
||||
if !strings.EqualFold(string(volume[0]), "c") {
|
||||
continue
|
||||
}
|
||||
// This is a volume mounted somewhere under C:\. We strip the drive letter and allow fs.RootPath()
|
||||
// to append the remaining path to the rootfs path as seen by the host OS.
|
||||
volume = volume[2:]
|
||||
}
|
||||
}
|
||||
src, err := fs.RootPath(root, volume)
|
||||
if err != nil {
|
||||
return fmt.Errorf("rootpath on mountPath %s, volume %s: %w", root, volume, err)
|
||||
}
|
||||
if _, err := os.Stat(src); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
// Skip copying directory if it does not exist.
|
||||
continue
|
||||
}
|
||||
return fmt.Errorf("stat volume in rootfs: %w", err)
|
||||
}
|
||||
if err := copyExistingContents(src, host); err != nil {
|
||||
return fmt.Errorf("taking runtime copy of volume: %w", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// copyExistingContents copies from the source to the destination and
|
||||
// ensures the ownership is appropriately set.
|
||||
func copyExistingContents(source, destination string) error {
|
||||
f, err := os.Open(destination)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
dstList, err := f.Readdirnames(-1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(dstList) != 0 {
|
||||
return fmt.Errorf("volume at %q is not initially empty", destination)
|
||||
}
|
||||
return fs.CopyDir(destination, source, fs.WithXAttrExclude("security.selinux"))
|
||||
}
|
||||
119
internal/cri/opts/spec_darwin_opts.go
Normal file
119
internal/cri/opts/spec_darwin_opts.go
Normal file
@@ -0,0 +1,119 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package opts
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
|
||||
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
|
||||
"github.com/containerd/containerd/v2/core/containers"
|
||||
"github.com/containerd/containerd/v2/pkg/oci"
|
||||
osinterface "github.com/containerd/containerd/v2/pkg/os"
|
||||
)
|
||||
|
||||
// WithDarwinMounts adds mounts from CRI's container config + extra mounts.
|
||||
func WithDarwinMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, container *containers.Container, s *oci.Spec) error {
|
||||
// mergeMounts merge CRI mounts with extra mounts. If a mount destination
|
||||
// is mounted by both a CRI mount and an extra mount, the CRI mount will
|
||||
// be kept.
|
||||
var (
|
||||
criMounts = config.GetMounts()
|
||||
mounts = append([]*runtime.Mount{}, criMounts...)
|
||||
)
|
||||
|
||||
// Copy all mounts from extra mounts, except for mounts overridden by CRI.
|
||||
for _, e := range extra {
|
||||
found := false
|
||||
for _, c := range criMounts {
|
||||
if cleanMount(e.ContainerPath) == cleanMount(c.ContainerPath) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
mounts = append(mounts, e)
|
||||
}
|
||||
}
|
||||
|
||||
// Sort mounts in number of parts. This ensures that high level mounts don't
|
||||
// shadow other mounts.
|
||||
sort.Sort(orderedMounts(mounts))
|
||||
|
||||
// Copy all mounts from default mounts, except for
|
||||
// - mounts overridden by supplied mount;
|
||||
mountSet := make(map[string]struct{})
|
||||
for _, m := range mounts {
|
||||
mountSet[filepath.Clean(m.ContainerPath)] = struct{}{}
|
||||
}
|
||||
|
||||
defaultMounts := s.Mounts
|
||||
s.Mounts = nil
|
||||
|
||||
for _, m := range defaultMounts {
|
||||
dst := cleanMount(m.Destination)
|
||||
if _, ok := mountSet[dst]; ok {
|
||||
// filter out mount overridden by a supplied mount
|
||||
continue
|
||||
}
|
||||
s.Mounts = append(s.Mounts, m)
|
||||
}
|
||||
|
||||
for _, mount := range mounts {
|
||||
var (
|
||||
dst = mount.GetContainerPath()
|
||||
src = mount.GetHostPath()
|
||||
)
|
||||
|
||||
// Create the host path if it doesn't exist.
|
||||
if _, err := osi.Stat(src); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return fmt.Errorf("failed to stat %q: %w", src, err)
|
||||
}
|
||||
if err := osi.MkdirAll(src, 0755); err != nil {
|
||||
return fmt.Errorf("failed to mkdir %q: %w", src, err)
|
||||
}
|
||||
}
|
||||
|
||||
src, err := osi.ResolveSymbolicLink(src)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to resolve symlink %q: %w", src, err)
|
||||
}
|
||||
|
||||
var options []string
|
||||
if mount.GetReadonly() {
|
||||
options = append(options, "ro")
|
||||
} else {
|
||||
options = append(options, "rw")
|
||||
}
|
||||
|
||||
s.Mounts = append(s.Mounts, runtimespec.Mount{
|
||||
Source: src,
|
||||
Destination: dst,
|
||||
Type: "bind",
|
||||
Options: options,
|
||||
})
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
177
internal/cri/opts/spec_linux.go
Normal file
177
internal/cri/opts/spec_linux.go
Normal file
@@ -0,0 +1,177 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package opts
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
|
||||
"github.com/containerd/cgroups/v3"
|
||||
"golang.org/x/sys/unix"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||
|
||||
"github.com/containerd/containerd/v2/core/containers"
|
||||
"github.com/containerd/containerd/v2/pkg/oci"
|
||||
"github.com/containerd/log"
|
||||
)
|
||||
|
||||
// Linux dependent OCI spec opts.
|
||||
|
||||
var (
|
||||
swapControllerAvailability bool
|
||||
swapControllerAvailabilityOnce sync.Once
|
||||
)
|
||||
|
||||
// SwapControllerAvailable returns true if the swap controller is available
|
||||
func SwapControllerAvailable() bool {
|
||||
swapControllerAvailabilityOnce.Do(func() {
|
||||
const warn = "Failed to detect the availability of the swap controller, assuming not available"
|
||||
p := "/sys/fs/cgroup/memory/memory.memsw.limit_in_bytes"
|
||||
if cgroups.Mode() == cgroups.Unified {
|
||||
// memory.swap.max does not exist in the cgroup root, so we check /sys/fs/cgroup/<SELF>/memory.swap.max
|
||||
_, unified, err := cgroups.ParseCgroupFileUnified("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
err = fmt.Errorf("failed to parse /proc/self/cgroup: %w", err)
|
||||
log.L.WithError(err).Warn(warn)
|
||||
return
|
||||
}
|
||||
p = filepath.Join("/sys/fs/cgroup", unified, "memory.swap.max")
|
||||
}
|
||||
if _, err := os.Stat(p); err != nil {
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
log.L.WithError(err).Warn(warn)
|
||||
}
|
||||
return
|
||||
}
|
||||
swapControllerAvailability = true
|
||||
})
|
||||
return swapControllerAvailability
|
||||
}
|
||||
|
||||
var (
|
||||
supportsHugetlbOnce sync.Once
|
||||
supportsHugetlb bool
|
||||
)
|
||||
|
||||
func isHugetlbControllerPresent() bool {
|
||||
supportsHugetlbOnce.Do(func() {
|
||||
supportsHugetlb = false
|
||||
if IsCgroup2UnifiedMode() {
|
||||
supportsHugetlb = cgroupv2HasHugetlb()
|
||||
} else {
|
||||
supportsHugetlb = cgroupv1HasHugetlb()
|
||||
}
|
||||
})
|
||||
return supportsHugetlb
|
||||
}
|
||||
|
||||
var (
|
||||
_cgroupv1HasHugetlbOnce sync.Once
|
||||
_cgroupv1HasHugetlb bool
|
||||
_cgroupv2HasHugetlbOnce sync.Once
|
||||
_cgroupv2HasHugetlb bool
|
||||
isUnifiedOnce sync.Once
|
||||
isUnified bool
|
||||
)
|
||||
|
||||
// cgroupv1HasHugetlb returns whether the hugetlb controller is present on
|
||||
// cgroup v1.
|
||||
func cgroupv1HasHugetlb() bool {
|
||||
_cgroupv1HasHugetlbOnce.Do(func() {
|
||||
if _, err := os.ReadDir("/sys/fs/cgroup/hugetlb"); err != nil {
|
||||
_cgroupv1HasHugetlb = false
|
||||
} else {
|
||||
_cgroupv1HasHugetlb = true
|
||||
}
|
||||
})
|
||||
return _cgroupv1HasHugetlb
|
||||
}
|
||||
|
||||
// cgroupv2HasHugetlb returns whether the hugetlb controller is present on
|
||||
// cgroup v2.
|
||||
func cgroupv2HasHugetlb() bool {
|
||||
_cgroupv2HasHugetlbOnce.Do(func() {
|
||||
controllers, err := os.ReadFile("/sys/fs/cgroup/cgroup.controllers")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
_cgroupv2HasHugetlb = strings.Contains(string(controllers), "hugetlb")
|
||||
})
|
||||
return _cgroupv2HasHugetlb
|
||||
}
|
||||
|
||||
// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
|
||||
func IsCgroup2UnifiedMode() bool {
|
||||
isUnifiedOnce.Do(func() {
|
||||
var st syscall.Statfs_t
|
||||
if err := syscall.Statfs("/sys/fs/cgroup", &st); err != nil {
|
||||
panic("cannot statfs cgroup root")
|
||||
}
|
||||
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||
})
|
||||
return isUnified
|
||||
}
|
||||
|
||||
// WithCDI updates OCI spec with CDI content
|
||||
func WithCDI(annotations map[string]string, CDIDevices []*runtime.CDIDevice) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *oci.Spec) error {
|
||||
seen := make(map[string]bool)
|
||||
// Add devices from CDIDevices CRI field
|
||||
var devices []string
|
||||
var err error
|
||||
for _, device := range CDIDevices {
|
||||
deviceName := device.Name
|
||||
if seen[deviceName] {
|
||||
log.G(ctx).Debugf("Skipping duplicated CDI device %s", deviceName)
|
||||
continue
|
||||
}
|
||||
devices = append(devices, deviceName)
|
||||
seen[deviceName] = true
|
||||
}
|
||||
log.G(ctx).Infof("Container %v: CDI devices from CRI Config.CDIDevices: %v", c.ID, devices)
|
||||
|
||||
// Add devices from CDI annotations
|
||||
_, devsFromAnnotations, err := cdi.ParseAnnotations(annotations)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse CDI device annotations: %w", err)
|
||||
}
|
||||
|
||||
if devsFromAnnotations != nil {
|
||||
log.G(ctx).Infof("Container %v: CDI devices from annotations: %v", c.ID, devsFromAnnotations)
|
||||
for _, deviceName := range devsFromAnnotations {
|
||||
if seen[deviceName] {
|
||||
// TODO: change to Warning when passing CDI devices as annotations is deprecated
|
||||
log.G(ctx).Debugf("Skipping duplicated CDI device %s", deviceName)
|
||||
continue
|
||||
}
|
||||
devices = append(devices, deviceName)
|
||||
seen[deviceName] = true
|
||||
}
|
||||
// TODO: change to Warning when passing CDI devices as annotations is deprecated
|
||||
log.G(ctx).Debug("Passing CDI devices as annotations will be deprecated soon, please use CRI CDIDevices instead")
|
||||
}
|
||||
|
||||
return oci.WithCDIDevices(devices...)(ctx, client, c, s)
|
||||
}
|
||||
}
|
||||
471
internal/cri/opts/spec_linux_opts.go
Normal file
471
internal/cri/opts/spec_linux_opts.go
Normal file
@@ -0,0 +1,471 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package opts
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
|
||||
"github.com/containerd/containerd/v2/core/containers"
|
||||
"github.com/containerd/containerd/v2/core/mount"
|
||||
"github.com/containerd/containerd/v2/pkg/oci"
|
||||
osinterface "github.com/containerd/containerd/v2/pkg/os"
|
||||
"github.com/containerd/log"
|
||||
)
|
||||
|
||||
// RuntimeConfig is a subset of [github.com/containerd/containerd/v2/internal/cri/config].
|
||||
// Needed for avoiding circular imports.
|
||||
type RuntimeConfig struct {
|
||||
TreatRoMountsAsRro bool // only applies to volumes
|
||||
}
|
||||
|
||||
// WithMounts sorts and adds runtime and CRI mounts to the spec
|
||||
func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string, rtConfig *RuntimeConfig) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
// mergeMounts merge CRI mounts with extra mounts. If a mount destination
|
||||
// is mounted by both a CRI mount and an extra mount, the CRI mount will
|
||||
// be kept.
|
||||
var (
|
||||
criMounts = config.GetMounts()
|
||||
mounts = append([]*runtime.Mount{}, criMounts...)
|
||||
)
|
||||
// Copy all mounts from extra mounts, except for mounts overridden by CRI.
|
||||
for _, e := range extra {
|
||||
found := false
|
||||
for _, c := range criMounts {
|
||||
if filepath.Clean(e.ContainerPath) == filepath.Clean(c.ContainerPath) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
mounts = append(mounts, e)
|
||||
}
|
||||
}
|
||||
|
||||
// Sort mounts in number of parts. This ensures that high level mounts don't
|
||||
// shadow other mounts.
|
||||
sort.Sort(orderedMounts(mounts))
|
||||
|
||||
// Mount cgroup into the container as readonly, which inherits docker's behavior.
|
||||
// TreatRoMountsAsRro does not apply here, as /sys/fs/cgroup is not a volume.
|
||||
s.Mounts = append(s.Mounts, runtimespec.Mount{
|
||||
Source: "cgroup",
|
||||
Destination: "/sys/fs/cgroup",
|
||||
Type: "cgroup",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
|
||||
})
|
||||
|
||||
// Copy all mounts from default mounts, except for
|
||||
// - mounts overridden by supplied mount;
|
||||
// - all mounts under /dev if a supplied /dev is present.
|
||||
mountSet := make(map[string]struct{})
|
||||
for _, m := range mounts {
|
||||
mountSet[filepath.Clean(m.ContainerPath)] = struct{}{}
|
||||
}
|
||||
|
||||
defaultMounts := s.Mounts
|
||||
s.Mounts = nil
|
||||
|
||||
for _, m := range defaultMounts {
|
||||
dst := filepath.Clean(m.Destination)
|
||||
if _, ok := mountSet[dst]; ok {
|
||||
// filter out mount overridden by a supplied mount
|
||||
continue
|
||||
}
|
||||
if _, mountDev := mountSet["/dev"]; mountDev && strings.HasPrefix(dst, "/dev/") {
|
||||
// filter out everything under /dev if /dev is a supplied mount
|
||||
continue
|
||||
}
|
||||
s.Mounts = append(s.Mounts, m)
|
||||
}
|
||||
|
||||
for _, mount := range mounts {
|
||||
var (
|
||||
dst = mount.GetContainerPath()
|
||||
src = mount.GetHostPath()
|
||||
)
|
||||
// Create the host path if it doesn't exist.
|
||||
// TODO(random-liu): Add CRI validation test for this case.
|
||||
if _, err := osi.Stat(src); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return fmt.Errorf("failed to stat %q: %w", src, err)
|
||||
}
|
||||
if err := osi.MkdirAll(src, 0755); err != nil {
|
||||
return fmt.Errorf("failed to mkdir %q: %w", src, err)
|
||||
}
|
||||
}
|
||||
// TODO(random-liu): Add cri-containerd integration test or cri validation test
|
||||
// for this.
|
||||
src, err := osi.ResolveSymbolicLink(src)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to resolve symlink %q: %w", src, err)
|
||||
}
|
||||
if s.Linux == nil {
|
||||
s.Linux = &runtimespec.Linux{}
|
||||
}
|
||||
options := []string{"rbind"}
|
||||
switch mount.GetPropagation() {
|
||||
case runtime.MountPropagation_PROPAGATION_PRIVATE:
|
||||
options = append(options, "rprivate")
|
||||
// Since default root propagation in runc is rprivate ignore
|
||||
// setting the root propagation
|
||||
case runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL:
|
||||
if err := ensureShared(src, osi.LookupMount); err != nil {
|
||||
return err
|
||||
}
|
||||
options = append(options, "rshared")
|
||||
s.Linux.RootfsPropagation = "rshared"
|
||||
case runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER:
|
||||
if err := ensureSharedOrSlave(src, osi.LookupMount); err != nil {
|
||||
return err
|
||||
}
|
||||
options = append(options, "rslave")
|
||||
if s.Linux.RootfsPropagation != "rshared" &&
|
||||
s.Linux.RootfsPropagation != "rslave" {
|
||||
s.Linux.RootfsPropagation = "rslave"
|
||||
}
|
||||
default:
|
||||
log.G(ctx).Warnf("Unknown propagation mode for hostPath %q", mount.HostPath)
|
||||
options = append(options, "rprivate")
|
||||
}
|
||||
|
||||
var srcIsDir bool
|
||||
if srcSt, err := osi.Stat(src); err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) { // happens when osi is FakeOS
|
||||
srcIsDir = true // assume src to be dir
|
||||
} else {
|
||||
return fmt.Errorf("failed to stat mount source %q: %w", src, err)
|
||||
}
|
||||
} else if srcSt != nil { // srcSt can be nil when osi is FakeOS
|
||||
srcIsDir = srcSt.IsDir()
|
||||
}
|
||||
|
||||
// NOTE(random-liu): we don't change all mounts to `ro` when root filesystem
|
||||
// is readonly. This is different from docker's behavior, but make more sense.
|
||||
if mount.GetReadonly() {
|
||||
if rtConfig != nil && rtConfig.TreatRoMountsAsRro && srcIsDir {
|
||||
options = append(options, "rro")
|
||||
} else {
|
||||
options = append(options, "ro")
|
||||
}
|
||||
} else {
|
||||
options = append(options, "rw")
|
||||
}
|
||||
|
||||
if mount.GetSelinuxRelabel() {
|
||||
ENOTSUP := syscall.Errno(0x5f) // Linux specific error code, this branch will not execute on non Linux platforms.
|
||||
if err := label.Relabel(src, mountLabel, false); err != nil && err != ENOTSUP {
|
||||
return fmt.Errorf("relabel %q with %q failed: %w", src, mountLabel, err)
|
||||
}
|
||||
}
|
||||
|
||||
var uidMapping []runtimespec.LinuxIDMapping
|
||||
if mount.UidMappings != nil {
|
||||
for _, mapping := range mount.UidMappings {
|
||||
uidMapping = append(uidMapping, runtimespec.LinuxIDMapping{
|
||||
HostID: mapping.HostId,
|
||||
ContainerID: mapping.ContainerId,
|
||||
Size: mapping.Length,
|
||||
})
|
||||
}
|
||||
}
|
||||
var gidMapping []runtimespec.LinuxIDMapping
|
||||
if mount.GidMappings != nil {
|
||||
for _, mapping := range mount.GidMappings {
|
||||
gidMapping = append(gidMapping, runtimespec.LinuxIDMapping{
|
||||
HostID: mapping.HostId,
|
||||
ContainerID: mapping.ContainerId,
|
||||
Size: mapping.Length,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
s.Mounts = append(s.Mounts, runtimespec.Mount{
|
||||
Source: src,
|
||||
Destination: dst,
|
||||
Type: "bind",
|
||||
Options: options,
|
||||
UIDMappings: uidMapping,
|
||||
GIDMappings: gidMapping,
|
||||
})
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure mount point on which path is mounted, is shared.
|
||||
func ensureShared(path string, lookupMount func(string) (mount.Info, error)) error {
|
||||
mountInfo, err := lookupMount(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Make sure source mount point is shared.
|
||||
optsSplit := strings.Split(mountInfo.Optional, " ")
|
||||
for _, opt := range optsSplit {
|
||||
if strings.HasPrefix(opt, "shared:") {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
return fmt.Errorf("path %q is mounted on %q but it is not a shared mount", path, mountInfo.Mountpoint)
|
||||
}
|
||||
|
||||
// ensure mount point on which path is mounted, is either shared or slave.
|
||||
func ensureSharedOrSlave(path string, lookupMount func(string) (mount.Info, error)) error {
|
||||
mountInfo, err := lookupMount(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Make sure source mount point is shared.
|
||||
optsSplit := strings.Split(mountInfo.Optional, " ")
|
||||
for _, opt := range optsSplit {
|
||||
if strings.HasPrefix(opt, "shared:") {
|
||||
return nil
|
||||
} else if strings.HasPrefix(opt, "master:") {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountInfo.Mountpoint)
|
||||
}
|
||||
|
||||
// getDeviceUserGroupID() is used to find the right uid/gid
|
||||
// value for the device node created in the container namespace.
|
||||
// The runtime executes mknod() and chmod()s the created
|
||||
// device with the values returned here.
|
||||
//
|
||||
// On Linux, uid and gid are sufficient and the user/groupname do not
|
||||
// need to be resolved.
|
||||
//
|
||||
// TODO(mythi): In case of user namespaces, the runtime simply bind
|
||||
// mounts the devices from the host. Additional logic is needed
|
||||
// to check that the runtimes effective UID/GID on the host has the
|
||||
// permissions to access the device node and/or the right user namespace
|
||||
// mappings are created.
|
||||
//
|
||||
// Ref: https://github.com/kubernetes/kubernetes/issues/92211
|
||||
func getDeviceUserGroupID(runAsVal *runtime.Int64Value) uint32 {
|
||||
if runAsVal != nil {
|
||||
return uint32(runAsVal.GetValue())
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// WithDevices sets the provided devices onto the container spec
|
||||
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig, enableDeviceOwnershipFromSecurityContext bool) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
if s.Linux == nil {
|
||||
s.Linux = &runtimespec.Linux{}
|
||||
}
|
||||
if s.Linux.Resources == nil {
|
||||
s.Linux.Resources = &runtimespec.LinuxResources{}
|
||||
}
|
||||
|
||||
oldDevices := len(s.Linux.Devices)
|
||||
|
||||
for _, device := range config.GetDevices() {
|
||||
path, err := osi.ResolveSymbolicLink(device.HostPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
o := oci.WithDevices(path, device.ContainerPath, device.Permissions)
|
||||
if err := o(ctx, client, c, s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if enableDeviceOwnershipFromSecurityContext {
|
||||
UID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsUser())
|
||||
GID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsGroup())
|
||||
// Loop all new devices added by oci.WithDevices() to update their
|
||||
// dev.UID/dev.GID.
|
||||
//
|
||||
// non-zero UID/GID from SecurityContext is used to override host's
|
||||
// device UID/GID for the container.
|
||||
for idx := oldDevices; idx < len(s.Linux.Devices); idx++ {
|
||||
if UID != 0 {
|
||||
*s.Linux.Devices[idx].UID = UID
|
||||
}
|
||||
if GID != 0 {
|
||||
*s.Linux.Devices[idx].GID = GID
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithResources sets the provided resource restrictions
|
||||
func WithResources(resources *runtime.LinuxContainerResources, tolerateMissingHugetlbController, disableHugetlbController bool) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
if resources == nil {
|
||||
return nil
|
||||
}
|
||||
if s.Linux == nil {
|
||||
s.Linux = &runtimespec.Linux{}
|
||||
}
|
||||
if s.Linux.Resources == nil {
|
||||
s.Linux.Resources = &runtimespec.LinuxResources{}
|
||||
}
|
||||
if s.Linux.Resources.CPU == nil {
|
||||
s.Linux.Resources.CPU = &runtimespec.LinuxCPU{}
|
||||
}
|
||||
if s.Linux.Resources.Memory == nil {
|
||||
s.Linux.Resources.Memory = &runtimespec.LinuxMemory{}
|
||||
}
|
||||
var (
|
||||
p = uint64(resources.GetCpuPeriod())
|
||||
q = resources.GetCpuQuota()
|
||||
shares = uint64(resources.GetCpuShares())
|
||||
limit = resources.GetMemoryLimitInBytes()
|
||||
swapLimit = resources.GetMemorySwapLimitInBytes()
|
||||
hugepages = resources.GetHugepageLimits()
|
||||
)
|
||||
|
||||
if p != 0 {
|
||||
s.Linux.Resources.CPU.Period = &p
|
||||
}
|
||||
if q != 0 {
|
||||
s.Linux.Resources.CPU.Quota = &q
|
||||
}
|
||||
if shares != 0 {
|
||||
s.Linux.Resources.CPU.Shares = &shares
|
||||
}
|
||||
if cpus := resources.GetCpusetCpus(); cpus != "" {
|
||||
s.Linux.Resources.CPU.Cpus = cpus
|
||||
}
|
||||
if mems := resources.GetCpusetMems(); mems != "" {
|
||||
s.Linux.Resources.CPU.Mems = resources.GetCpusetMems()
|
||||
}
|
||||
if limit != 0 {
|
||||
s.Linux.Resources.Memory.Limit = &limit
|
||||
// swap/memory limit should be equal to prevent container from swapping by default
|
||||
if swapLimit == 0 && SwapControllerAvailable() {
|
||||
s.Linux.Resources.Memory.Swap = &limit
|
||||
}
|
||||
}
|
||||
if swapLimit != 0 && SwapControllerAvailable() {
|
||||
s.Linux.Resources.Memory.Swap = &swapLimit
|
||||
}
|
||||
|
||||
if !disableHugetlbController {
|
||||
if isHugetlbControllerPresent() {
|
||||
for _, limit := range hugepages {
|
||||
s.Linux.Resources.HugepageLimits = append(s.Linux.Resources.HugepageLimits, runtimespec.LinuxHugepageLimit{
|
||||
Pagesize: limit.PageSize,
|
||||
Limit: limit.Limit,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
if !tolerateMissingHugetlbController {
|
||||
return errors.New("huge pages limits are specified but hugetlb cgroup controller is missing. " +
|
||||
"Please set tolerate_missing_hugetlb_controller to `true` to ignore this error")
|
||||
}
|
||||
log.L.Warn("hugetlb cgroup controller is absent. skipping huge pages limits")
|
||||
}
|
||||
}
|
||||
|
||||
if unified := resources.GetUnified(); unified != nil {
|
||||
if s.Linux.Resources.Unified == nil {
|
||||
s.Linux.Resources.Unified = make(map[string]string)
|
||||
}
|
||||
for k, v := range unified {
|
||||
s.Linux.Resources.Unified[k] = v
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithOOMScoreAdj sets the oom score
|
||||
func WithOOMScoreAdj(config *runtime.ContainerConfig, restrict bool) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
if s.Process == nil {
|
||||
s.Process = &runtimespec.Process{}
|
||||
}
|
||||
|
||||
resources := config.GetLinux().GetResources()
|
||||
if resources == nil {
|
||||
return nil
|
||||
}
|
||||
adj := int(resources.GetOomScoreAdj())
|
||||
if restrict {
|
||||
var err error
|
||||
adj, err = restrictOOMScoreAdj(adj)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
s.Process.OOMScoreAdj = &adj
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithPodOOMScoreAdj sets the oom score for the pod sandbox
|
||||
func WithPodOOMScoreAdj(adj int, restrict bool) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
if s.Process == nil {
|
||||
s.Process = &runtimespec.Process{}
|
||||
}
|
||||
if restrict {
|
||||
var err error
|
||||
adj, err = restrictOOMScoreAdj(adj)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
s.Process.OOMScoreAdj = &adj
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func getCurrentOOMScoreAdj() (int, error) {
|
||||
b, err := os.ReadFile("/proc/self/oom_score_adj")
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("could not get the daemon oom_score_adj: %w", err)
|
||||
}
|
||||
s := strings.TrimSpace(string(b))
|
||||
i, err := strconv.Atoi(s)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("could not get the daemon oom_score_adj: %w", err)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
func restrictOOMScoreAdj(preferredOOMScoreAdj int) (int, error) {
|
||||
currentOOMScoreAdj, err := getCurrentOOMScoreAdj()
|
||||
if err != nil {
|
||||
return preferredOOMScoreAdj, err
|
||||
}
|
||||
if preferredOOMScoreAdj < currentOOMScoreAdj {
|
||||
return currentOOMScoreAdj, nil
|
||||
}
|
||||
return preferredOOMScoreAdj, nil
|
||||
}
|
||||
47
internal/cri/opts/spec_linux_test.go
Normal file
47
internal/cri/opts/spec_linux_test.go
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package opts
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestMergeGids(t *testing.T) {
|
||||
gids1 := []uint32{3, 2, 1}
|
||||
gids2 := []uint32{2, 3, 4}
|
||||
assert.Equal(t, []uint32{1, 2, 3, 4}, mergeGids(gids1, gids2))
|
||||
}
|
||||
|
||||
func TestRestrictOOMScoreAdj(t *testing.T) {
|
||||
current, err := getCurrentOOMScoreAdj()
|
||||
require.NoError(t, err)
|
||||
|
||||
got, err := restrictOOMScoreAdj(current - 1)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, got, current)
|
||||
|
||||
got, err = restrictOOMScoreAdj(current)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, got, current)
|
||||
|
||||
got, err = restrictOOMScoreAdj(current + 1)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, got, current+1)
|
||||
}
|
||||
42
internal/cri/opts/spec_nonlinux.go
Normal file
42
internal/cri/opts/spec_nonlinux.go
Normal file
@@ -0,0 +1,42 @@
|
||||
//go:build !linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package opts
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/containerd/containerd/v2/core/containers"
|
||||
"github.com/containerd/containerd/v2/pkg/oci"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
||||
func isHugetlbControllerPresent() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func SwapControllerAvailable() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// WithCDI does nothing on non-Linux platforms.
|
||||
func WithCDI(_ map[string]string, _ []*runtime.CDIDevice) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, container *containers.Container, spec *oci.Spec) error {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
36
internal/cri/opts/spec_nonwindows.go
Normal file
36
internal/cri/opts/spec_nonwindows.go
Normal file
@@ -0,0 +1,36 @@
|
||||
//go:build !windows
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package opts
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/containerd/containerd/v2/core/containers"
|
||||
"github.com/containerd/containerd/v2/pkg/oci"
|
||||
"github.com/containerd/errdefs"
|
||||
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
||||
func WithProcessCommandLineOrArgsForWindows(config *runtime.ContainerConfig, image *imagespec.ImageConfig) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
return errdefs.ErrNotImplemented
|
||||
}
|
||||
}
|
||||
375
internal/cri/opts/spec_opts.go
Normal file
375
internal/cri/opts/spec_opts.go
Normal file
@@ -0,0 +1,375 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package opts
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
|
||||
"github.com/containerd/containerd/v2/core/containers"
|
||||
"github.com/containerd/containerd/v2/internal/cri/util"
|
||||
"github.com/containerd/containerd/v2/pkg/oci"
|
||||
)
|
||||
|
||||
// DefaultSandboxCPUshares is default cpu shares for sandbox container.
|
||||
// TODO(windows): Revisit cpu shares for windows (https://github.com/containerd/cri/issues/1297)
|
||||
const DefaultSandboxCPUshares = 2
|
||||
|
||||
// WithRelativeRoot sets the root for the container
|
||||
func WithRelativeRoot(root string) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
if s.Root == nil {
|
||||
s.Root = &runtimespec.Root{}
|
||||
}
|
||||
s.Root.Path = root
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithoutRoot sets the root to nil for the container.
|
||||
func WithoutRoot(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
s.Root = nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// WithProcessArgs sets the process args on the spec based on the image and runtime config
|
||||
func WithProcessArgs(config *runtime.ContainerConfig, image *imagespec.ImageConfig) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
command, args := config.GetCommand(), config.GetArgs()
|
||||
// The following logic is migrated from https://github.com/moby/moby/blob/master/daemon/commit.go
|
||||
// TODO(random-liu): Clearly define the commands overwrite behavior.
|
||||
if len(command) == 0 {
|
||||
// Copy array to avoid data race.
|
||||
if len(args) == 0 {
|
||||
args = append([]string{}, image.Cmd...)
|
||||
}
|
||||
if command == nil {
|
||||
if !(len(image.Entrypoint) == 1 && image.Entrypoint[0] == "") {
|
||||
command = append([]string{}, image.Entrypoint...)
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(command) == 0 && len(args) == 0 {
|
||||
return errors.New("no command specified")
|
||||
}
|
||||
return oci.WithProcessArgs(append(command, args...)...)(ctx, client, c, s)
|
||||
}
|
||||
}
|
||||
|
||||
// mounts defines how to sort runtime.Mount.
|
||||
// This is the same with the Docker implementation:
|
||||
//
|
||||
// https://github.com/moby/moby/blob/17.05.x/daemon/volumes.go#L26
|
||||
type orderedMounts []*runtime.Mount
|
||||
|
||||
// Len returns the number of mounts. Used in sorting.
|
||||
func (m orderedMounts) Len() int {
|
||||
return len(m)
|
||||
}
|
||||
|
||||
// Less returns true if the number of parts (a/b/c would be 3 parts) in the
|
||||
// mount indexed by parameter 1 is less than that of the mount indexed by
|
||||
// parameter 2. Used in sorting.
|
||||
func (m orderedMounts) Less(i, j int) bool {
|
||||
return m.parts(i) < m.parts(j)
|
||||
}
|
||||
|
||||
// Swap swaps two items in an array of mounts. Used in sorting
|
||||
func (m orderedMounts) Swap(i, j int) {
|
||||
m[i], m[j] = m[j], m[i]
|
||||
}
|
||||
|
||||
// parts returns the number of parts in the destination of a mount. Used in sorting.
|
||||
func (m orderedMounts) parts(i int) int {
|
||||
return strings.Count(filepath.Clean(m[i].ContainerPath), string(os.PathSeparator))
|
||||
}
|
||||
|
||||
// WithAnnotation sets the provided annotation
|
||||
func WithAnnotation(k, v string) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
if s.Annotations == nil {
|
||||
s.Annotations = make(map[string]string)
|
||||
}
|
||||
s.Annotations[k] = v
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithAdditionalGIDs adds any additional groups listed for a particular user in the
|
||||
// /etc/groups file of the image's root filesystem to the OCI spec's additionalGids array.
|
||||
func WithAdditionalGIDs(userstr string) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
if s.Process == nil {
|
||||
s.Process = &runtimespec.Process{}
|
||||
}
|
||||
gids := s.Process.User.AdditionalGids
|
||||
if err := oci.WithAdditionalGIDs(userstr)(ctx, client, c, s); err != nil {
|
||||
return err
|
||||
}
|
||||
// Merge existing gids and new gids.
|
||||
s.Process.User.AdditionalGids = mergeGids(s.Process.User.AdditionalGids, gids)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func mergeGids(gids1, gids2 []uint32) []uint32 {
|
||||
gidsMap := make(map[uint32]struct{})
|
||||
for _, gid1 := range gids1 {
|
||||
gidsMap[gid1] = struct{}{}
|
||||
}
|
||||
for _, gid2 := range gids2 {
|
||||
gidsMap[gid2] = struct{}{}
|
||||
}
|
||||
var gids []uint32
|
||||
for gid := range gidsMap {
|
||||
gids = append(gids, gid)
|
||||
}
|
||||
sort.Slice(gids, func(i, j int) bool { return gids[i] < gids[j] })
|
||||
return gids
|
||||
}
|
||||
|
||||
// WithoutDefaultSecuritySettings removes the default security settings generated on a spec
|
||||
func WithoutDefaultSecuritySettings(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
if s.Process == nil {
|
||||
s.Process = &runtimespec.Process{}
|
||||
}
|
||||
// Make sure no default seccomp/apparmor is specified
|
||||
s.Process.ApparmorProfile = ""
|
||||
if s.Linux != nil {
|
||||
s.Linux.Seccomp = nil
|
||||
}
|
||||
// Remove default rlimits (See https://github.com/containerd/cri/issues/515)
|
||||
s.Process.Rlimits = nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// WithCapabilities sets the provided capabilities from the security context
|
||||
func WithCapabilities(sc *runtime.LinuxContainerSecurityContext, allCaps []string) oci.SpecOpts {
|
||||
capabilities := sc.GetCapabilities()
|
||||
if capabilities == nil {
|
||||
return nullOpt
|
||||
}
|
||||
|
||||
var opts []oci.SpecOpts
|
||||
// Add/drop all capabilities if "all" is specified, so that
|
||||
// following individual add/drop could still work. E.g.
|
||||
// AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"}
|
||||
// will be all capabilities without `CAP_CHOWN`.
|
||||
if util.InStringSlice(capabilities.GetAddCapabilities(), "ALL") {
|
||||
opts = append(opts, oci.WithCapabilities(allCaps))
|
||||
}
|
||||
if util.InStringSlice(capabilities.GetDropCapabilities(), "ALL") {
|
||||
opts = append(opts, oci.WithCapabilities(nil))
|
||||
}
|
||||
|
||||
var caps []string
|
||||
for _, c := range capabilities.GetAddCapabilities() {
|
||||
if strings.ToUpper(c) == "ALL" {
|
||||
continue
|
||||
}
|
||||
// Capabilities in CRI doesn't have `CAP_` prefix, so add it.
|
||||
caps = append(caps, "CAP_"+strings.ToUpper(c))
|
||||
}
|
||||
opts = append(opts, oci.WithAddedCapabilities(caps))
|
||||
|
||||
caps = []string{}
|
||||
for _, c := range capabilities.GetDropCapabilities() {
|
||||
if strings.ToUpper(c) == "ALL" {
|
||||
continue
|
||||
}
|
||||
caps = append(caps, "CAP_"+strings.ToUpper(c))
|
||||
}
|
||||
opts = append(opts, oci.WithDroppedCapabilities(caps))
|
||||
return oci.Compose(opts...)
|
||||
}
|
||||
|
||||
func nullOpt(_ context.Context, _ oci.Client, _ *containers.Container, _ *runtimespec.Spec) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// WithoutAmbientCaps removes the ambient caps from the spec
|
||||
func WithoutAmbientCaps(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
if s.Process == nil {
|
||||
s.Process = &runtimespec.Process{}
|
||||
}
|
||||
if s.Process.Capabilities == nil {
|
||||
s.Process.Capabilities = &runtimespec.LinuxCapabilities{}
|
||||
}
|
||||
s.Process.Capabilities.Ambient = nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// WithDisabledCgroups clears the Cgroups Path from the spec
|
||||
func WithDisabledCgroups(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
if s.Linux == nil {
|
||||
s.Linux = &runtimespec.Linux{}
|
||||
}
|
||||
s.Linux.CgroupsPath = ""
|
||||
return nil
|
||||
}
|
||||
|
||||
// WithSelinuxLabels sets the mount and process labels
|
||||
func WithSelinuxLabels(process, mount string) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
if s.Linux == nil {
|
||||
s.Linux = &runtimespec.Linux{}
|
||||
}
|
||||
if s.Process == nil {
|
||||
s.Process = &runtimespec.Process{}
|
||||
}
|
||||
s.Linux.MountLabel = mount
|
||||
s.Process.SelinuxLabel = process
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithSysctls sets the provided sysctls onto the spec
|
||||
func WithSysctls(sysctls map[string]string) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
if s.Linux == nil {
|
||||
s.Linux = &runtimespec.Linux{}
|
||||
}
|
||||
if s.Linux.Sysctl == nil {
|
||||
s.Linux.Sysctl = make(map[string]string)
|
||||
}
|
||||
for k, v := range sysctls {
|
||||
s.Linux.Sysctl[k] = v
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithSupplementalGroups sets the supplemental groups for the process
|
||||
func WithSupplementalGroups(groups []int64) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
if s.Process == nil {
|
||||
s.Process = &runtimespec.Process{}
|
||||
}
|
||||
var guids []uint32
|
||||
for _, g := range groups {
|
||||
guids = append(guids, uint32(g))
|
||||
}
|
||||
s.Process.User.AdditionalGids = mergeGids(s.Process.User.AdditionalGids, guids)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithDefaultSandboxShares sets the default sandbox CPU shares
|
||||
func WithDefaultSandboxShares(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
if s.Linux == nil {
|
||||
s.Linux = &runtimespec.Linux{}
|
||||
}
|
||||
if s.Linux.Resources == nil {
|
||||
s.Linux.Resources = &runtimespec.LinuxResources{}
|
||||
}
|
||||
if s.Linux.Resources.CPU == nil {
|
||||
s.Linux.Resources.CPU = &runtimespec.LinuxCPU{}
|
||||
}
|
||||
i := uint64(DefaultSandboxCPUshares)
|
||||
s.Linux.Resources.CPU.Shares = &i
|
||||
return nil
|
||||
}
|
||||
|
||||
// WithoutNamespace removes the provided namespace
|
||||
func WithoutNamespace(t runtimespec.LinuxNamespaceType) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
if s.Linux == nil {
|
||||
return nil
|
||||
}
|
||||
var namespaces []runtimespec.LinuxNamespace
|
||||
for i, ns := range s.Linux.Namespaces {
|
||||
if ns.Type != t {
|
||||
namespaces = append(namespaces, s.Linux.Namespaces[i])
|
||||
}
|
||||
}
|
||||
s.Linux.Namespaces = namespaces
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithPodNamespaces sets the pod namespaces for the container
|
||||
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32, uids, gids []runtimespec.LinuxIDMapping) oci.SpecOpts {
|
||||
namespaces := config.GetNamespaceOptions()
|
||||
|
||||
opts := []oci.SpecOpts{
|
||||
oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.NetworkNamespace, Path: GetNetworkNamespace(sandboxPid)}),
|
||||
oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.IPCNamespace, Path: GetIPCNamespace(sandboxPid)}),
|
||||
oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UTSNamespace, Path: GetUTSNamespace(sandboxPid)}),
|
||||
}
|
||||
if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER {
|
||||
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(targetPid)}))
|
||||
}
|
||||
|
||||
if namespaces.GetUsernsOptions() != nil {
|
||||
switch namespaces.GetUsernsOptions().GetMode() {
|
||||
case runtime.NamespaceMode_NODE:
|
||||
// Nothing to do. Not adding userns field uses the node userns.
|
||||
case runtime.NamespaceMode_POD:
|
||||
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UserNamespace, Path: GetUserNamespace(sandboxPid)}))
|
||||
opts = append(opts, oci.WithUserNamespace(uids, gids))
|
||||
}
|
||||
}
|
||||
|
||||
return oci.Compose(opts...)
|
||||
}
|
||||
|
||||
const (
|
||||
// netNSFormat is the format of network namespace of a process.
|
||||
netNSFormat = "/proc/%v/ns/net"
|
||||
// ipcNSFormat is the format of ipc namespace of a process.
|
||||
ipcNSFormat = "/proc/%v/ns/ipc"
|
||||
// utsNSFormat is the format of uts namespace of a process.
|
||||
utsNSFormat = "/proc/%v/ns/uts"
|
||||
// pidNSFormat is the format of pid namespace of a process.
|
||||
pidNSFormat = "/proc/%v/ns/pid"
|
||||
// userNSFormat is the format of user namespace of a process.
|
||||
userNSFormat = "/proc/%v/ns/user"
|
||||
)
|
||||
|
||||
// GetNetworkNamespace returns the network namespace of a process.
|
||||
func GetNetworkNamespace(pid uint32) string {
|
||||
return fmt.Sprintf(netNSFormat, pid)
|
||||
}
|
||||
|
||||
// GetIPCNamespace returns the ipc namespace of a process.
|
||||
func GetIPCNamespace(pid uint32) string {
|
||||
return fmt.Sprintf(ipcNSFormat, pid)
|
||||
}
|
||||
|
||||
// GetUTSNamespace returns the uts namespace of a process.
|
||||
func GetUTSNamespace(pid uint32) string {
|
||||
return fmt.Sprintf(utsNSFormat, pid)
|
||||
}
|
||||
|
||||
// GetPIDNamespace returns the pid namespace of a process.
|
||||
func GetPIDNamespace(pid uint32) string {
|
||||
return fmt.Sprintf(pidNSFormat, pid)
|
||||
}
|
||||
|
||||
// GetUserNamespace returns the user namespace of a process.
|
||||
func GetUserNamespace(pid uint32) string {
|
||||
return fmt.Sprintf(userNSFormat, pid)
|
||||
}
|
||||
46
internal/cri/opts/spec_opts_test.go
Normal file
46
internal/cri/opts/spec_opts_test.go
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package opts
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
||||
func TestOrderedMounts(t *testing.T) {
|
||||
mounts := []*runtime.Mount{
|
||||
{ContainerPath: "/a/b/c"},
|
||||
{ContainerPath: "/a/b"},
|
||||
{ContainerPath: "/a/b/c/d"},
|
||||
{ContainerPath: "/a"},
|
||||
{ContainerPath: "/b"},
|
||||
{ContainerPath: "/b/c"},
|
||||
}
|
||||
expected := []*runtime.Mount{
|
||||
{ContainerPath: "/a"},
|
||||
{ContainerPath: "/b"},
|
||||
{ContainerPath: "/a/b"},
|
||||
{ContainerPath: "/b/c"},
|
||||
{ContainerPath: "/a/b/c"},
|
||||
{ContainerPath: "/a/b/c/d"},
|
||||
}
|
||||
sort.Stable(orderedMounts(mounts))
|
||||
assert.Equal(t, expected, mounts)
|
||||
}
|
||||
121
internal/cri/opts/spec_windows.go
Normal file
121
internal/cri/opts/spec_windows.go
Normal file
@@ -0,0 +1,121 @@
|
||||
//go:build windows
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package opts
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
|
||||
"github.com/containerd/containerd/v2/core/containers"
|
||||
"github.com/containerd/containerd/v2/pkg/oci"
|
||||
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/sys/windows"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
||||
func escapeAndCombineArgsWindows(args []string) string {
|
||||
escaped := make([]string, len(args))
|
||||
for i, a := range args {
|
||||
escaped[i] = windows.EscapeArg(a)
|
||||
}
|
||||
return strings.Join(escaped, " ")
|
||||
}
|
||||
|
||||
// WithProcessCommandLineOrArgsForWindows sets the process command line or process args on the spec based on the image
|
||||
// and runtime config
|
||||
// If image.ArgsEscaped field is set, this function sets the process command line and if not, it sets the
|
||||
// process args field
|
||||
func WithProcessCommandLineOrArgsForWindows(config *runtime.ContainerConfig, image *imagespec.ImageConfig) oci.SpecOpts {
|
||||
if image.ArgsEscaped { //nolint:staticcheck // ArgsEscaped is deprecated
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
// firstArgFromImg is a flag that is returned to indicate that the first arg in the slice comes from either the
|
||||
// image Entrypoint or Cmd. If the first arg instead comes from the container config (e.g. overriding the image values),
|
||||
// it should be false. This is done to support the non-OCI ArgsEscaped field that Docker used to determine how the image
|
||||
// entrypoint and cmd should be interpreted.
|
||||
//
|
||||
args, firstArgFromImg, err := getArgs(image.Entrypoint, image.Cmd, config.GetCommand(), config.GetArgs())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var cmdLine string
|
||||
if image.ArgsEscaped && firstArgFromImg { //nolint:staticcheck // ArgsEscaped is deprecated
|
||||
cmdLine = args[0]
|
||||
if len(args) > 1 {
|
||||
cmdLine += " " + escapeAndCombineArgsWindows(args[1:])
|
||||
}
|
||||
} else {
|
||||
cmdLine = escapeAndCombineArgsWindows(args)
|
||||
}
|
||||
|
||||
return oci.WithProcessCommandLine(cmdLine)(ctx, client, c, s)
|
||||
}
|
||||
}
|
||||
// if ArgsEscaped is not set
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
args, _, err := getArgs(image.Entrypoint, image.Cmd, config.GetCommand(), config.GetArgs())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return oci.WithProcessArgs(args...)(ctx, client, c, s)
|
||||
}
|
||||
}
|
||||
|
||||
// getArgs is used to evaluate the overall args for the container by taking into account the image command and entrypoints
|
||||
// along with the container command and entrypoints specified through the podspec if any
|
||||
func getArgs(imgEntrypoint []string, imgCmd []string, ctrEntrypoint []string, ctrCmd []string) ([]string, bool, error) {
|
||||
//nolint:dupword
|
||||
// firstArgFromImg is a flag that is returned to indicate that the first arg in the slice comes from either the image
|
||||
// Entrypoint or Cmd. If the first arg instead comes from the container config (e.g. overriding the image values),
|
||||
// it should be false.
|
||||
// Essentially this means firstArgFromImg should be true iff:
|
||||
// Ctr entrypoint ctr cmd image entrypoint image cmd firstArgFromImg
|
||||
// --------------------------------------------------------------------------------
|
||||
// nil nil exists nil true
|
||||
// nil nil nil exists true
|
||||
|
||||
// This is needed to support the non-OCI ArgsEscaped field used by Docker. ArgsEscaped is used for
|
||||
// Windows images to indicate that the command has already been escaped and should be
|
||||
// used directly as the command line.
|
||||
var firstArgFromImg bool
|
||||
entrypoint, cmd := ctrEntrypoint, ctrCmd
|
||||
// The following logic is migrated from https://github.com/moby/moby/blob/master/daemon/commit.go
|
||||
// TODO(random-liu): Clearly define the commands overwrite behavior.
|
||||
if len(entrypoint) == 0 {
|
||||
// Copy array to avoid data race.
|
||||
if len(cmd) == 0 {
|
||||
cmd = append([]string{}, imgCmd...)
|
||||
if len(imgCmd) > 0 {
|
||||
firstArgFromImg = true
|
||||
}
|
||||
}
|
||||
if entrypoint == nil {
|
||||
entrypoint = append([]string{}, imgEntrypoint...)
|
||||
if len(imgEntrypoint) > 0 || len(ctrCmd) == 0 {
|
||||
firstArgFromImg = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(entrypoint) == 0 && len(cmd) == 0 {
|
||||
return nil, false, errors.New("no command specified")
|
||||
}
|
||||
return append(entrypoint, cmd...), firstArgFromImg, nil
|
||||
}
|
||||
261
internal/cri/opts/spec_windows_opts.go
Normal file
261
internal/cri/opts/spec_windows_opts.go
Normal file
@@ -0,0 +1,261 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package opts
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/containerd/containerd/v2/core/containers"
|
||||
"github.com/containerd/containerd/v2/pkg/oci"
|
||||
osinterface "github.com/containerd/containerd/v2/pkg/os"
|
||||
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
||||
// namedPipePath returns true if the given path is to a named pipe.
|
||||
func namedPipePath(p string) bool {
|
||||
return strings.HasPrefix(p, `\\.\pipe\`)
|
||||
}
|
||||
|
||||
// cleanMount returns a cleaned version of the mount path. The input is returned
|
||||
// as-is if it is a named pipe path.
|
||||
func cleanMount(p string) string {
|
||||
if namedPipePath(p) {
|
||||
return p
|
||||
}
|
||||
return filepath.Clean(p)
|
||||
}
|
||||
|
||||
func parseMount(osi osinterface.OS, mount *runtime.Mount) (*runtimespec.Mount, error) {
|
||||
var (
|
||||
dst = mount.GetContainerPath()
|
||||
src = mount.GetHostPath()
|
||||
)
|
||||
// In the case of a named pipe mount on Windows, don't stat the file
|
||||
// or do other operations that open it, as that could interfere with
|
||||
// the listening process. filepath.Clean also breaks named pipe
|
||||
// paths, so don't use it.
|
||||
if !namedPipePath(src) {
|
||||
if _, err := osi.Stat(src); err != nil {
|
||||
// Create the host path if it doesn't exist. This will align
|
||||
// the behavior with the Linux implementation, but it doesn't
|
||||
// align with Docker's behavior on Windows.
|
||||
if !os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("failed to stat %q: %w", src, err)
|
||||
}
|
||||
if err := osi.MkdirAll(src, 0755); err != nil {
|
||||
return nil, fmt.Errorf("failed to mkdir %q: %w", src, err)
|
||||
}
|
||||
}
|
||||
var err error
|
||||
originalSrc := src
|
||||
src, err = osi.ResolveSymbolicLink(src)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to resolve symlink %q: %w", originalSrc, err)
|
||||
}
|
||||
// hcsshim requires clean path, especially '/' -> '\'. Additionally,
|
||||
// for the destination, absolute paths should have the C: prefix.
|
||||
src = filepath.Clean(src)
|
||||
|
||||
// filepath.Clean adds a '.' at the end if the path is a
|
||||
// drive (like Z:, E: etc.). Keeping this '.' in the path
|
||||
// causes incorrect parameter error when starting the
|
||||
// container on windows. Remove it here.
|
||||
if !(len(dst) == 2 && dst[1] == ':') {
|
||||
dst = filepath.Clean(dst)
|
||||
if dst[0] == '\\' {
|
||||
dst = "C:" + dst
|
||||
}
|
||||
} else if dst[0] == 'c' || dst[0] == 'C' {
|
||||
return nil, fmt.Errorf("destination path can not be C drive")
|
||||
}
|
||||
}
|
||||
|
||||
var options []string
|
||||
// NOTE(random-liu): we don't change all mounts to `ro` when root filesystem
|
||||
// is readonly. This is different from docker's behavior, but make more sense.
|
||||
if mount.GetReadonly() {
|
||||
options = append(options, "ro")
|
||||
} else {
|
||||
options = append(options, "rw")
|
||||
}
|
||||
return &runtimespec.Mount{Source: src, Destination: dst, Options: options}, nil
|
||||
}
|
||||
|
||||
// WithWindowsMounts sorts and adds runtime and CRI mounts to the spec for
|
||||
// windows container.
|
||||
func WithWindowsMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) error {
|
||||
// mergeMounts merge CRI mounts with extra mounts. If a mount destination
|
||||
// is mounted by both a CRI mount and an extra mount, the CRI mount will
|
||||
// be kept.
|
||||
var (
|
||||
criMounts = config.GetMounts()
|
||||
mounts = append([]*runtime.Mount{}, criMounts...)
|
||||
)
|
||||
// Copy all mounts from extra mounts, except for mounts overridden by CRI.
|
||||
for _, e := range extra {
|
||||
found := false
|
||||
for _, c := range criMounts {
|
||||
if cleanMount(e.ContainerPath) == cleanMount(c.ContainerPath) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
mounts = append(mounts, e)
|
||||
}
|
||||
}
|
||||
|
||||
// Sort mounts in number of parts. This ensures that high level mounts don't
|
||||
// shadow other mounts.
|
||||
sort.Sort(orderedMounts(mounts))
|
||||
|
||||
// Copy all mounts from default mounts, except for
|
||||
// mounts overridden by supplied mount;
|
||||
mountSet := make(map[string]struct{})
|
||||
for _, m := range mounts {
|
||||
mountSet[cleanMount(m.ContainerPath)] = struct{}{}
|
||||
}
|
||||
|
||||
defaultMounts := s.Mounts
|
||||
s.Mounts = nil
|
||||
|
||||
for _, m := range defaultMounts {
|
||||
dst := cleanMount(m.Destination)
|
||||
if _, ok := mountSet[dst]; ok {
|
||||
// filter out mount overridden by a supplied mount
|
||||
continue
|
||||
}
|
||||
s.Mounts = append(s.Mounts, m)
|
||||
}
|
||||
|
||||
for _, mount := range mounts {
|
||||
parsedMount, err := parseMount(osi, mount)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.Mounts = append(s.Mounts, *parsedMount)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithWindowsResources sets the provided resource restrictions for windows.
|
||||
func WithWindowsResources(resources *runtime.WindowsContainerResources) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
if resources == nil {
|
||||
return nil
|
||||
}
|
||||
if s.Windows == nil {
|
||||
s.Windows = &runtimespec.Windows{}
|
||||
}
|
||||
if s.Windows.Resources == nil {
|
||||
s.Windows.Resources = &runtimespec.WindowsResources{}
|
||||
}
|
||||
if s.Windows.Resources.Memory == nil {
|
||||
s.Windows.Resources.Memory = &runtimespec.WindowsMemoryResources{}
|
||||
}
|
||||
|
||||
var (
|
||||
count = uint64(resources.GetCpuCount())
|
||||
shares = uint16(resources.GetCpuShares())
|
||||
max = uint16(resources.GetCpuMaximum())
|
||||
limit = uint64(resources.GetMemoryLimitInBytes())
|
||||
)
|
||||
if s.Windows.Resources.CPU == nil && (count != 0 || shares != 0 || max != 0) {
|
||||
s.Windows.Resources.CPU = &runtimespec.WindowsCPUResources{}
|
||||
}
|
||||
if count != 0 {
|
||||
s.Windows.Resources.CPU.Count = &count
|
||||
}
|
||||
if shares != 0 {
|
||||
s.Windows.Resources.CPU.Shares = &shares
|
||||
}
|
||||
if max != 0 {
|
||||
s.Windows.Resources.CPU.Maximum = &max
|
||||
}
|
||||
if limit != 0 {
|
||||
s.Windows.Resources.Memory.Limit = &limit
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithWindowsDefaultSandboxShares sets the default sandbox CPU shares
|
||||
func WithWindowsDefaultSandboxShares(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
if s.Windows == nil {
|
||||
s.Windows = &runtimespec.Windows{}
|
||||
}
|
||||
if s.Windows.Resources == nil {
|
||||
s.Windows.Resources = &runtimespec.WindowsResources{}
|
||||
}
|
||||
if s.Windows.Resources.CPU == nil {
|
||||
s.Windows.Resources.CPU = &runtimespec.WindowsCPUResources{}
|
||||
}
|
||||
i := uint16(DefaultSandboxCPUshares)
|
||||
s.Windows.Resources.CPU.Shares = &i
|
||||
return nil
|
||||
}
|
||||
|
||||
// WithWindowsCredentialSpec assigns `credentialSpec` to the
|
||||
// `runtime.Spec.Windows.CredentialSpec` field.
|
||||
func WithWindowsCredentialSpec(credentialSpec string) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
|
||||
if s.Windows == nil {
|
||||
s.Windows = &runtimespec.Windows{}
|
||||
}
|
||||
s.Windows.CredentialSpec = credentialSpec
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithWindowsDevices sets the provided devices onto the container spec
|
||||
func WithWindowsDevices(config *runtime.ContainerConfig) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
for _, device := range config.GetDevices() {
|
||||
if device.ContainerPath != "" {
|
||||
return fmt.Errorf("unexpected ContainerPath %s, must be empty", device.ContainerPath)
|
||||
}
|
||||
|
||||
if device.Permissions != "" {
|
||||
return fmt.Errorf("unexpected Permissions %s, must be empty", device.Permissions)
|
||||
}
|
||||
|
||||
hostPath := device.HostPath
|
||||
if strings.HasPrefix(hostPath, "class/") {
|
||||
hostPath = "class://" + strings.TrimPrefix(hostPath, "class/")
|
||||
}
|
||||
|
||||
idType, id, ok := strings.Cut(hostPath, "://")
|
||||
if !ok {
|
||||
return fmt.Errorf("unrecognised HostPath format %v, must match IDType://ID", device.HostPath)
|
||||
}
|
||||
|
||||
o := oci.WithWindowsDevice(idType, id)
|
||||
if err := o(ctx, client, c, s); err != nil {
|
||||
return fmt.Errorf("failed adding device with HostPath %v: %w", device.HostPath, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
249
internal/cri/opts/spec_windows_test.go
Normal file
249
internal/cri/opts/spec_windows_test.go
Normal file
@@ -0,0 +1,249 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package opts
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
|
||||
"github.com/containerd/containerd/v2/core/containers"
|
||||
"github.com/containerd/containerd/v2/pkg/namespaces"
|
||||
"github.com/containerd/containerd/v2/pkg/oci"
|
||||
osinterface "github.com/containerd/containerd/v2/pkg/os"
|
||||
)
|
||||
|
||||
func TestWithDevices(t *testing.T) {
|
||||
testcases := []struct {
|
||||
name string
|
||||
devices []*runtime.Device
|
||||
isLCOW bool
|
||||
|
||||
expectError bool
|
||||
expectedWindowsDevices []specs.WindowsDevice
|
||||
}{
|
||||
{
|
||||
name: "empty",
|
||||
|
||||
expectError: false,
|
||||
},
|
||||
// The only supported field is HostPath
|
||||
{
|
||||
name: "empty fields",
|
||||
devices: []*runtime.Device{{}},
|
||||
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "containerPath",
|
||||
devices: []*runtime.Device{{ContainerPath: "something"}},
|
||||
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "permissions",
|
||||
devices: []*runtime.Device{{Permissions: "something"}},
|
||||
|
||||
expectError: true,
|
||||
},
|
||||
// Produced by https://github.com/aarnaud/k8s-directx-device-plugin/blob/0f3db32622daa577c85621941682bee6f9080954/cmd/k8s-device-plugin/main.go
|
||||
// This is also the syntax dockershim and cri-dockerd support (or rather, pass through to docker, which parses this syntax)
|
||||
{
|
||||
name: "hostPath_docker_style",
|
||||
devices: []*runtime.Device{{HostPath: "class/5B45201D-F2F2-4F3B-85BB-30FF1F953599"}},
|
||||
|
||||
expectError: false,
|
||||
expectedWindowsDevices: []specs.WindowsDevice{{ID: "5B45201D-F2F2-4F3B-85BB-30FF1F953599", IDType: "class"}},
|
||||
},
|
||||
// Docker _only_ accepts `class` ID Type, so anything else should fail.
|
||||
// See https://github.com/moby/moby/blob/v20.10.13/daemon/oci_windows.go#L283-L294
|
||||
{
|
||||
name: "hostPath_docker_style_non-class_idtype",
|
||||
devices: []*runtime.Device{{HostPath: "vpci-location-path/5B45201D-F2F2-4F3B-85BB-30FF1F953599"}},
|
||||
|
||||
expectError: true,
|
||||
},
|
||||
// A bunch of examples from https://github.com/microsoft/hcsshim/blob/v0.9.2/test/cri-containerd/container_virtual_device_test.go
|
||||
{
|
||||
name: "hostPath_hcsshim_lcow_gpu",
|
||||
// Not actually a GPU PCIP instance, but my personal machine doesn't have any PCIP devices, so I found one on the 'net.
|
||||
devices: []*runtime.Device{{HostPath: `gpu://PCIP\VEN_8086&DEV_43A2&SUBSYS_72708086&REV_00\3&11583659&0&F5`}},
|
||||
isLCOW: true,
|
||||
|
||||
expectError: false,
|
||||
expectedWindowsDevices: []specs.WindowsDevice{{ID: `PCIP\VEN_8086&DEV_43A2&SUBSYS_72708086&REV_00\3&11583659&0&F5`, IDType: "gpu"}},
|
||||
},
|
||||
{
|
||||
name: "hostPath_hcsshim_wcow_location_path",
|
||||
devices: []*runtime.Device{{HostPath: "vpci-location-path://PCIROOT(0)#PCI(0100)#PCI(0000)#PCI(0000)#PCI(0001)"}},
|
||||
|
||||
expectError: false,
|
||||
expectedWindowsDevices: []specs.WindowsDevice{{ID: "PCIROOT(0)#PCI(0100)#PCI(0000)#PCI(0000)#PCI(0001)", IDType: "vpci-location-path"}},
|
||||
},
|
||||
{
|
||||
name: "hostPath_hcsshim_wcow_class_guid",
|
||||
devices: []*runtime.Device{{HostPath: "class://5B45201D-F2F2-4F3B-85BB-30FF1F953599"}},
|
||||
|
||||
expectError: false,
|
||||
expectedWindowsDevices: []specs.WindowsDevice{{ID: "5B45201D-F2F2-4F3B-85BB-30FF1F953599", IDType: "class"}},
|
||||
},
|
||||
{
|
||||
name: "hostPath_hcsshim_wcow_gpu_hyper-v",
|
||||
// Not actually a GPU PCIP instance, but my personal machine doesn't have any PCIP devices, so I found one on the 'net.
|
||||
devices: []*runtime.Device{{HostPath: `vpci://PCIP\VEN_8086&DEV_43A2&SUBSYS_72708086&REV_00\3&11583659&0&F5`}},
|
||||
|
||||
expectError: false,
|
||||
expectedWindowsDevices: []specs.WindowsDevice{{ID: `PCIP\VEN_8086&DEV_43A2&SUBSYS_72708086&REV_00\3&11583659&0&F5`, IDType: "vpci"}},
|
||||
},
|
||||
// Example from https://github.com/microsoft/hcsshim/blob/v0.9.2/test/cri-containerd/container_test.go
|
||||
// According to https://github.com/jterry75/cri/blob/f8e83e63cc027d0e9c0c984f9db3cba58d3672d4/pkg/server/container_create_windows.go#L625-L649
|
||||
// this is intended to generate LinuxDevice entries that the GCS shim in a LCOW container host will remap
|
||||
// into device mounts from its own in-UVM kernel.
|
||||
// From discussion on https://github.com/containerd/containerd/pull/6618, we reject this syntax for now.
|
||||
{
|
||||
name: "hostPath_hcsshim_lcow_sandbox_device",
|
||||
devices: []*runtime.Device{{HostPath: "/dev/fuse"}},
|
||||
isLCOW: true,
|
||||
|
||||
expectError: true,
|
||||
},
|
||||
// Some edge cases suggested by the above real-world examples
|
||||
{
|
||||
name: "hostPath_no_slash",
|
||||
devices: []*runtime.Device{{HostPath: "no_slash"}},
|
||||
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "hostPath_but_no_type",
|
||||
devices: []*runtime.Device{{HostPath: "://5B45201D-F2F2-4F3B-85BB-30FF1F953599"}},
|
||||
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "hostPath_but_no_id",
|
||||
devices: []*runtime.Device{{HostPath: "gpu://"}},
|
||||
|
||||
expectError: false,
|
||||
expectedWindowsDevices: []specs.WindowsDevice{{ID: "", IDType: "gpu"}},
|
||||
},
|
||||
{
|
||||
name: "hostPath_dockerstyle_with_slashes_in_id",
|
||||
devices: []*runtime.Device{{HostPath: "class/slashed/id"}},
|
||||
|
||||
expectError: false,
|
||||
expectedWindowsDevices: []specs.WindowsDevice{{ID: "slashed/id", IDType: "class"}},
|
||||
},
|
||||
{
|
||||
name: "hostPath_docker_style_non-class_idtypewith_slashes_in_id",
|
||||
devices: []*runtime.Device{{HostPath: "vpci-location-path/slashed/id"}},
|
||||
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "hostPath_hcsshim_wcow_location_path_twice",
|
||||
devices: []*runtime.Device{
|
||||
{HostPath: "vpci-location-path://PCIROOT(0)#PCI(0100)#PCI(0000)#PCI(0000)#PCI(0001)"},
|
||||
{HostPath: "vpci-location-path://PCIROOT(0)#PCI(0100)#PCI(0000)#PCI(0000)#PCI(0002)"}},
|
||||
|
||||
expectError: false,
|
||||
expectedWindowsDevices: []specs.WindowsDevice{
|
||||
{ID: "PCIROOT(0)#PCI(0100)#PCI(0000)#PCI(0000)#PCI(0001)", IDType: "vpci-location-path"},
|
||||
{ID: "PCIROOT(0)#PCI(0100)#PCI(0000)#PCI(0000)#PCI(0002)", IDType: "vpci-location-path"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testcases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
var (
|
||||
ctx = namespaces.WithNamespace(context.Background(), "testing")
|
||||
c = &containers.Container{ID: t.Name()}
|
||||
)
|
||||
|
||||
config := runtime.ContainerConfig{}
|
||||
config.Devices = tc.devices
|
||||
|
||||
specOpts := []oci.SpecOpts{WithWindowsDevices(&config)}
|
||||
|
||||
platform := "windows"
|
||||
if tc.isLCOW {
|
||||
platform = "linux"
|
||||
}
|
||||
|
||||
spec, err := oci.GenerateSpecWithPlatform(ctx, nil, platform, c, specOpts...)
|
||||
if tc.expectError {
|
||||
assert.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
// Ensure we got the right LCOWness in the spec
|
||||
if tc.isLCOW {
|
||||
assert.NotNil(t, spec.Linux)
|
||||
} else {
|
||||
assert.Nil(t, spec.Linux)
|
||||
}
|
||||
|
||||
if len(tc.expectedWindowsDevices) != 0 {
|
||||
require.NotNil(t, spec.Windows)
|
||||
require.NotNil(t, spec.Windows.Devices)
|
||||
assert.Equal(t, spec.Windows.Devices, tc.expectedWindowsDevices)
|
||||
} else if spec.Windows != nil && spec.Windows.Devices != nil {
|
||||
assert.Empty(t, spec.Windows.Devices)
|
||||
}
|
||||
|
||||
if spec.Linux != nil && spec.Linux.Devices != nil {
|
||||
assert.Empty(t, spec.Linux.Devices)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDriveMounts(t *testing.T) {
|
||||
tests := []struct {
|
||||
mnt *runtime.Mount
|
||||
expectedContainerPath string
|
||||
expectedError error
|
||||
}{
|
||||
{&runtime.Mount{HostPath: `C:\`, ContainerPath: `D:\foo`}, `D:\foo`, nil},
|
||||
{&runtime.Mount{HostPath: `C:\`, ContainerPath: `D:\`}, `D:\`, nil},
|
||||
{&runtime.Mount{HostPath: `C:\`, ContainerPath: `D:`}, `D:`, nil},
|
||||
{&runtime.Mount{HostPath: `\\.\pipe\a_fake_pipe_name_that_shouldnt_exist`, ContainerPath: `\\.\pipe\foo`}, `\\.\pipe\foo`, nil},
|
||||
// If `C:\` is passed as container path it should continue and forward that to HCS and fail
|
||||
// to align with docker's behavior.
|
||||
{&runtime.Mount{HostPath: `C:\`, ContainerPath: `C:\`}, `C:\`, nil},
|
||||
|
||||
// If `C:` is passed we can detect and fail immediately.
|
||||
{&runtime.Mount{HostPath: `C:\`, ContainerPath: `C:`}, ``, fmt.Errorf("destination path can not be C drive")},
|
||||
}
|
||||
var realOS osinterface.RealOS
|
||||
for _, test := range tests {
|
||||
parsedMount, err := parseMount(realOS, test.mnt)
|
||||
if err != nil && !strings.EqualFold(err.Error(), test.expectedError.Error()) {
|
||||
t.Fatalf("expected err: %s, got %s instead", test.expectedError, err)
|
||||
} else if err == nil && test.expectedContainerPath != parsedMount.Destination {
|
||||
t.Fatalf("expected container path: %s, got %s instead", test.expectedContainerPath, parsedMount.Destination)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user