diff --git a/Makefile b/Makefile index 0d3d92aa2..4e6079963 100644 --- a/Makefile +++ b/Makefile @@ -15,8 +15,12 @@ GO := go GOOS := $(shell $(GO) env GOOS) GOARCH := $(shell $(GO) env GOARCH) -WHALE = "🇩" -ONI = "👹" +WHALE := "🇩" +ONI := "👹" +ifeq ($(GOOS),windows) + WHALE = "+" + ONI = "-" +endif EPOCH_TEST_COMMIT := f9e02affccd51702191e5312665a16045ffef8ab PROJECT := github.com/containerd/cri BINDIR := ${DESTDIR}/usr/local/bin @@ -26,13 +30,19 @@ BUILD_DIR := _output VERSION := $(shell git rev-parse --short HEAD) TARBALL_PREFIX := cri-containerd TARBALL := $(TARBALL_PREFIX)-$(VERSION).$(GOOS)-$(GOARCH).tar.gz -BUILD_TAGS := seccomp apparmor +ifneq ($(GOOS),windows) + BUILD_TAGS := seccomp apparmor +endif # Add `-TEST` suffix to indicate that all binaries built from this repo are for test. GO_LDFLAGS := -X $(PROJECT)/vendor/github.com/containerd/containerd/version.Version=$(VERSION)-TEST SOURCES := $(shell find cmd/ pkg/ vendor/ -name '*.go') PLUGIN_SOURCES := $(shell ls *.go) INTEGRATION_SOURCES := $(shell find integration/ -name '*.go') +ifeq ($(GOOS),windows) + BIN_EXT := .exe +endif + all: binaries help: ## this help @@ -74,7 +84,7 @@ update-vendor: sync-vendor sort-vendor ## Syncs containerd/vendor.conf -> vendor $(BUILD_DIR)/containerd: $(SOURCES) $(PLUGIN_SOURCES) @echo "$(WHALE) $@" - $(GO) build -o $@ \ + $(GO) build -o $@$(BIN_EXT) \ -tags '$(BUILD_TAGS)' \ -ldflags '$(GO_LDFLAGS)' \ -gcflags '$(GO_GCFLAGS)' \ diff --git a/cmd/containerd/builtins_unix.go b/cmd/containerd/builtins_unix.go new file mode 100644 index 000000000..9490b1937 --- /dev/null +++ b/cmd/containerd/builtins_unix.go @@ -0,0 +1,25 @@ +// +build !windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + _ "github.com/containerd/containerd/metrics/cgroups" + _ "github.com/containerd/containerd/runtime/v1/linux" + _ "github.com/containerd/containerd/snapshots/overlay" +) diff --git a/cmd/containerd/builtins_windows.go b/cmd/containerd/builtins_windows.go new file mode 100644 index 000000000..a82c08ff0 --- /dev/null +++ b/cmd/containerd/builtins_windows.go @@ -0,0 +1,25 @@ +// +build windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + _ "github.com/containerd/containerd/diff/lcow" + _ "github.com/containerd/containerd/diff/windows" + _ "github.com/containerd/containerd/snapshots/windows" +) diff --git a/cmd/containerd/containerd.go b/cmd/containerd/containerd.go index 66620c2d4..8446e7446 100644 --- a/cmd/containerd/containerd.go +++ b/cmd/containerd/containerd.go @@ -24,8 +24,6 @@ import ( _ "github.com/containerd/containerd/diff/walking/plugin" _ "github.com/containerd/containerd/gc/scheduler" - _ "github.com/containerd/containerd/metrics/cgroups" - _ "github.com/containerd/containerd/runtime/v1/linux" _ "github.com/containerd/containerd/runtime/v2" _ "github.com/containerd/containerd/services/containers" _ "github.com/containerd/containerd/services/content" @@ -39,7 +37,6 @@ import ( _ "github.com/containerd/containerd/services/snapshots" _ "github.com/containerd/containerd/services/tasks" _ "github.com/containerd/containerd/services/version" - _ "github.com/containerd/containerd/snapshots/overlay" _ "github.com/containerd/cri" "github.com/containerd/containerd/cmd/containerd/command" diff --git a/pkg/config/config.go b/pkg/config/config.go index fcd743203..134c4dacb 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -21,11 +21,9 @@ import ( "time" "github.com/BurntSushi/toml" - "github.com/containerd/containerd" "github.com/containerd/containerd/log" "github.com/containerd/containerd/plugin" "github.com/pkg/errors" - "k8s.io/kubernetes/pkg/kubelet/server/streaming" ) // Runtime struct to contain the type(ID), engine, and root variables for a default runtime @@ -227,51 +225,6 @@ type Config struct { StateDir string `json:"stateDir"` } -// DefaultConfig returns default configurations of cri plugin. 
-func DefaultConfig() PluginConfig { - return PluginConfig{ - CniConfig: CniConfig{ - NetworkPluginBinDir: "/opt/cni/bin", - NetworkPluginConfDir: "/etc/cni/net.d", - NetworkPluginMaxConfNum: 1, // only one CNI plugin config file will be loaded - NetworkPluginConfTemplate: "", - }, - ContainerdConfig: ContainerdConfig{ - Snapshotter: containerd.DefaultSnapshotter, - DefaultRuntimeName: "runc", - NoPivot: false, - Runtimes: map[string]Runtime{ - "runc": { - Type: "io.containerd.runc.v1", - }, - }, - }, - DisableTCPService: true, - StreamServerAddress: "127.0.0.1", - StreamServerPort: "0", - StreamIdleTimeout: streaming.DefaultConfig.StreamIdleTimeout.String(), // 4 hour - EnableSelinux: false, - EnableTLSStreaming: false, - X509KeyPairStreaming: X509KeyPairStreaming{ - TLSKeyFile: "", - TLSCertFile: "", - }, - SandboxImage: "k8s.gcr.io/pause:3.1", - StatsCollectPeriod: 10, - SystemdCgroup: false, - MaxContainerLogLineSize: 16 * 1024, - Registry: Registry{ - Mirrors: map[string]Mirror{ - "docker.io": { - Endpoints: []string{"https://registry-1.docker.io"}, - }, - }, - }, - MaxConcurrentDownloads: 3, - DisableProcMount: false, - } -} - const ( // RuntimeUntrusted is the implicit runtime defined for ContainerdConfig.UntrustedWorkloadRuntime RuntimeUntrusted = "untrusted" diff --git a/pkg/config/config_unix.go b/pkg/config/config_unix.go new file mode 100644 index 000000000..fdf3e6b92 --- /dev/null +++ b/pkg/config/config_unix.go @@ -0,0 +1,69 @@ +// +build !windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package config + +import ( + "github.com/containerd/containerd" + "k8s.io/kubernetes/pkg/kubelet/server/streaming" +) + +// DefaultConfig returns default configurations of cri plugin. +func DefaultConfig() PluginConfig { + return PluginConfig{ + CniConfig: CniConfig{ + NetworkPluginBinDir: "/opt/cni/bin", + NetworkPluginConfDir: "/etc/cni/net.d", + NetworkPluginMaxConfNum: 1, // only one CNI plugin config file will be loaded + NetworkPluginConfTemplate: "", + }, + ContainerdConfig: ContainerdConfig{ + Snapshotter: containerd.DefaultSnapshotter, + DefaultRuntimeName: "runc", + NoPivot: false, + Runtimes: map[string]Runtime{ + "runc": { + Type: "io.containerd.runc.v1", + }, + }, + }, + DisableTCPService: true, + StreamServerAddress: "127.0.0.1", + StreamServerPort: "0", + StreamIdleTimeout: streaming.DefaultConfig.StreamIdleTimeout.String(), // 4 hour + EnableSelinux: false, + EnableTLSStreaming: false, + X509KeyPairStreaming: X509KeyPairStreaming{ + TLSKeyFile: "", + TLSCertFile: "", + }, + SandboxImage: "k8s.gcr.io/pause:3.1", + StatsCollectPeriod: 10, + SystemdCgroup: false, + MaxContainerLogLineSize: 16 * 1024, + Registry: Registry{ + Mirrors: map[string]Mirror{ + "docker.io": { + Endpoints: []string{"https://registry-1.docker.io"}, + }, + }, + }, + MaxConcurrentDownloads: 3, + DisableProcMount: false, + } +} diff --git a/pkg/config/config_windows.go b/pkg/config/config_windows.go new file mode 100644 index 000000000..3489bc5df --- /dev/null +++ b/pkg/config/config_windows.go @@ -0,0 +1,24 @@ +// +build windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package config + +// DefaultConfig returns default configurations of cri plugin. +func DefaultConfig() PluginConfig { + return PluginConfig{} +} diff --git a/pkg/containerd/opts/spec.go b/pkg/containerd/opts/spec.go index 0da421a5e..da0dec584 100644 --- a/pkg/containerd/opts/spec.go +++ b/pkg/containerd/opts/spec.go @@ -18,100 +18,19 @@ package opts import ( "context" - "fmt" - "io/ioutil" "os" "path/filepath" - "sort" - "strconv" "strings" "github.com/containerd/containerd/containers" - "github.com/containerd/containerd/log" - "github.com/containerd/containerd/mount" "github.com/containerd/containerd/oci" imagespec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/opencontainers/runc/libcontainer/devices" runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/selinux/go-selinux/label" "github.com/pkg/errors" - "golang.org/x/sys/unix" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" - - osinterface "github.com/containerd/cri/pkg/os" - "github.com/containerd/cri/pkg/util" ) -const ( - // DefaultSandboxCPUshares is default cpu shares for sandbox container. - DefaultSandboxCPUshares = 2 -) - -// WithAdditionalGIDs adds any additional groups listed for a particular user in the -// /etc/groups file of the image's root filesystem to the OCI spec's additionalGids array. 
-func WithAdditionalGIDs(userstr string) oci.SpecOpts { - return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) { - if s.Process == nil { - s.Process = &runtimespec.Process{} - } - gids := s.Process.User.AdditionalGids - if err := oci.WithAdditionalGIDs(userstr)(ctx, client, c, s); err != nil { - return err - } - // Merge existing gids and new gids. - s.Process.User.AdditionalGids = mergeGids(s.Process.User.AdditionalGids, gids) - return nil - } -} - -func mergeGids(gids1, gids2 []uint32) []uint32 { - gidsMap := make(map[uint32]struct{}) - for _, gid1 := range gids1 { - gidsMap[gid1] = struct{}{} - } - for _, gid2 := range gids2 { - gidsMap[gid2] = struct{}{} - } - var gids []uint32 - for gid := range gidsMap { - gids = append(gids, gid) - } - sort.Slice(gids, func(i, j int) bool { return gids[i] < gids[j] }) - return gids -} - -// WithoutRunMount removes the `/run` inside the spec -func WithoutRunMount(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error { - var ( - mounts []runtimespec.Mount - current = s.Mounts - ) - for _, m := range current { - if filepath.Clean(m.Destination) == "/run" { - continue - } - mounts = append(mounts, m) - } - s.Mounts = mounts - return nil -} - -// WithoutDefaultSecuritySettings removes the default security settings generated on a spec -func WithoutDefaultSecuritySettings(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error { - if s.Process == nil { - s.Process = &runtimespec.Process{} - } - // Make sure no default seccomp/apparmor is specified - s.Process.ApparmorProfile = "" - if s.Linux != nil { - s.Linux.Seccomp = nil - } - // Remove default rlimits (See issue #515) - s.Process.Rlimits = nil - return nil -} - // WithRelativeRoot sets the root for the container func WithRelativeRoot(root string) oci.SpecOpts { return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err 
error) { @@ -145,141 +64,6 @@ func WithProcessArgs(config *runtime.ContainerConfig, image *imagespec.ImageConf } } -// WithMounts sorts and adds runtime and CRI mounts to the spec -func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string) oci.SpecOpts { - return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) (err error) { - // mergeMounts merge CRI mounts with extra mounts. If a mount destination - // is mounted by both a CRI mount and an extra mount, the CRI mount will - // be kept. - var ( - criMounts = config.GetMounts() - mounts = append([]*runtime.Mount{}, criMounts...) - ) - // Copy all mounts from extra mounts, except for mounts overriden by CRI. - for _, e := range extra { - found := false - for _, c := range criMounts { - if filepath.Clean(e.ContainerPath) == filepath.Clean(c.ContainerPath) { - found = true - break - } - } - if !found { - mounts = append(mounts, e) - } - } - // --- - - // Sort mounts in number of parts. This ensures that high level mounts don't - // shadow other mounts. - sort.Sort(orderedMounts(mounts)) - - // Mount cgroup into the container as readonly, which inherits docker's behavior. - s.Mounts = append(s.Mounts, runtimespec.Mount{ - Source: "cgroup", - Destination: "/sys/fs/cgroup", - Type: "cgroup", - Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"}, - }) - - // Copy all mounts from default mounts, except for - // - mounts overriden by supplied mount; - // - all mounts under /dev if a supplied /dev is present. 
- mountSet := make(map[string]struct{}) - for _, m := range mounts { - mountSet[filepath.Clean(m.ContainerPath)] = struct{}{} - } - - defaultMounts := s.Mounts - s.Mounts = nil - - for _, m := range defaultMounts { - dst := filepath.Clean(m.Destination) - if _, ok := mountSet[dst]; ok { - // filter out mount overridden by a supplied mount - continue - } - if _, mountDev := mountSet["/dev"]; mountDev && strings.HasPrefix(dst, "/dev/") { - // filter out everything under /dev if /dev is a supplied mount - continue - } - s.Mounts = append(s.Mounts, m) - } - - for _, mount := range mounts { - var ( - dst = mount.GetContainerPath() - src = mount.GetHostPath() - ) - // Create the host path if it doesn't exist. - // TODO(random-liu): Add CRI validation test for this case. - if _, err := osi.Stat(src); err != nil { - if !os.IsNotExist(err) { - return errors.Wrapf(err, "failed to stat %q", src) - } - if err := osi.MkdirAll(src, 0755); err != nil { - return errors.Wrapf(err, "failed to mkdir %q", src) - } - } - // TODO(random-liu): Add cri-containerd integration test or cri validation test - // for this. 
- src, err := osi.ResolveSymbolicLink(src) - if err != nil { - return errors.Wrapf(err, "failed to resolve symlink %q", src) - } - if s.Linux == nil { - s.Linux = &runtimespec.Linux{} - } - options := []string{"rbind"} - switch mount.GetPropagation() { - case runtime.MountPropagation_PROPAGATION_PRIVATE: - options = append(options, "rprivate") - // Since default root propogation in runc is rprivate ignore - // setting the root propagation - case runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL: - if err := ensureShared(src, osi.LookupMount); err != nil { - return err - } - options = append(options, "rshared") - s.Linux.RootfsPropagation = "rshared" - case runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER: - if err := ensureSharedOrSlave(src, osi.LookupMount); err != nil { - return err - } - options = append(options, "rslave") - if s.Linux.RootfsPropagation != "rshared" && - s.Linux.RootfsPropagation != "rslave" { - s.Linux.RootfsPropagation = "rslave" - } - default: - log.G(ctx).Warnf("Unknown propagation mode for hostPath %q", mount.HostPath) - options = append(options, "rprivate") - } - - // NOTE(random-liu): we don't change all mounts to `ro` when root filesystem - // is readonly. This is different from docker's behavior, but make more sense. - if mount.GetReadonly() { - options = append(options, "ro") - } else { - options = append(options, "rw") - } - - if mount.GetSelinuxRelabel() { - if err := label.Relabel(src, mountLabel, true); err != nil && err != unix.ENOTSUP { - return errors.Wrapf(err, "relabel %q with %q failed", src, mountLabel) - } - } - s.Mounts = append(s.Mounts, runtimespec.Mount{ - Source: src, - Destination: dst, - Type: "bind", - Options: options, - }) - } - return nil - } -} - // mounts defines how to sort runtime.Mount. 
// This is the same with the Docker implementation: // https://github.com/moby/moby/blob/17.05.x/daemon/volumes.go#L26 @@ -307,325 +91,6 @@ func (m orderedMounts) parts(i int) int { return strings.Count(filepath.Clean(m[i].ContainerPath), string(os.PathSeparator)) } -// Ensure mount point on which path is mounted, is shared. -func ensureShared(path string, lookupMount func(string) (mount.Info, error)) error { - mountInfo, err := lookupMount(path) - if err != nil { - return err - } - - // Make sure source mount point is shared. - optsSplit := strings.Split(mountInfo.Optional, " ") - for _, opt := range optsSplit { - if strings.HasPrefix(opt, "shared:") { - return nil - } - } - - return errors.Errorf("path %q is mounted on %q but it is not a shared mount", path, mountInfo.Mountpoint) -} - -// ensure mount point on which path is mounted, is either shared or slave. -func ensureSharedOrSlave(path string, lookupMount func(string) (mount.Info, error)) error { - mountInfo, err := lookupMount(path) - if err != nil { - return err - } - // Make sure source mount point is shared. 
- optsSplit := strings.Split(mountInfo.Optional, " ") - for _, opt := range optsSplit { - if strings.HasPrefix(opt, "shared:") { - return nil - } else if strings.HasPrefix(opt, "master:") { - return nil - } - } - return errors.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountInfo.Mountpoint) -} - -// WithPrivilegedDevices allows all host devices inside the container -func WithPrivilegedDevices(_ context.Context, _ oci.Client, _ *containers.Container, s *runtimespec.Spec) error { - if s.Linux == nil { - s.Linux = &runtimespec.Linux{} - } - if s.Linux.Resources == nil { - s.Linux.Resources = &runtimespec.LinuxResources{} - } - hostDevices, err := devices.HostDevices() - if err != nil { - return err - } - for _, hostDevice := range hostDevices { - rd := runtimespec.LinuxDevice{ - Path: hostDevice.Path, - Type: string(hostDevice.Type), - Major: hostDevice.Major, - Minor: hostDevice.Minor, - UID: &hostDevice.Uid, - GID: &hostDevice.Gid, - } - if hostDevice.Major == 0 && hostDevice.Minor == 0 { - // Invalid device, most likely a symbolic link, skip it. 
- continue - } - addDevice(s, rd) - } - s.Linux.Resources.Devices = []runtimespec.LinuxDeviceCgroup{ - { - Allow: true, - Access: "rwm", - }, - } - return nil -} - -func addDevice(s *runtimespec.Spec, rd runtimespec.LinuxDevice) { - for i, dev := range s.Linux.Devices { - if dev.Path == rd.Path { - s.Linux.Devices[i] = rd - return - } - } - s.Linux.Devices = append(s.Linux.Devices, rd) -} - -// WithDevices sets the provided devices onto the container spec -func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOpts { - return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) { - if s.Linux == nil { - s.Linux = &runtimespec.Linux{} - } - if s.Linux.Resources == nil { - s.Linux.Resources = &runtimespec.LinuxResources{} - } - for _, device := range config.GetDevices() { - path, err := osi.ResolveSymbolicLink(device.HostPath) - if err != nil { - return err - } - dev, err := devices.DeviceFromPath(path, device.Permissions) - if err != nil { - return err - } - rd := runtimespec.LinuxDevice{ - Path: device.ContainerPath, - Type: string(dev.Type), - Major: dev.Major, - Minor: dev.Minor, - UID: &dev.Uid, - GID: &dev.Gid, - } - - addDevice(s, rd) - - s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, runtimespec.LinuxDeviceCgroup{ - Allow: true, - Type: string(dev.Type), - Major: &dev.Major, - Minor: &dev.Minor, - Access: dev.Permissions, - }) - } - return nil - } -} - -// WithCapabilities sets the provided capabilties from the security context -func WithCapabilities(sc *runtime.LinuxContainerSecurityContext) oci.SpecOpts { - capabilities := sc.GetCapabilities() - if capabilities == nil { - return nullOpt - } - - var opts []oci.SpecOpts - // Add/drop all capabilities if "all" is specified, so that - // following individual add/drop could still work. E.g. - // AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"} - // will be all capabilities without `CAP_CHOWN`. 
- if util.InStringSlice(capabilities.GetAddCapabilities(), "ALL") { - opts = append(opts, oci.WithAllCapabilities) - } - if util.InStringSlice(capabilities.GetDropCapabilities(), "ALL") { - opts = append(opts, oci.WithCapabilities(nil)) - } - - var caps []string - for _, c := range capabilities.GetAddCapabilities() { - if strings.ToUpper(c) == "ALL" { - continue - } - // Capabilities in CRI doesn't have `CAP_` prefix, so add it. - caps = append(caps, "CAP_"+strings.ToUpper(c)) - } - opts = append(opts, oci.WithAddedCapabilities(caps)) - - caps = []string{} - for _, c := range capabilities.GetDropCapabilities() { - if strings.ToUpper(c) == "ALL" { - continue - } - caps = append(caps, "CAP_"+strings.ToUpper(c)) - } - opts = append(opts, oci.WithDroppedCapabilities(caps)) - return oci.Compose(opts...) -} - -// WithoutAmbientCaps removes the ambient caps from the spec -func WithoutAmbientCaps(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error { - if s.Process == nil { - s.Process = &runtimespec.Process{} - } - if s.Process.Capabilities == nil { - s.Process.Capabilities = &runtimespec.LinuxCapabilities{} - } - s.Process.Capabilities.Ambient = nil - return nil -} - -// WithDisabledCgroups clears the Cgroups Path from the spec -func WithDisabledCgroups(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error { - if s.Linux == nil { - s.Linux = &runtimespec.Linux{} - } - s.Linux.CgroupsPath = "" - return nil -} - -// WithSelinuxLabels sets the mount and process labels -func WithSelinuxLabels(process, mount string) oci.SpecOpts { - return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) { - if s.Linux == nil { - s.Linux = &runtimespec.Linux{} - } - if s.Process == nil { - s.Process = &runtimespec.Process{} - } - s.Linux.MountLabel = mount - s.Process.SelinuxLabel = process - return nil - } -} - -// WithResources sets the provided resource restrictions -func 
WithResources(resources *runtime.LinuxContainerResources) oci.SpecOpts { - return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) { - if resources == nil { - return nil - } - if s.Linux == nil { - s.Linux = &runtimespec.Linux{} - } - if s.Linux.Resources == nil { - s.Linux.Resources = &runtimespec.LinuxResources{} - } - if s.Linux.Resources.CPU == nil { - s.Linux.Resources.CPU = &runtimespec.LinuxCPU{} - } - if s.Linux.Resources.Memory == nil { - s.Linux.Resources.Memory = &runtimespec.LinuxMemory{} - } - var ( - p = uint64(resources.GetCpuPeriod()) - q = resources.GetCpuQuota() - shares = uint64(resources.GetCpuShares()) - limit = resources.GetMemoryLimitInBytes() - ) - - if p != 0 { - s.Linux.Resources.CPU.Period = &p - } - if q != 0 { - s.Linux.Resources.CPU.Quota = &q - } - if shares != 0 { - s.Linux.Resources.CPU.Shares = &shares - } - if cpus := resources.GetCpusetCpus(); cpus != "" { - s.Linux.Resources.CPU.Cpus = cpus - } - if mems := resources.GetCpusetMems(); mems != "" { - s.Linux.Resources.CPU.Mems = resources.GetCpusetMems() - } - if limit != 0 { - s.Linux.Resources.Memory.Limit = &limit - } - return nil - } -} - -// WithOOMScoreAdj sets the oom score -func WithOOMScoreAdj(config *runtime.ContainerConfig, restrict bool) oci.SpecOpts { - return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error { - if s.Process == nil { - s.Process = &runtimespec.Process{} - } - - resources := config.GetLinux().GetResources() - if resources == nil { - return nil - } - adj := int(resources.GetOomScoreAdj()) - if restrict { - var err error - adj, err = restrictOOMScoreAdj(adj) - if err != nil { - return err - } - } - s.Process.OOMScoreAdj = &adj - return nil - } -} - -// WithSysctls sets the provided sysctls onto the spec -func WithSysctls(sysctls map[string]string) oci.SpecOpts { - return func(ctx context.Context, client oci.Client, c *containers.Container, s 
*runtimespec.Spec) error { - if s.Linux == nil { - s.Linux = &runtimespec.Linux{} - } - if s.Linux.Sysctl == nil { - s.Linux.Sysctl = make(map[string]string) - } - for k, v := range sysctls { - s.Linux.Sysctl[k] = v - } - return nil - } -} - -// WithPodOOMScoreAdj sets the oom score for the pod sandbox -func WithPodOOMScoreAdj(adj int, restrict bool) oci.SpecOpts { - return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error { - if s.Process == nil { - s.Process = &runtimespec.Process{} - } - if restrict { - var err error - adj, err = restrictOOMScoreAdj(adj) - if err != nil { - return err - } - } - s.Process.OOMScoreAdj = &adj - return nil - } -} - -// WithSupplementalGroups sets the supplemental groups for the process -func WithSupplementalGroups(groups []int64) oci.SpecOpts { - return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error { - if s.Process == nil { - s.Process = &runtimespec.Process{} - } - var guids []uint32 - for _, g := range groups { - guids = append(guids, uint32(g)) - } - s.Process.User.AdditionalGids = mergeGids(s.Process.User.AdditionalGids, guids) - return nil - } -} - // WithAnnotation sets the provided annotation func WithAnnotation(k, v string) oci.SpecOpts { return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error { @@ -636,110 +101,3 @@ func WithAnnotation(k, v string) oci.SpecOpts { return nil } } - -// WithPodNamespaces sets the pod namespaces for the container -func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, pid uint32) oci.SpecOpts { - namespaces := config.GetNamespaceOptions() - - opts := []oci.SpecOpts{ - oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.NetworkNamespace, Path: GetNetworkNamespace(pid)}), - oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.IPCNamespace, Path: GetIPCNamespace(pid)}), - 
oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UTSNamespace, Path: GetUTSNamespace(pid)}), - } - if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER { - opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(pid)})) - } - return oci.Compose(opts...) -} - -// WithDefaultSandboxShares sets the default sandbox CPU shares -func WithDefaultSandboxShares(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error { - if s.Linux == nil { - s.Linux = &runtimespec.Linux{} - } - if s.Linux.Resources == nil { - s.Linux.Resources = &runtimespec.LinuxResources{} - } - if s.Linux.Resources.CPU == nil { - s.Linux.Resources.CPU = &runtimespec.LinuxCPU{} - } - i := uint64(DefaultSandboxCPUshares) - s.Linux.Resources.CPU.Shares = &i - return nil -} - -// WithoutNamespace removes the provided namespace -func WithoutNamespace(t runtimespec.LinuxNamespaceType) oci.SpecOpts { - return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error { - if s.Linux == nil { - return nil - } - var namespaces []runtimespec.LinuxNamespace - for i, ns := range s.Linux.Namespaces { - if ns.Type != t { - namespaces = append(namespaces, s.Linux.Namespaces[i]) - } - } - s.Linux.Namespaces = namespaces - return nil - } -} - -func nullOpt(_ context.Context, _ oci.Client, _ *containers.Container, _ *runtimespec.Spec) error { - return nil -} - -func getCurrentOOMScoreAdj() (int, error) { - b, err := ioutil.ReadFile("/proc/self/oom_score_adj") - if err != nil { - return 0, errors.Wrap(err, "could not get the daemon oom_score_adj") - } - s := strings.TrimSpace(string(b)) - i, err := strconv.Atoi(s) - if err != nil { - return 0, errors.Wrap(err, "could not get the daemon oom_score_adj") - } - return i, nil -} - -func restrictOOMScoreAdj(preferredOOMScoreAdj int) (int, error) { - currentOOMScoreAdj, err := getCurrentOOMScoreAdj() - if err != nil { 
- return preferredOOMScoreAdj, err - } - if preferredOOMScoreAdj < currentOOMScoreAdj { - return currentOOMScoreAdj, nil - } - return preferredOOMScoreAdj, nil -} - -const ( - // netNSFormat is the format of network namespace of a process. - netNSFormat = "/proc/%v/ns/net" - // ipcNSFormat is the format of ipc namespace of a process. - ipcNSFormat = "/proc/%v/ns/ipc" - // utsNSFormat is the format of uts namespace of a process. - utsNSFormat = "/proc/%v/ns/uts" - // pidNSFormat is the format of pid namespace of a process. - pidNSFormat = "/proc/%v/ns/pid" -) - -// GetNetworkNamespace returns the network namespace of a process. -func GetNetworkNamespace(pid uint32) string { - return fmt.Sprintf(netNSFormat, pid) -} - -// GetIPCNamespace returns the ipc namespace of a process. -func GetIPCNamespace(pid uint32) string { - return fmt.Sprintf(ipcNSFormat, pid) -} - -// GetUTSNamespace returns the uts namespace of a process. -func GetUTSNamespace(pid uint32) string { - return fmt.Sprintf(utsNSFormat, pid) -} - -// GetPIDNamespace returns the pid namespace of a process. -func GetPIDNamespace(pid uint32) string { - return fmt.Sprintf(pidNSFormat, pid) -} diff --git a/pkg/containerd/opts/spec_test.go b/pkg/containerd/opts/spec_test.go index 1ceb71698..be1bc979c 100644 --- a/pkg/containerd/opts/spec_test.go +++ b/pkg/containerd/opts/spec_test.go @@ -1,5 +1,5 @@ /* -Copyright 2018 The containerd Authors. +Copyright The containerd Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -21,16 +21,9 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" ) -func TestMergeGids(t *testing.T) { - gids1 := []uint32{3, 2, 1} - gids2 := []uint32{2, 3, 4} - assert.Equal(t, []uint32{1, 2, 3, 4}, mergeGids(gids1, gids2)) -} - func TestOrderedMounts(t *testing.T) { mounts := []*runtime.Mount{ {ContainerPath: "/a/b/c"}, @@ -51,20 +44,3 @@ func TestOrderedMounts(t *testing.T) { sort.Stable(orderedMounts(mounts)) assert.Equal(t, expected, mounts) } - -func TestRestrictOOMScoreAdj(t *testing.T) { - current, err := getCurrentOOMScoreAdj() - require.NoError(t, err) - - got, err := restrictOOMScoreAdj(current - 1) - require.NoError(t, err) - assert.Equal(t, got, current) - - got, err = restrictOOMScoreAdj(current) - require.NoError(t, err) - assert.Equal(t, got, current) - - got, err = restrictOOMScoreAdj(current + 1) - require.NoError(t, err) - assert.Equal(t, got, current+1) -} diff --git a/pkg/containerd/opts/spec_unix.go b/pkg/containerd/opts/spec_unix.go new file mode 100644 index 000000000..94a072afd --- /dev/null +++ b/pkg/containerd/opts/spec_unix.go @@ -0,0 +1,676 @@ +// +build !windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package opts + +import ( + "context" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + + "github.com/containerd/containerd/containers" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/oci" + "github.com/opencontainers/runc/libcontainer/devices" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/pkg/errors" + "golang.org/x/sys/unix" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + + osinterface "github.com/containerd/cri/pkg/os" + "github.com/containerd/cri/pkg/util" +) + +const ( + // DefaultSandboxCPUshares is default cpu shares for sandbox container. + // TODO(windows): Evaluate whether this can be used for windows sandbox + // container cpu shares. + DefaultSandboxCPUshares = 2 +) + +// WithAdditionalGIDs adds any additional groups listed for a particular user in the +// /etc/groups file of the image's root filesystem to the OCI spec's additionalGids array. +func WithAdditionalGIDs(userstr string) oci.SpecOpts { + return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) { + if s.Process == nil { + s.Process = &runtimespec.Process{} + } + gids := s.Process.User.AdditionalGids + if err := oci.WithAdditionalGIDs(userstr)(ctx, client, c, s); err != nil { + return err + } + // Merge existing gids and new gids. 
+ s.Process.User.AdditionalGids = mergeGids(s.Process.User.AdditionalGids, gids) + return nil + } +} + +func mergeGids(gids1, gids2 []uint32) []uint32 { + gidsMap := make(map[uint32]struct{}) + for _, gid1 := range gids1 { + gidsMap[gid1] = struct{}{} + } + for _, gid2 := range gids2 { + gidsMap[gid2] = struct{}{} + } + var gids []uint32 + for gid := range gidsMap { + gids = append(gids, gid) + } + sort.Slice(gids, func(i, j int) bool { return gids[i] < gids[j] }) + return gids +} + +// WithoutRunMount removes the `/run` inside the spec +func WithoutRunMount(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error { + var ( + mounts []runtimespec.Mount + current = s.Mounts + ) + for _, m := range current { + if filepath.Clean(m.Destination) == "/run" { + continue + } + mounts = append(mounts, m) + } + s.Mounts = mounts + return nil +} + +// WithoutDefaultSecuritySettings removes the default security settings generated on a spec +func WithoutDefaultSecuritySettings(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error { + if s.Process == nil { + s.Process = &runtimespec.Process{} + } + // Make sure no default seccomp/apparmor is specified + s.Process.ApparmorProfile = "" + if s.Linux != nil { + s.Linux.Seccomp = nil + } + // Remove default rlimits (See issue #515) + s.Process.Rlimits = nil + return nil +} + +// WithMounts sorts and adds runtime and CRI mounts to the spec +func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string) oci.SpecOpts { + return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) (err error) { + // mergeMounts merge CRI mounts with extra mounts. If a mount destination + // is mounted by both a CRI mount and an extra mount, the CRI mount will + // be kept. + var ( + criMounts = config.GetMounts() + mounts = append([]*runtime.Mount{}, criMounts...) 
+ ) + // Copy all mounts from extra mounts, except for mounts overridden by CRI. + for _, e := range extra { + found := false + for _, c := range criMounts { + if filepath.Clean(e.ContainerPath) == filepath.Clean(c.ContainerPath) { + found = true + break + } + } + if !found { + mounts = append(mounts, e) + } + } + // --- + + // Sort mounts in number of parts. This ensures that high level mounts don't + // shadow other mounts. + sort.Sort(orderedMounts(mounts)) + + // Mount cgroup into the container as readonly, which inherits docker's behavior. + s.Mounts = append(s.Mounts, runtimespec.Mount{ + Source: "cgroup", + Destination: "/sys/fs/cgroup", + Type: "cgroup", + Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"}, + }) + + // Copy all mounts from default mounts, except for + // - mounts overridden by supplied mount; + // - all mounts under /dev if a supplied /dev is present. + mountSet := make(map[string]struct{}) + for _, m := range mounts { + mountSet[filepath.Clean(m.ContainerPath)] = struct{}{} + } + + defaultMounts := s.Mounts + s.Mounts = nil + + for _, m := range defaultMounts { + dst := filepath.Clean(m.Destination) + if _, ok := mountSet[dst]; ok { + // filter out mount overridden by a supplied mount + continue + } + if _, mountDev := mountSet["/dev"]; mountDev && strings.HasPrefix(dst, "/dev/") { + // filter out everything under /dev if /dev is a supplied mount + continue + } + s.Mounts = append(s.Mounts, m) + } + + for _, mount := range mounts { + var ( + dst = mount.GetContainerPath() + src = mount.GetHostPath() + ) + // Create the host path if it doesn't exist. + // TODO(random-liu): Add CRI validation test for this case. 
+ if _, err := osi.Stat(src); err != nil { + if !os.IsNotExist(err) { + return errors.Wrapf(err, "failed to stat %q", src) + } + if err := osi.MkdirAll(src, 0755); err != nil { + return errors.Wrapf(err, "failed to mkdir %q", src) + } + } + // TODO(random-liu): Add cri-containerd integration test or cri validation test + // for this. + src, err := osi.ResolveSymbolicLink(src) + if err != nil { + return errors.Wrapf(err, "failed to resolve symlink %q", src) + } + if s.Linux == nil { + s.Linux = &runtimespec.Linux{} + } + options := []string{"rbind"} + switch mount.GetPropagation() { + case runtime.MountPropagation_PROPAGATION_PRIVATE: + options = append(options, "rprivate") + // Since default root propagation in runc is rprivate, ignore + // setting the root propagation + case runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL: + if err := ensureShared(src, osi.(osinterface.UNIX).LookupMount); err != nil { + return err + } + options = append(options, "rshared") + s.Linux.RootfsPropagation = "rshared" + case runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER: + if err := ensureSharedOrSlave(src, osi.(osinterface.UNIX).LookupMount); err != nil { + return err + } + options = append(options, "rslave") + if s.Linux.RootfsPropagation != "rshared" && + s.Linux.RootfsPropagation != "rslave" { + s.Linux.RootfsPropagation = "rslave" + } + default: + log.G(ctx).Warnf("Unknown propagation mode for hostPath %q", mount.HostPath) + options = append(options, "rprivate") + } + + // NOTE(random-liu): we don't change all mounts to `ro` when root filesystem + // is readonly. This is different from docker's behavior, but makes more sense. 
+ if mount.GetReadonly() { + options = append(options, "ro") + } else { + options = append(options, "rw") + } + + if mount.GetSelinuxRelabel() { + if err := label.Relabel(src, mountLabel, true); err != nil && err != unix.ENOTSUP { + return errors.Wrapf(err, "relabel %q with %q failed", src, mountLabel) + } + } + s.Mounts = append(s.Mounts, runtimespec.Mount{ + Source: src, + Destination: dst, + Type: "bind", + Options: options, + }) + } + return nil + } +} + +// Ensure mount point on which path is mounted, is shared. +func ensureShared(path string, lookupMount func(string) (mount.Info, error)) error { + mountInfo, err := lookupMount(path) + if err != nil { + return err + } + + // Make sure source mount point is shared. + optsSplit := strings.Split(mountInfo.Optional, " ") + for _, opt := range optsSplit { + if strings.HasPrefix(opt, "shared:") { + return nil + } + } + + return errors.Errorf("path %q is mounted on %q but it is not a shared mount", path, mountInfo.Mountpoint) +} + +// ensure mount point on which path is mounted, is either shared or slave. +func ensureSharedOrSlave(path string, lookupMount func(string) (mount.Info, error)) error { + mountInfo, err := lookupMount(path) + if err != nil { + return err + } + // Make sure source mount point is shared. 
+ optsSplit := strings.Split(mountInfo.Optional, " ") + for _, opt := range optsSplit { + if strings.HasPrefix(opt, "shared:") { + return nil + } else if strings.HasPrefix(opt, "master:") { + return nil + } + } + return errors.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountInfo.Mountpoint) +} + +// WithPrivilegedDevices allows all host devices inside the container +func WithPrivilegedDevices(_ context.Context, _ oci.Client, _ *containers.Container, s *runtimespec.Spec) error { + if s.Linux == nil { + s.Linux = &runtimespec.Linux{} + } + if s.Linux.Resources == nil { + s.Linux.Resources = &runtimespec.LinuxResources{} + } + hostDevices, err := devices.HostDevices() + if err != nil { + return err + } + for _, hostDevice := range hostDevices { + rd := runtimespec.LinuxDevice{ + Path: hostDevice.Path, + Type: string(hostDevice.Type), + Major: hostDevice.Major, + Minor: hostDevice.Minor, + UID: &hostDevice.Uid, + GID: &hostDevice.Gid, + } + if hostDevice.Major == 0 && hostDevice.Minor == 0 { + // Invalid device, most likely a symbolic link, skip it. 
+ continue + } + addDevice(s, rd) + } + s.Linux.Resources.Devices = []runtimespec.LinuxDeviceCgroup{ + { + Allow: true, + Access: "rwm", + }, + } + return nil +} + +func addDevice(s *runtimespec.Spec, rd runtimespec.LinuxDevice) { + for i, dev := range s.Linux.Devices { + if dev.Path == rd.Path { + s.Linux.Devices[i] = rd + return + } + } + s.Linux.Devices = append(s.Linux.Devices, rd) +} + +// WithDevices sets the provided devices onto the container spec +func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOpts { + return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) { + if s.Linux == nil { + s.Linux = &runtimespec.Linux{} + } + if s.Linux.Resources == nil { + s.Linux.Resources = &runtimespec.LinuxResources{} + } + for _, device := range config.GetDevices() { + path, err := osi.ResolveSymbolicLink(device.HostPath) + if err != nil { + return err + } + dev, err := devices.DeviceFromPath(path, device.Permissions) + if err != nil { + return err + } + rd := runtimespec.LinuxDevice{ + Path: device.ContainerPath, + Type: string(dev.Type), + Major: dev.Major, + Minor: dev.Minor, + UID: &dev.Uid, + GID: &dev.Gid, + } + + addDevice(s, rd) + + s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, runtimespec.LinuxDeviceCgroup{ + Allow: true, + Type: string(dev.Type), + Major: &dev.Major, + Minor: &dev.Minor, + Access: dev.Permissions, + }) + } + return nil + } +} + +// WithCapabilities sets the provided capabilities from the security context +func WithCapabilities(sc *runtime.LinuxContainerSecurityContext) oci.SpecOpts { + capabilities := sc.GetCapabilities() + if capabilities == nil { + return nullOpt + } + + var opts []oci.SpecOpts + // Add/drop all capabilities if "all" is specified, so that + // following individual add/drop could still work. E.g. + // AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"} + // will be all capabilities without `CAP_CHOWN`. 
+ if util.InStringSlice(capabilities.GetAddCapabilities(), "ALL") { + opts = append(opts, oci.WithAllCapabilities) + } + if util.InStringSlice(capabilities.GetDropCapabilities(), "ALL") { + opts = append(opts, oci.WithCapabilities(nil)) + } + + var caps []string + for _, c := range capabilities.GetAddCapabilities() { + if strings.ToUpper(c) == "ALL" { + continue + } + // Capabilities in CRI doesn't have `CAP_` prefix, so add it. + caps = append(caps, "CAP_"+strings.ToUpper(c)) + } + opts = append(opts, oci.WithAddedCapabilities(caps)) + + caps = []string{} + for _, c := range capabilities.GetDropCapabilities() { + if strings.ToUpper(c) == "ALL" { + continue + } + caps = append(caps, "CAP_"+strings.ToUpper(c)) + } + opts = append(opts, oci.WithDroppedCapabilities(caps)) + return oci.Compose(opts...) +} + +// WithoutAmbientCaps removes the ambient caps from the spec +func WithoutAmbientCaps(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error { + if s.Process == nil { + s.Process = &runtimespec.Process{} + } + if s.Process.Capabilities == nil { + s.Process.Capabilities = &runtimespec.LinuxCapabilities{} + } + s.Process.Capabilities.Ambient = nil + return nil +} + +// WithDisabledCgroups clears the Cgroups Path from the spec +func WithDisabledCgroups(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error { + if s.Linux == nil { + s.Linux = &runtimespec.Linux{} + } + s.Linux.CgroupsPath = "" + return nil +} + +// WithSelinuxLabels sets the mount and process labels +func WithSelinuxLabels(process, mount string) oci.SpecOpts { + return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) { + if s.Linux == nil { + s.Linux = &runtimespec.Linux{} + } + if s.Process == nil { + s.Process = &runtimespec.Process{} + } + s.Linux.MountLabel = mount + s.Process.SelinuxLabel = process + return nil + } +} + +// WithResources sets the provided resource restrictions +func 
WithResources(resources *runtime.LinuxContainerResources) oci.SpecOpts { + return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) { + if resources == nil { + return nil + } + if s.Linux == nil { + s.Linux = &runtimespec.Linux{} + } + if s.Linux.Resources == nil { + s.Linux.Resources = &runtimespec.LinuxResources{} + } + if s.Linux.Resources.CPU == nil { + s.Linux.Resources.CPU = &runtimespec.LinuxCPU{} + } + if s.Linux.Resources.Memory == nil { + s.Linux.Resources.Memory = &runtimespec.LinuxMemory{} + } + var ( + p = uint64(resources.GetCpuPeriod()) + q = resources.GetCpuQuota() + shares = uint64(resources.GetCpuShares()) + limit = resources.GetMemoryLimitInBytes() + ) + + if p != 0 { + s.Linux.Resources.CPU.Period = &p + } + if q != 0 { + s.Linux.Resources.CPU.Quota = &q + } + if shares != 0 { + s.Linux.Resources.CPU.Shares = &shares + } + if cpus := resources.GetCpusetCpus(); cpus != "" { + s.Linux.Resources.CPU.Cpus = cpus + } + if mems := resources.GetCpusetMems(); mems != "" { + s.Linux.Resources.CPU.Mems = resources.GetCpusetMems() + } + if limit != 0 { + s.Linux.Resources.Memory.Limit = &limit + } + return nil + } +} + +// WithOOMScoreAdj sets the oom score +func WithOOMScoreAdj(config *runtime.ContainerConfig, restrict bool) oci.SpecOpts { + return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error { + if s.Process == nil { + s.Process = &runtimespec.Process{} + } + + resources := config.GetLinux().GetResources() + if resources == nil { + return nil + } + adj := int(resources.GetOomScoreAdj()) + if restrict { + var err error + adj, err = restrictOOMScoreAdj(adj) + if err != nil { + return err + } + } + s.Process.OOMScoreAdj = &adj + return nil + } +} + +// WithSysctls sets the provided sysctls onto the spec +func WithSysctls(sysctls map[string]string) oci.SpecOpts { + return func(ctx context.Context, client oci.Client, c *containers.Container, s 
*runtimespec.Spec) error { + if s.Linux == nil { + s.Linux = &runtimespec.Linux{} + } + if s.Linux.Sysctl == nil { + s.Linux.Sysctl = make(map[string]string) + } + for k, v := range sysctls { + s.Linux.Sysctl[k] = v + } + return nil + } +} + +// WithPodOOMScoreAdj sets the oom score for the pod sandbox +func WithPodOOMScoreAdj(adj int, restrict bool) oci.SpecOpts { + return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error { + if s.Process == nil { + s.Process = &runtimespec.Process{} + } + if restrict { + var err error + adj, err = restrictOOMScoreAdj(adj) + if err != nil { + return err + } + } + s.Process.OOMScoreAdj = &adj + return nil + } +} + +// WithSupplementalGroups sets the supplemental groups for the process +func WithSupplementalGroups(groups []int64) oci.SpecOpts { + return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error { + if s.Process == nil { + s.Process = &runtimespec.Process{} + } + var guids []uint32 + for _, g := range groups { + guids = append(guids, uint32(g)) + } + s.Process.User.AdditionalGids = mergeGids(s.Process.User.AdditionalGids, guids) + return nil + } +} + +// WithPodNamespaces sets the pod namespaces for the container +func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, pid uint32) oci.SpecOpts { + namespaces := config.GetNamespaceOptions() + + opts := []oci.SpecOpts{ + oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.NetworkNamespace, Path: GetNetworkNamespace(pid)}), + oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.IPCNamespace, Path: GetIPCNamespace(pid)}), + oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UTSNamespace, Path: GetUTSNamespace(pid)}), + } + if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER { + opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(pid)})) + } + return 
oci.Compose(opts...) +} + +// WithDefaultSandboxShares sets the default sandbox CPU shares +func WithDefaultSandboxShares(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error { + if s.Linux == nil { + s.Linux = &runtimespec.Linux{} + } + if s.Linux.Resources == nil { + s.Linux.Resources = &runtimespec.LinuxResources{} + } + if s.Linux.Resources.CPU == nil { + s.Linux.Resources.CPU = &runtimespec.LinuxCPU{} + } + i := uint64(DefaultSandboxCPUshares) + s.Linux.Resources.CPU.Shares = &i + return nil +} + +// WithoutNamespace removes the provided namespace +func WithoutNamespace(t runtimespec.LinuxNamespaceType) oci.SpecOpts { + return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error { + if s.Linux == nil { + return nil + } + var namespaces []runtimespec.LinuxNamespace + for i, ns := range s.Linux.Namespaces { + if ns.Type != t { + namespaces = append(namespaces, s.Linux.Namespaces[i]) + } + } + s.Linux.Namespaces = namespaces + return nil + } +} + +func nullOpt(_ context.Context, _ oci.Client, _ *containers.Container, _ *runtimespec.Spec) error { + return nil +} + +func getCurrentOOMScoreAdj() (int, error) { + b, err := ioutil.ReadFile("/proc/self/oom_score_adj") + if err != nil { + return 0, errors.Wrap(err, "could not get the daemon oom_score_adj") + } + s := strings.TrimSpace(string(b)) + i, err := strconv.Atoi(s) + if err != nil { + return 0, errors.Wrap(err, "could not get the daemon oom_score_adj") + } + return i, nil +} + +func restrictOOMScoreAdj(preferredOOMScoreAdj int) (int, error) { + currentOOMScoreAdj, err := getCurrentOOMScoreAdj() + if err != nil { + return preferredOOMScoreAdj, err + } + if preferredOOMScoreAdj < currentOOMScoreAdj { + return currentOOMScoreAdj, nil + } + return preferredOOMScoreAdj, nil +} + +const ( + // netNSFormat is the format of network namespace of a process. 
+ netNSFormat = "/proc/%v/ns/net" + // ipcNSFormat is the format of ipc namespace of a process. + ipcNSFormat = "/proc/%v/ns/ipc" + // utsNSFormat is the format of uts namespace of a process. + utsNSFormat = "/proc/%v/ns/uts" + // pidNSFormat is the format of pid namespace of a process. + pidNSFormat = "/proc/%v/ns/pid" +) + +// GetNetworkNamespace returns the network namespace of a process. +func GetNetworkNamespace(pid uint32) string { + return fmt.Sprintf(netNSFormat, pid) +} + +// GetIPCNamespace returns the ipc namespace of a process. +func GetIPCNamespace(pid uint32) string { + return fmt.Sprintf(ipcNSFormat, pid) +} + +// GetUTSNamespace returns the uts namespace of a process. +func GetUTSNamespace(pid uint32) string { + return fmt.Sprintf(utsNSFormat, pid) +} + +// GetPIDNamespace returns the pid namespace of a process. +func GetPIDNamespace(pid uint32) string { + return fmt.Sprintf(pidNSFormat, pid) +} diff --git a/pkg/containerd/opts/spec_unix_test.go b/pkg/containerd/opts/spec_unix_test.go new file mode 100644 index 000000000..a75285daf --- /dev/null +++ b/pkg/containerd/opts/spec_unix_test.go @@ -0,0 +1,49 @@ +// +build !windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package opts + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMergeGids(t *testing.T) { + gids1 := []uint32{3, 2, 1} + gids2 := []uint32{2, 3, 4} + assert.Equal(t, []uint32{1, 2, 3, 4}, mergeGids(gids1, gids2)) +} + +func TestRestrictOOMScoreAdj(t *testing.T) { + current, err := getCurrentOOMScoreAdj() + require.NoError(t, err) + + got, err := restrictOOMScoreAdj(current - 1) + require.NoError(t, err) + assert.Equal(t, got, current) + + got, err = restrictOOMScoreAdj(current) + require.NoError(t, err) + assert.Equal(t, got, current) + + got, err = restrictOOMScoreAdj(current + 1) + require.NoError(t, err) + assert.Equal(t, got, current+1) +} diff --git a/pkg/ioutil/write_closer_test.go b/pkg/ioutil/write_closer_test.go index 8b1eb4b20..c42e77eb6 100644 --- a/pkg/ioutil/write_closer_test.go +++ b/pkg/ioutil/write_closer_test.go @@ -69,7 +69,7 @@ func TestSerialWriteCloser(t *testing.T) { testData[i] = []byte(repeatNumber(i, dataLen) + "\n") } - f, err := ioutil.TempFile("/tmp", "serial-write-closer") + f, err := ioutil.TempFile("", "serial-write-closer") require.NoError(t, err) defer os.RemoveAll(f.Name()) defer f.Close() diff --git a/pkg/netns/netns.go b/pkg/netns/netns_unix.go similarity index 99% rename from pkg/netns/netns.go rename to pkg/netns/netns_unix.go index 56da0b6f2..e1467e558 100644 --- a/pkg/netns/netns.go +++ b/pkg/netns/netns_unix.go @@ -1,5 +1,7 @@ +// +build !windows + /* -Copyright 2018 The Containerd Authors. +Copyright The containerd Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/pkg/netns/netns_windows.go b/pkg/netns/netns_windows.go new file mode 100644 index 000000000..6cbc512c0 --- /dev/null +++ b/pkg/netns/netns_windows.go @@ -0,0 +1,50 @@ +// +build windows + +/* +Copyright The containerd Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package netns + +// TODO(windows): Implement netns for windows. +// NetNS holds network namespace. +type NetNS struct { +} + +// NewNetNS creates a network namespace. +func NewNetNS() (*NetNS, error) { + return nil, nil +} + +// LoadNetNS loads existing network namespace. +func LoadNetNS(path string) *NetNS { + return nil +} + +// Remove removes network namespace. Remove is idempotent, meaning it might +// be invoked multiple times and provides consistent result. +func (n *NetNS) Remove() error { + return nil +} + +// Closed checks whether the network namespace has been closed. 
+func (n *NetNS) Closed() (bool, error) { + return false, nil +} + +// GetPath returns network namespace path for sandbox container +func (n *NetNS) GetPath() string { + return "" +} diff --git a/pkg/os/os.go b/pkg/os/os.go index f77515a11..c82cec6ef 100644 --- a/pkg/os/os.go +++ b/pkg/os/os.go @@ -22,11 +22,7 @@ import ( "os" "path/filepath" - "github.com/containerd/containerd/mount" - "github.com/containerd/fifo" "github.com/docker/docker/pkg/symlink" - "golang.org/x/net/context" - "golang.org/x/sys/unix" ) // OS collects system level operations that need to be mocked out @@ -34,15 +30,11 @@ import ( type OS interface { MkdirAll(path string, perm os.FileMode) error RemoveAll(path string) error - OpenFifo(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error) Stat(name string) (os.FileInfo, error) ResolveSymbolicLink(name string) (string, error) FollowSymlinkInScope(path, scope string) (string, error) CopyFile(src, dest string, perm os.FileMode) error WriteFile(filename string, data []byte, perm os.FileMode) error - Mount(source string, target string, fstype string, flags uintptr, data string) error - Unmount(target string) error - LookupMount(path string) (mount.Info, error) Hostname() (string, error) } @@ -59,11 +51,6 @@ func (RealOS) RemoveAll(path string) error { return os.RemoveAll(path) } -// OpenFifo will call fifo.OpenFifo to open a fifo. -func (RealOS) OpenFifo(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error) { - return fifo.OpenFifo(ctx, fn, flag, perm) -} - // Stat will call os.Stat to get the status of the given file. func (RealOS) Stat(name string) (os.FileInfo, error) { return os.Stat(name) @@ -109,33 +96,6 @@ func (RealOS) WriteFile(filename string, data []byte, perm os.FileMode) error { return ioutil.WriteFile(filename, data, perm) } -// Mount will call unix.Mount to mount the file. 
-func (RealOS) Mount(source string, target string, fstype string, flags uintptr, data string) error { - return unix.Mount(source, target, fstype, flags, data) -} - -// Unmount will call Unmount to unmount the file. -func (RealOS) Unmount(target string) error { - return Unmount(target) -} - -// LookupMount gets mount info of a given path. -func (RealOS) LookupMount(path string) (mount.Info, error) { - return mount.Lookup(path) -} - -// Unmount unmounts the target. It does not return an error in case the target is not mounted. -// In case the target does not exist, the appropriate error is returned. -func Unmount(target string) error { - err := unix.Unmount(target, unix.MNT_DETACH) - if err == unix.EINVAL { - // ignore "not mounted" error - err = nil - } - - return err -} - // Hostname will call os.Hostname to get the hostname of the host. func (RealOS) Hostname() (string, error) { return os.Hostname() diff --git a/pkg/os/os_unix.go b/pkg/os/os_unix.go new file mode 100644 index 000000000..bf31a374d --- /dev/null +++ b/pkg/os/os_unix.go @@ -0,0 +1,59 @@ +// +build !windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package os + +import ( + "github.com/containerd/containerd/mount" + "golang.org/x/sys/unix" +) + +// UNIX collects unix system level operations that need to be +// mocked out during tests. 
+type UNIX interface { + Mount(source string, target string, fstype string, flags uintptr, data string) error + Unmount(target string) error + LookupMount(path string) (mount.Info, error) +} + +// Mount will call unix.Mount to mount the file. +func (RealOS) Mount(source string, target string, fstype string, flags uintptr, data string) error { + return unix.Mount(source, target, fstype, flags, data) +} + +// Unmount will call Unmount to unmount the file. +func (RealOS) Unmount(target string) error { + return Unmount(target) +} + +// LookupMount gets mount info of a given path. +func (RealOS) LookupMount(path string) (mount.Info, error) { + return mount.Lookup(path) +} + +// Unmount unmounts the target. It does not return an error in case the target is not mounted. +// In case the target does not exist, the appropriate error is returned. +func Unmount(target string) error { + err := unix.Unmount(target, unix.MNT_DETACH) + if err == unix.EINVAL { + // ignore "not mounted" error + err = nil + } + + return err +} diff --git a/pkg/os/testing/fake_os.go b/pkg/os/testing/fake_os.go index 7740e847b..05ce516d9 100644 --- a/pkg/os/testing/fake_os.go +++ b/pkg/os/testing/fake_os.go @@ -17,12 +17,10 @@ limitations under the License. package testing import ( - "io" "os" "sync" containerdmount "github.com/containerd/containerd/mount" - "golang.org/x/net/context" osInterface "github.com/containerd/cri/pkg/os" ) @@ -42,7 +40,6 @@ type FakeOS struct { sync.Mutex MkdirAllFn func(string, os.FileMode) error RemoveAllFn func(string) error - OpenFifoFn func(context.Context, string, int, os.FileMode) (io.ReadWriteCloser, error) StatFn func(string) (os.FileInfo, error) ResolveSymbolicLinkFn func(string) (string, error) FollowSymlinkInScopeFn func(string, string) (string, error) @@ -139,19 +136,6 @@ func (f *FakeOS) RemoveAll(path string) error { return nil } -// OpenFifo is a fake call that invokes OpenFifoFn or just returns nil. 
-func (f *FakeOS) OpenFifo(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error) { - f.appendCalls("OpenFifo", ctx, fn, flag, perm) - if err := f.getError("OpenFifo"); err != nil { - return nil, err - } - - if f.OpenFifoFn != nil { - return f.OpenFifoFn(ctx, fn, flag, perm) - } - return nil, nil -} - // Stat is a fake call that invokes StatFn or just return nil. func (f *FakeOS) Stat(name string) (os.FileInfo, error) { f.appendCalls("Stat", name) diff --git a/pkg/os/testing/fake_os_unix.go b/pkg/os/testing/fake_os_unix.go new file mode 100644 index 000000000..b5bff4389 --- /dev/null +++ b/pkg/os/testing/fake_os_unix.go @@ -0,0 +1,23 @@ +// +build !windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package testing + +import osInterface "github.com/containerd/cri/pkg/os" + +var _ osInterface.UNIX = &FakeOS{} diff --git a/pkg/server/container_create.go b/pkg/server/container_create.go index d34e66754..41785c373 100644 --- a/pkg/server/container_create.go +++ b/pkg/server/container_create.go @@ -18,23 +18,12 @@ package server import ( "path/filepath" - "strconv" - "strings" "time" "github.com/containerd/containerd" "github.com/containerd/containerd/containers" - "github.com/containerd/containerd/contrib/apparmor" - "github.com/containerd/containerd/contrib/seccomp" "github.com/containerd/containerd/log" "github.com/containerd/containerd/oci" - "github.com/containerd/cri/pkg/annotations" - "github.com/containerd/cri/pkg/config" - customopts "github.com/containerd/cri/pkg/containerd/opts" - ctrdutil "github.com/containerd/cri/pkg/containerd/util" - cio "github.com/containerd/cri/pkg/server/io" - containerstore "github.com/containerd/cri/pkg/store/container" - "github.com/containerd/cri/pkg/util" "github.com/containerd/typeurl" "github.com/davecgh/go-spew/spew" imagespec "github.com/opencontainers/image-spec/specs-go/v1" @@ -42,21 +31,12 @@ import ( "github.com/pkg/errors" "golang.org/x/net/context" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" -) -const ( - // profileNamePrefix is the prefix for loading profiles on a localhost. Eg. AppArmor localhost/profileName. - profileNamePrefix = "localhost/" // TODO (mikebrow): get localhost/ & runtime/default from CRI kubernetes/kubernetes#51747 - // runtimeDefault indicates that we should use or create a runtime default profile. - runtimeDefault = "runtime/default" - // dockerDefault indicates that we should use or create a docker default profile. - dockerDefault = "docker/default" - // appArmorDefaultProfileName is name to use when creating a default apparmor profile. 
- appArmorDefaultProfileName = "cri-containerd.apparmor.d" - // unconfinedProfile is a string indicating one should run a pod/containerd without a security profile - unconfinedProfile = "unconfined" - // seccompDefaultProfile is the default seccomp profile. - seccompDefaultProfile = dockerDefault + customopts "github.com/containerd/cri/pkg/containerd/opts" + ctrdutil "github.com/containerd/cri/pkg/containerd/util" + cio "github.com/containerd/cri/pkg/server/io" + containerstore "github.com/containerd/cri/pkg/store/container" + "github.com/containerd/cri/pkg/util" ) func init() { @@ -156,10 +136,10 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta }() // Create container volumes mounts. - volumeMounts := c.generateVolumeMounts(containerRootDir, config.GetMounts(), &image.ImageSpec.Config) + volumeMounts := c.volumeMounts(containerRootDir, config.GetMounts(), &image.ImageSpec.Config) - // Generate container runtime spec. - mounts := c.generateContainerMounts(sandboxID, config) + // Generate container mounts. + mounts := c.containerMounts(sandboxID, config) ociRuntime, err := c.getSandboxRuntime(sandboxConfig, sandbox.Metadata.RuntimeHandler) if err != nil { @@ -167,7 +147,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta } log.G(ctx).Debugf("Use OCI runtime %+v for sandbox %q and container %q", ociRuntime, sandboxID, id) - spec, err := c.generateContainerSpec(id, sandboxID, sandboxPid, config, sandboxConfig, + spec, err := c.containerSpec(id, sandboxID, sandboxPid, sandbox.NetNSPath, config, sandboxConfig, &image.ImageSpec.Config, append(mounts, volumeMounts...), ociRuntime) if err != nil { return nil, errors.Wrapf(err, "failed to generate container %q spec", id) @@ -185,7 +165,6 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta // rootfs readonly (requested by spec.Root.Readonly). 
customopts.WithNewSnapshot(id, containerdImage), } - if len(volumeMounts) > 0 { mountMap := make(map[string]string) for _, v := range volumeMounts { @@ -219,58 +198,11 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta } }() - var specOpts []oci.SpecOpts - securityContext := config.GetLinux().GetSecurityContext() - // Set container username. This could only be done by containerd, because it needs - // access to the container rootfs. Pass user name to containerd, and let it overwrite - // the spec for us. - userstr, err := generateUserString( - securityContext.GetRunAsUsername(), - securityContext.GetRunAsUser(), - securityContext.GetRunAsGroup()) - + specOpts, err := c.containerSpecOpts(config, &image.ImageSpec.Config) if err != nil { - return nil, errors.Wrap(err, "failed to generate user string") - } - if userstr == "" { - // Lastly, since no user override was passed via CRI try to set via OCI - // Image - userstr = image.ImageSpec.Config.User - } - if userstr != "" { - specOpts = append(specOpts, oci.WithUser(userstr)) + return nil, errors.Wrap(err, "") } - if securityContext.GetRunAsUsername() != "" { - userstr = securityContext.GetRunAsUsername() - } else { - // Even if RunAsUser is not set, we still call `GetValue` to get uid 0. - // Because it is still useful to get additional gids for uid 0. 
- userstr = strconv.FormatInt(securityContext.GetRunAsUser().GetValue(), 10) - } - specOpts = append(specOpts, customopts.WithAdditionalGIDs(userstr)) - - apparmorSpecOpts, err := generateApparmorSpecOpts( - securityContext.GetApparmorProfile(), - securityContext.GetPrivileged(), - c.apparmorEnabled) - if err != nil { - return nil, errors.Wrap(err, "failed to generate apparmor spec opts") - } - if apparmorSpecOpts != nil { - specOpts = append(specOpts, apparmorSpecOpts) - } - - seccompSpecOpts, err := generateSeccompSpecOpts( - securityContext.GetSeccompProfilePath(), - securityContext.GetPrivileged(), - c.seccompEnabled) - if err != nil { - return nil, errors.Wrap(err, "failed to generate seccomp spec opts") - } - if seccompSpecOpts != nil { - specOpts = append(specOpts, seccompSpecOpts) - } containerLabels := buildLabels(config.Labels, containerKindContainer) runtimeOptions, err := getRuntimeOptions(sandboxInfo) @@ -322,128 +254,10 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta return &runtime.CreateContainerResponse{ContainerId: id}, nil } -func (c *criService) generateContainerSpec(id string, sandboxID string, sandboxPid uint32, config *runtime.ContainerConfig, - sandboxConfig *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig, extraMounts []*runtime.Mount, - ociRuntime config.Runtime) (*runtimespec.Spec, error) { - - specOpts := []oci.SpecOpts{ - customopts.WithoutRunMount, - customopts.WithoutDefaultSecuritySettings, - customopts.WithRelativeRoot(relativeRootfsPath), - customopts.WithProcessArgs(config, imageConfig), - // this will be set based on the security context below - oci.WithNewPrivileges, - } - if config.GetWorkingDir() != "" { - specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir())) - } else if imageConfig.WorkingDir != "" { - specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) - } - - if config.GetTty() { - specOpts = append(specOpts, oci.WithTTY) - } - - // Add 
HOSTNAME env. - var ( - err error - hostname = sandboxConfig.GetHostname() - ) - if hostname == "" { - if hostname, err = c.os.Hostname(); err != nil { - return nil, err - } - } - specOpts = append(specOpts, oci.WithEnv([]string{hostnameEnv + "=" + hostname})) - - // Apply envs from image config first, so that envs from container config - // can override them. - env := imageConfig.Env - for _, e := range config.GetEnvs() { - env = append(env, e.GetKey()+"="+e.GetValue()) - } - specOpts = append(specOpts, oci.WithEnv(env)) - - securityContext := config.GetLinux().GetSecurityContext() - selinuxOpt := securityContext.GetSelinuxOptions() - processLabel, mountLabel, err := initSelinuxOpts(selinuxOpt) - if err != nil { - return nil, errors.Wrapf(err, "failed to init selinux options %+v", securityContext.GetSelinuxOptions()) - } - specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel)) - - if !c.config.DisableProcMount { - // Apply masked paths if specified. - // If the container is privileged, this will be cleared later on. - specOpts = append(specOpts, oci.WithMaskedPaths(securityContext.GetMaskedPaths())) - - // Apply readonly paths if specified. - // If the container is privileged, this will be cleared later on. - specOpts = append(specOpts, oci.WithReadonlyPaths(securityContext.GetReadonlyPaths())) - } - - if securityContext.GetPrivileged() { - if !sandboxConfig.GetLinux().GetSecurityContext().GetPrivileged() { - return nil, errors.New("no privileged container allowed in sandbox") - } - specOpts = append(specOpts, oci.WithPrivileged) - if !ociRuntime.PrivilegedWithoutHostDevices { - specOpts = append(specOpts, customopts.WithPrivilegedDevices) - } - } else { // not privileged - specOpts = append(specOpts, customopts.WithDevices(c.os, config), customopts.WithCapabilities(securityContext)) - } - - // Clear all ambient capabilities. The implication of non-root + caps - // is not clearly defined in Kubernetes. 
- // See https://github.com/kubernetes/kubernetes/issues/56374 - // Keep docker's behavior for now. - specOpts = append(specOpts, - customopts.WithoutAmbientCaps, - customopts.WithSelinuxLabels(processLabel, mountLabel), - ) - - // TODO: Figure out whether we should set no new privilege for sandbox container by default - if securityContext.GetNoNewPrivs() { - specOpts = append(specOpts, oci.WithNoNewPrivileges) - } - // TODO(random-liu): [P1] Set selinux options (privileged or not). - if securityContext.GetReadonlyRootfs() { - specOpts = append(specOpts, oci.WithRootFSReadonly()) - } - - if c.config.DisableCgroup { - specOpts = append(specOpts, customopts.WithDisabledCgroups) - } else { - specOpts = append(specOpts, customopts.WithResources(config.GetLinux().GetResources())) - if sandboxConfig.GetLinux().GetCgroupParent() != "" { - cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id) - specOpts = append(specOpts, oci.WithCgroup(cgroupsPath)) - } - } - - supplementalGroups := securityContext.GetSupplementalGroups() - - for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations, - ociRuntime.PodAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - specOpts = append(specOpts, - customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj), - customopts.WithPodNamespaces(securityContext, sandboxPid), - customopts.WithSupplementalGroups(supplementalGroups), - customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer), - customopts.WithAnnotation(annotations.SandboxID, sandboxID), - ) - - return runtimeSpec(id, specOpts...) -} - -// generateVolumeMounts sets up image volumes for container. Rely on the removal of container +// volumeMounts sets up image volumes for container. Rely on the removal of container // root directory to do cleanup. Note that image volume will be skipped, if there is criMounts // specified with the same destination. 
-func (c *criService) generateVolumeMounts(containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount { +func (c *criService) volumeMounts(containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount { if len(config.Volumes) == 0 { return nil } @@ -469,61 +283,9 @@ func (c *criService) generateVolumeMounts(containerRootDir string, criMounts []* return mounts } -// generateContainerMounts sets up necessary container mounts including /dev/shm, /etc/hosts -// and /etc/resolv.conf. -func (c *criService) generateContainerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount { - var mounts []*runtime.Mount - securityContext := config.GetLinux().GetSecurityContext() - if !isInCRIMounts(etcHostname, config.GetMounts()) { - // /etc/hostname is added since 1.1.6, 1.2.4 and 1.3. - // For in-place upgrade, the old sandbox doesn't have the hostname file, - // do not mount this in that case. - // TODO(random-liu): Remove the check and always mount this when - // containerd 1.1 and 1.2 are deprecated. - hostpath := c.getSandboxHostname(sandboxID) - if _, err := c.os.Stat(hostpath); err == nil { - mounts = append(mounts, &runtime.Mount{ - ContainerPath: etcHostname, - HostPath: hostpath, - Readonly: securityContext.GetReadonlyRootfs(), - }) - } - } - - if !isInCRIMounts(etcHosts, config.GetMounts()) { - mounts = append(mounts, &runtime.Mount{ - ContainerPath: etcHosts, - HostPath: c.getSandboxHosts(sandboxID), - Readonly: securityContext.GetReadonlyRootfs(), - }) - } - - // Mount sandbox resolv.config. 
- // TODO: Need to figure out whether we should always mount it as read-only - if !isInCRIMounts(resolvConfPath, config.GetMounts()) { - mounts = append(mounts, &runtime.Mount{ - ContainerPath: resolvConfPath, - HostPath: c.getResolvPath(sandboxID), - Readonly: securityContext.GetReadonlyRootfs(), - }) - } - - if !isInCRIMounts(devShm, config.GetMounts()) { - sandboxDevShm := c.getSandboxDevShm(sandboxID) - if securityContext.GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE { - sandboxDevShm = devShm - } - mounts = append(mounts, &runtime.Mount{ - ContainerPath: devShm, - HostPath: sandboxDevShm, - Readonly: false, - }) - } - return mounts -} - // runtimeSpec returns a default runtime spec used in cri-containerd. -func runtimeSpec(id string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) { +// TODO(windows): Remove nolint after windows starts using this helper. +func runtimeSpec(id string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) { // nolint: deadcode, unused // GenerateSpec needs namespace. ctx := ctrdutil.NamespacedContext() spec, err := oci.GenerateSpec(ctx, nil, &containers.Container{ID: id}, opts...) @@ -532,105 +294,3 @@ func runtimeSpec(id string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) { } return spec, nil } - -// generateSeccompSpecOpts generates containerd SpecOpts for seccomp. -func generateSeccompSpecOpts(seccompProf string, privileged, seccompEnabled bool) (oci.SpecOpts, error) { - if privileged { - // Do not set seccomp profile when container is privileged - return nil, nil - } - // Set seccomp profile - if seccompProf == runtimeDefault || seccompProf == dockerDefault { - // use correct default profile (Eg. 
if not configured otherwise, the default is docker/default) - seccompProf = seccompDefaultProfile - } - if !seccompEnabled { - if seccompProf != "" && seccompProf != unconfinedProfile { - return nil, errors.New("seccomp is not supported") - } - return nil, nil - } - switch seccompProf { - case "", unconfinedProfile: - // Do not set seccomp profile. - return nil, nil - case dockerDefault: - // Note: WithDefaultProfile specOpts must be added after capabilities - return seccomp.WithDefaultProfile(), nil - default: - // Require and Trim default profile name prefix - if !strings.HasPrefix(seccompProf, profileNamePrefix) { - return nil, errors.Errorf("invalid seccomp profile %q", seccompProf) - } - return seccomp.WithProfile(strings.TrimPrefix(seccompProf, profileNamePrefix)), nil - } -} - -// generateApparmorSpecOpts generates containerd SpecOpts for apparmor. -func generateApparmorSpecOpts(apparmorProf string, privileged, apparmorEnabled bool) (oci.SpecOpts, error) { - if !apparmorEnabled { - // Should fail loudly if user try to specify apparmor profile - // but we don't support it. - if apparmorProf != "" && apparmorProf != unconfinedProfile { - return nil, errors.New("apparmor is not supported") - } - return nil, nil - } - switch apparmorProf { - // Based on kubernetes#51746, default apparmor profile should be applied - // for when apparmor is not specified. 
- case runtimeDefault, "": - if privileged { - // Do not set apparmor profile when container is privileged - return nil, nil - } - // TODO (mikebrow): delete created apparmor default profile - return apparmor.WithDefaultProfile(appArmorDefaultProfileName), nil - case unconfinedProfile: - return nil, nil - default: - // Require and Trim default profile name prefix - if !strings.HasPrefix(apparmorProf, profileNamePrefix) { - return nil, errors.Errorf("invalid apparmor profile %q", apparmorProf) - } - return apparmor.WithProfile(strings.TrimPrefix(apparmorProf, profileNamePrefix)), nil - } -} - -// generateUserString generates valid user string based on OCI Image Spec -// v1.0.0. -// -// CRI defines that the following combinations are valid: -// -// (none) -> "" -// username -> username -// username, uid -> username -// username, uid, gid -> username:gid -// username, gid -> username:gid -// uid -> uid -// uid, gid -> uid:gid -// gid -> error -// -// TODO(random-liu): Add group name support in CRI. -func generateUserString(username string, uid, gid *runtime.Int64Value) (string, error) { - var userstr, groupstr string - if uid != nil { - userstr = strconv.FormatInt(uid.GetValue(), 10) - } - if username != "" { - userstr = username - } - if gid != nil { - groupstr = strconv.FormatInt(gid.GetValue(), 10) - } - if userstr == "" { - if groupstr != "" { - return "", errors.Errorf("user group %q is specified without user", groupstr) - } - return "", nil - } - if groupstr != "" { - userstr = userstr + ":" + groupstr - } - return userstr, nil -} diff --git a/pkg/server/container_create_test.go b/pkg/server/container_create_test.go index 6fb65cd22..76fe93344 100644 --- a/pkg/server/container_create_test.go +++ b/pkg/server/container_create_test.go @@ -17,543 +17,15 @@ limitations under the License. 
package server import ( - "context" - "os" "path/filepath" - "reflect" - "strings" "testing" - "github.com/containerd/containerd/containers" - "github.com/containerd/containerd/contrib/apparmor" - "github.com/containerd/containerd/contrib/seccomp" - "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/oci" - "github.com/containerd/cri/pkg/annotations" - "github.com/containerd/cri/pkg/config" - "github.com/containerd/cri/pkg/containerd/opts" - ctrdutil "github.com/containerd/cri/pkg/containerd/util" - ostesting "github.com/containerd/cri/pkg/os/testing" - "github.com/containerd/cri/pkg/util" imagespec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/opencontainers/runc/libcontainer/devices" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" ) -func checkMount(t *testing.T, mounts []runtimespec.Mount, src, dest, typ string, - contains, notcontains []string) { - found := false - for _, m := range mounts { - if m.Source == src && m.Destination == dest { - assert.Equal(t, m.Type, typ) - for _, c := range contains { - assert.Contains(t, m.Options, c) - } - for _, n := range notcontains { - assert.NotContains(t, m.Options, n) - } - found = true - break - } - } - assert.True(t, found, "mount from %q to %q not found", src, dest) -} - -func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig, - *imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) { - config := &runtime.ContainerConfig{ - Metadata: &runtime.ContainerMetadata{ - Name: "test-name", - Attempt: 1, - }, - Image: &runtime.ImageSpec{ - Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799", - }, - Command: []string{"test", "command"}, - Args: []string{"test", "args"}, - WorkingDir: "test-cwd", - Envs: []*runtime.KeyValue{ - 
{Key: "k1", Value: "v1"}, - {Key: "k2", Value: "v2"}, - {Key: "k3", Value: "v3=v3bis"}, - {Key: "k4", Value: "v4=v4bis=foop"}, - }, - Mounts: []*runtime.Mount{ - // everything default - { - ContainerPath: "container-path-1", - HostPath: "host-path-1", - }, - // readOnly - { - ContainerPath: "container-path-2", - HostPath: "host-path-2", - Readonly: true, - }, - }, - Labels: map[string]string{"a": "b"}, - Annotations: map[string]string{"c": "d"}, - Linux: &runtime.LinuxContainerConfig{ - Resources: &runtime.LinuxContainerResources{ - CpuPeriod: 100, - CpuQuota: 200, - CpuShares: 300, - MemoryLimitInBytes: 400, - OomScoreAdj: 500, - CpusetCpus: "0-1", - CpusetMems: "2-3", - }, - SecurityContext: &runtime.LinuxContainerSecurityContext{ - SupplementalGroups: []int64{1111, 2222}, - NoNewPrivs: true, - }, - }, - } - sandboxConfig := &runtime.PodSandboxConfig{ - Metadata: &runtime.PodSandboxMetadata{ - Name: "test-sandbox-name", - Uid: "test-sandbox-uid", - Namespace: "test-sandbox-ns", - Attempt: 2, - }, - Annotations: map[string]string{"c": "d"}, - Linux: &runtime.LinuxPodSandboxConfig{ - CgroupParent: "/test/cgroup/parent", - SecurityContext: &runtime.LinuxSandboxSecurityContext{}, - }, - } - imageConfig := &imagespec.ImageConfig{ - Env: []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"}, - Entrypoint: []string{"/entrypoint"}, - Cmd: []string{"cmd"}, - WorkingDir: "/workspace", - } - specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) { - assert.Equal(t, relativeRootfsPath, spec.Root.Path) - assert.Equal(t, []string{"test", "command", "test", "args"}, spec.Process.Args) - assert.Equal(t, "test-cwd", spec.Process.Cwd) - assert.Contains(t, spec.Process.Env, "k1=v1", "k2=v2", "k3=v3=v3bis", "ik4=iv4=iv4bis=boop") - assert.Contains(t, spec.Process.Env, "ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "k4=v4=v4bis=foop") - - t.Logf("Check cgroups bind mount") - checkMount(t, spec.Mounts, "cgroup", 
"/sys/fs/cgroup", "cgroup", []string{"ro"}, nil) - - t.Logf("Check bind mount") - checkMount(t, spec.Mounts, "host-path-1", "container-path-1", "bind", []string{"rbind", "rprivate", "rw"}, nil) - checkMount(t, spec.Mounts, "host-path-2", "container-path-2", "bind", []string{"rbind", "rprivate", "ro"}, nil) - - t.Logf("Check resource limits") - assert.EqualValues(t, *spec.Linux.Resources.CPU.Period, 100) - assert.EqualValues(t, *spec.Linux.Resources.CPU.Quota, 200) - assert.EqualValues(t, *spec.Linux.Resources.CPU.Shares, 300) - assert.EqualValues(t, spec.Linux.Resources.CPU.Cpus, "0-1") - assert.EqualValues(t, spec.Linux.Resources.CPU.Mems, "2-3") - assert.EqualValues(t, *spec.Linux.Resources.Memory.Limit, 400) - assert.EqualValues(t, *spec.Process.OOMScoreAdj, 500) - - t.Logf("Check supplemental groups") - assert.Contains(t, spec.Process.User.AdditionalGids, uint32(1111)) - assert.Contains(t, spec.Process.User.AdditionalGids, uint32(2222)) - - t.Logf("Check no_new_privs") - assert.Equal(t, spec.Process.NoNewPrivileges, true) - - t.Logf("Check cgroup path") - assert.Equal(t, getCgroupsPath("/test/cgroup/parent", id), spec.Linux.CgroupsPath) - - t.Logf("Check namespaces") - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.NetworkNamespace, - Path: opts.GetNetworkNamespace(sandboxPid), - }) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.IPCNamespace, - Path: opts.GetIPCNamespace(sandboxPid), - }) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.UTSNamespace, - Path: opts.GetUTSNamespace(sandboxPid), - }) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.PIDNamespace, - Path: opts.GetPIDNamespace(sandboxPid), - }) - - t.Logf("Check PodSandbox annotations") - assert.Contains(t, spec.Annotations, annotations.SandboxID) - assert.EqualValues(t, spec.Annotations[annotations.SandboxID], sandboxID) - - 
assert.Contains(t, spec.Annotations, annotations.ContainerType) - assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeContainer) - } - return config, sandboxConfig, imageConfig, specCheck -} - -func TestGeneralContainerSpec(t *testing.T) { - testID := "test-id" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - testSandboxID := "sandbox-id" - spec, err := c.generateContainerSpec(testID, testSandboxID, testPid, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - specCheck(t, testID, testSandboxID, testPid, spec) -} - -func TestContainerCapabilities(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testPid := uint32(1234) - for desc, test := range map[string]struct { - capability *runtime.Capability - includes []string - excludes []string - }{ - "should be able to add/drop capabilities": { - capability: &runtime.Capability{ - AddCapabilities: []string{"SYS_ADMIN"}, - DropCapabilities: []string{"CHOWN"}, - }, - includes: []string{"CAP_SYS_ADMIN"}, - excludes: []string{"CAP_CHOWN"}, - }, - "should be able to add all capabilities": { - capability: &runtime.Capability{ - AddCapabilities: []string{"ALL"}, - }, - includes: oci.GetAllCapabilities(), - }, - "should be able to drop all capabilities": { - capability: &runtime.Capability{ - DropCapabilities: []string{"ALL"}, - }, - excludes: oci.GetAllCapabilities(), - }, - "should be able to drop capabilities with add all": { - capability: &runtime.Capability{ - AddCapabilities: []string{"ALL"}, - DropCapabilities: []string{"CHOWN"}, - }, - includes: util.SubtractStringSlice(oci.GetAllCapabilities(), "CAP_CHOWN"), - excludes: []string{"CAP_CHOWN"}, - }, - "should be able to add capabilities with drop all": { - capability: &runtime.Capability{ - AddCapabilities: []string{"SYS_ADMIN"}, - DropCapabilities: 
[]string{"ALL"}, - }, - includes: []string{"CAP_SYS_ADMIN"}, - excludes: util.SubtractStringSlice(oci.GetAllCapabilities(), "CAP_SYS_ADMIN"), - }, - } { - t.Logf("TestCase %q", desc) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - - containerConfig.Linux.SecurityContext.Capabilities = test.capability - spec, err := c.generateContainerSpec(testID, testSandboxID, testPid, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - specCheck(t, testID, testSandboxID, testPid, spec) - for _, include := range test.includes { - assert.Contains(t, spec.Process.Capabilities.Bounding, include) - assert.Contains(t, spec.Process.Capabilities.Effective, include) - assert.Contains(t, spec.Process.Capabilities.Inheritable, include) - assert.Contains(t, spec.Process.Capabilities.Permitted, include) - } - for _, exclude := range test.excludes { - assert.NotContains(t, spec.Process.Capabilities.Bounding, exclude) - assert.NotContains(t, spec.Process.Capabilities.Effective, exclude) - assert.NotContains(t, spec.Process.Capabilities.Inheritable, exclude) - assert.NotContains(t, spec.Process.Capabilities.Permitted, exclude) - } - assert.Empty(t, spec.Process.Capabilities.Ambient) - } -} - -func TestContainerSpecTty(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - for _, tty := range []bool{true, false} { - containerConfig.Tty = tty - spec, err := c.generateContainerSpec(testID, testSandboxID, testPid, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - specCheck(t, testID, testSandboxID, testPid, spec) - assert.Equal(t, tty, spec.Process.Terminal) - if tty { - assert.Contains(t, spec.Process.Env, "TERM=xterm") - } else { - 
assert.NotContains(t, spec.Process.Env, "TERM=xterm") - } - } -} - -func TestPodAnnotationPassthroughContainerSpec(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testPid := uint32(1234) - - for desc, test := range map[string]struct { - podAnnotations []string - configChange func(*runtime.PodSandboxConfig) - specCheck func(*testing.T, *runtimespec.Spec) - }{ - "a passthrough annotation should be passed as an OCI annotation": { - podAnnotations: []string{"c"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - assert.Equal(t, spec.Annotations["c"], "d") - }, - }, - "a non-passthrough annotation should not be passed as an OCI annotation": { - configChange: func(c *runtime.PodSandboxConfig) { - c.Annotations["d"] = "e" - }, - podAnnotations: []string{"c"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - assert.Equal(t, spec.Annotations["c"], "d") - _, ok := spec.Annotations["d"] - assert.False(t, ok) - }, - }, - "passthrough annotations should support wildcard match": { - configChange: func(c *runtime.PodSandboxConfig) { - c.Annotations["t.f"] = "j" - c.Annotations["z.g"] = "o" - c.Annotations["z"] = "o" - c.Annotations["y.ca"] = "b" - c.Annotations["y"] = "b" - }, - podAnnotations: []string{"t*", "z.*", "y.c*"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - t.Logf("%+v", spec.Annotations) - assert.Equal(t, spec.Annotations["t.f"], "j") - assert.Equal(t, spec.Annotations["z.g"], "o") - assert.Equal(t, spec.Annotations["y.ca"], "b") - _, ok := spec.Annotations["y"] - assert.False(t, ok) - _, ok = spec.Annotations["z"] - assert.False(t, ok) - }, - }, - } { - t.Run(desc, func(t *testing.T) { - c := newTestCRIService() - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - if test.configChange != nil { - test.configChange(sandboxConfig) - } - - ociRuntime := config.Runtime{ - PodAnnotations: test.podAnnotations, - } - spec, err := c.generateContainerSpec(testID, testSandboxID, 
testPid, - containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - assert.NoError(t, err) - assert.NotNil(t, spec) - specCheck(t, testID, testSandboxID, testPid, spec) - if test.specCheck != nil { - test.specCheck(t, spec) - } - }) - } - -} - -func TestContainerSpecReadonlyRootfs(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - for _, readonly := range []bool{true, false} { - containerConfig.Linux.SecurityContext.ReadonlyRootfs = readonly - spec, err := c.generateContainerSpec(testID, testSandboxID, testPid, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - specCheck(t, testID, testSandboxID, testPid, spec) - assert.Equal(t, readonly, spec.Root.Readonly) - } -} - -func TestContainerSpecWithExtraMounts(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - mountInConfig := &runtime.Mount{ - // Test cleanpath - ContainerPath: "test-container-path/", - HostPath: "test-host-path", - Readonly: false, - } - containerConfig.Mounts = append(containerConfig.Mounts, mountInConfig) - extraMounts := []*runtime.Mount{ - { - ContainerPath: "test-container-path", - HostPath: "test-host-path-extra", - Readonly: true, - }, - { - ContainerPath: "/sys", - HostPath: "test-sys-extra", - Readonly: false, - }, - { - ContainerPath: "/dev", - HostPath: "test-dev-extra", - Readonly: false, - }, - } - spec, err := c.generateContainerSpec(testID, testSandboxID, testPid, containerConfig, sandboxConfig, imageConfig, extraMounts, ociRuntime) - require.NoError(t, err) - specCheck(t, testID, testSandboxID, testPid, spec) - var mounts, sysMounts, devMounts 
[]runtimespec.Mount - for _, m := range spec.Mounts { - if strings.HasPrefix(m.Destination, "test-container-path") { - mounts = append(mounts, m) - } else if m.Destination == "/sys" { - sysMounts = append(sysMounts, m) - } else if strings.HasPrefix(m.Destination, "/dev") { - devMounts = append(devMounts, m) - } - } - t.Logf("CRI mount should override extra mount") - require.Len(t, mounts, 1) - assert.Equal(t, "test-host-path", mounts[0].Source) - assert.Contains(t, mounts[0].Options, "rw") - - t.Logf("Extra mount should override default mount") - require.Len(t, sysMounts, 1) - assert.Equal(t, "test-sys-extra", sysMounts[0].Source) - assert.Contains(t, sysMounts[0].Options, "rw") - - t.Logf("Dev mount should override all default dev mounts") - require.Len(t, devMounts, 1) - assert.Equal(t, "test-dev-extra", devMounts[0].Source) - assert.Contains(t, devMounts[0].Options, "rw") -} - -func TestContainerAndSandboxPrivileged(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - for desc, test := range map[string]struct { - containerPrivileged bool - sandboxPrivileged bool - expectError bool - }{ - "privileged container in non-privileged sandbox should fail": { - containerPrivileged: true, - sandboxPrivileged: false, - expectError: true, - }, - "privileged container in privileged sandbox should be fine": { - containerPrivileged: true, - sandboxPrivileged: true, - expectError: false, - }, - "non-privileged container in privileged sandbox should be fine": { - containerPrivileged: false, - sandboxPrivileged: true, - expectError: false, - }, - "non-privileged container in non-privileged sandbox should be fine": { - containerPrivileged: false, - sandboxPrivileged: false, - expectError: false, - }, - } { - t.Logf("TestCase %q", desc) - containerConfig.Linux.SecurityContext.Privileged = 
test.containerPrivileged - sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - Privileged: test.sandboxPrivileged, - } - _, err := c.generateContainerSpec(testID, testSandboxID, testPid, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - if test.expectError { - assert.Error(t, err) - } else { - assert.NoError(t, err) - } - } -} - -func TestContainerSpecCommand(t *testing.T) { - for desc, test := range map[string]struct { - criEntrypoint []string - criArgs []string - imageEntrypoint []string - imageArgs []string - expected []string - expectErr bool - }{ - "should use cri entrypoint if it's specified": { - criEntrypoint: []string{"a", "b"}, - imageEntrypoint: []string{"c", "d"}, - imageArgs: []string{"e", "f"}, - expected: []string{"a", "b"}, - }, - "should use cri entrypoint if it's specified even if it's empty": { - criEntrypoint: []string{}, - criArgs: []string{"a", "b"}, - imageEntrypoint: []string{"c", "d"}, - imageArgs: []string{"e", "f"}, - expected: []string{"a", "b"}, - }, - "should use cri entrypoint and args if they are specified": { - criEntrypoint: []string{"a", "b"}, - criArgs: []string{"c", "d"}, - imageEntrypoint: []string{"e", "f"}, - imageArgs: []string{"g", "h"}, - expected: []string{"a", "b", "c", "d"}, - }, - "should use image entrypoint if cri entrypoint is not specified": { - criArgs: []string{"a", "b"}, - imageEntrypoint: []string{"c", "d"}, - imageArgs: []string{"e", "f"}, - expected: []string{"c", "d", "a", "b"}, - }, - "should use image args if both cri entrypoint and args are not specified": { - imageEntrypoint: []string{"c", "d"}, - imageArgs: []string{"e", "f"}, - expected: []string{"c", "d", "e", "f"}, - }, - "should return error if both entrypoint and args are empty": { - expectErr: true, - }, - } { - - config, _, imageConfig, _ := getCreateContainerTestData() - config.Command = test.criEntrypoint - config.Args = test.criArgs - imageConfig.Entrypoint = test.imageEntrypoint - imageConfig.Cmd = 
test.imageArgs - - var spec runtimespec.Spec - err := opts.WithProcessArgs(config, imageConfig)(context.Background(), nil, nil, &spec) - if test.expectErr { - assert.Error(t, err) - continue - } - assert.NoError(t, err) - assert.Equal(t, test.expected, spec.Process.Args, desc) - } -} - -func TestGenerateVolumeMounts(t *testing.T) { +func TestVolumeMounts(t *testing.T) { testContainerRootDir := "test-container-root" for desc, test := range map[string]struct { criMounts []*runtime.Mount @@ -606,7 +78,7 @@ func TestGenerateVolumeMounts(t *testing.T) { Volumes: test.imageVolumes, } c := newTestCRIService() - got := c.generateVolumeMounts(testContainerRootDir, test.criMounts, config) + got := c.volumeMounts(testContainerRootDir, test.criMounts, config) assert.Len(t, got, len(test.expectedMountDest)) for _, dest := range test.expectedMountDest { found := false @@ -623,761 +95,3 @@ func TestGenerateVolumeMounts(t *testing.T) { } } } - -func TestGenerateContainerMounts(t *testing.T) { - const testSandboxID = "test-id" - for desc, test := range map[string]struct { - statFn func(string) (os.FileInfo, error) - criMounts []*runtime.Mount - securityContext *runtime.LinuxContainerSecurityContext - expectedMounts []*runtime.Mount - }{ - "should setup ro mount when rootfs is read-only": { - securityContext: &runtime.LinuxContainerSecurityContext{ - ReadonlyRootfs: true, - }, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), - Readonly: true, - }, - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: true, - }, - { - ContainerPath: resolvConfPath, - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - Readonly: true, - }, - { - ContainerPath: "/dev/shm", - HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), - Readonly: false, - }, - }, - }, - "should setup 
rw mount when rootfs is read-write": { - securityContext: &runtime.LinuxContainerSecurityContext{}, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), - Readonly: false, - }, - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: false, - }, - { - ContainerPath: resolvConfPath, - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - Readonly: false, - }, - { - ContainerPath: "/dev/shm", - HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), - Readonly: false, - }, - }, - }, - "should use host /dev/shm when host ipc is set": { - securityContext: &runtime.LinuxContainerSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{Ipc: runtime.NamespaceMode_NODE}, - }, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), - Readonly: false, - }, - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: false, - }, - { - ContainerPath: resolvConfPath, - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - Readonly: false, - }, - { - ContainerPath: "/dev/shm", - HostPath: "/dev/shm", - Readonly: false, - }, - }, - }, - "should skip container mounts if already mounted by CRI": { - criMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: "/test-etc-hostname", - }, - { - ContainerPath: "/etc/hosts", - HostPath: "/test-etc-host", - }, - { - ContainerPath: resolvConfPath, - HostPath: "test-resolv-conf", - }, - { - ContainerPath: "/dev/shm", - HostPath: "test-dev-shm", - }, - }, - securityContext: &runtime.LinuxContainerSecurityContext{}, - expectedMounts: nil, - }, - "should skip hostname mount if the old sandbox doesn't have hostname 
file": { - statFn: func(path string) (os.FileInfo, error) { - assert.Equal(t, filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), path) - return nil, errors.New("random error") - }, - securityContext: &runtime.LinuxContainerSecurityContext{}, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: false, - }, - { - ContainerPath: resolvConfPath, - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - Readonly: false, - }, - { - ContainerPath: "/dev/shm", - HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), - Readonly: false, - }, - }, - }, - } { - config := &runtime.ContainerConfig{ - Metadata: &runtime.ContainerMetadata{ - Name: "test-name", - Attempt: 1, - }, - Mounts: test.criMounts, - Linux: &runtime.LinuxContainerConfig{ - SecurityContext: test.securityContext, - }, - } - c := newTestCRIService() - c.os.(*ostesting.FakeOS).StatFn = test.statFn - mounts := c.generateContainerMounts(testSandboxID, config) - assert.Equal(t, test.expectedMounts, mounts, desc) - } -} - -func TestPrivilegedBindMount(t *testing.T) { - testPid := uint32(1234) - c := newTestCRIService() - testSandboxID := "sandbox-id" - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - ociRuntime := config.Runtime{} - - for desc, test := range map[string]struct { - privileged bool - expectedSysFSRO bool - expectedCgroupFSRO bool - }{ - "sysfs and cgroupfs should mount as 'ro' by default": { - expectedSysFSRO: true, - expectedCgroupFSRO: true, - }, - "sysfs and cgroupfs should not mount as 'ro' if privileged": { - privileged: true, - expectedSysFSRO: false, - expectedCgroupFSRO: false, - }, - } { - t.Logf("TestCase %q", desc) - - containerConfig.Linux.SecurityContext.Privileged = test.privileged - sandboxConfig.Linux.SecurityContext.Privileged = test.privileged - - spec, err := 
c.generateContainerSpec(t.Name(), testSandboxID, testPid, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - - assert.NoError(t, err) - if test.expectedSysFSRO { - checkMount(t, spec.Mounts, "sysfs", "/sys", "sysfs", []string{"ro"}, []string{"rw"}) - } else { - checkMount(t, spec.Mounts, "sysfs", "/sys", "sysfs", []string{"rw"}, []string{"ro"}) - } - if test.expectedCgroupFSRO { - checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"ro"}, []string{"rw"}) - } else { - checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"rw"}, []string{"ro"}) - } - } -} - -func TestMountPropagation(t *testing.T) { - - sharedLookupMountFn := func(string) (mount.Info, error) { - return mount.Info{ - Mountpoint: "host-path", - Optional: "shared:", - }, nil - } - - slaveLookupMountFn := func(string) (mount.Info, error) { - return mount.Info{ - Mountpoint: "host-path", - Optional: "master:", - }, nil - } - - othersLookupMountFn := func(string) (mount.Info, error) { - return mount.Info{ - Mountpoint: "host-path", - Optional: "others", - }, nil - } - - for desc, test := range map[string]struct { - criMount *runtime.Mount - fakeLookupMountFn func(string) (mount.Info, error) - optionsCheck []string - expectErr bool - }{ - "HostPath should mount as 'rprivate' if propagation is MountPropagation_PROPAGATION_PRIVATE": { - criMount: &runtime.Mount{ - ContainerPath: "container-path", - HostPath: "host-path", - Propagation: runtime.MountPropagation_PROPAGATION_PRIVATE, - }, - fakeLookupMountFn: nil, - optionsCheck: []string{"rbind", "rprivate"}, - expectErr: false, - }, - "HostPath should mount as 'rslave' if propagation is MountPropagation_PROPAGATION_HOST_TO_CONTAINER": { - criMount: &runtime.Mount{ - ContainerPath: "container-path", - HostPath: "host-path", - Propagation: runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER, - }, - fakeLookupMountFn: slaveLookupMountFn, - optionsCheck: []string{"rbind", "rslave"}, - expectErr: false, - }, 
- "HostPath should mount as 'rshared' if propagation is MountPropagation_PROPAGATION_BIDIRECTIONAL": { - criMount: &runtime.Mount{ - ContainerPath: "container-path", - HostPath: "host-path", - Propagation: runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL, - }, - fakeLookupMountFn: sharedLookupMountFn, - optionsCheck: []string{"rbind", "rshared"}, - expectErr: false, - }, - "HostPath should mount as 'rprivate' if propagation is illegal": { - criMount: &runtime.Mount{ - ContainerPath: "container-path", - HostPath: "host-path", - Propagation: runtime.MountPropagation(42), - }, - fakeLookupMountFn: nil, - optionsCheck: []string{"rbind", "rprivate"}, - expectErr: false, - }, - "Expect an error if HostPath isn't shared and mount propagation is MountPropagation_PROPAGATION_BIDIRECTIONAL": { - criMount: &runtime.Mount{ - ContainerPath: "container-path", - HostPath: "host-path", - Propagation: runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL, - }, - fakeLookupMountFn: slaveLookupMountFn, - expectErr: true, - }, - "Expect an error if HostPath isn't slave or shared and mount propagation is MountPropagation_PROPAGATION_HOST_TO_CONTAINER": { - criMount: &runtime.Mount{ - ContainerPath: "container-path", - HostPath: "host-path", - Propagation: runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER, - }, - fakeLookupMountFn: othersLookupMountFn, - expectErr: true, - }, - } { - t.Logf("TestCase %q", desc) - c := newTestCRIService() - c.os.(*ostesting.FakeOS).LookupMountFn = test.fakeLookupMountFn - config, _, _, _ := getCreateContainerTestData() - - var spec runtimespec.Spec - spec.Linux = &runtimespec.Linux{} - - err := opts.WithMounts(c.os, config, []*runtime.Mount{test.criMount}, "")(context.Background(), nil, nil, &spec) - if test.expectErr { - require.Error(t, err) - } else { - require.NoError(t, err) - checkMount(t, spec.Mounts, test.criMount.HostPath, test.criMount.ContainerPath, "bind", test.optionsCheck, nil) - } - } -} - -func TestPidNamespace(t *testing.T) { - 
testID := "test-id" - testPid := uint32(1234) - testSandboxID := "sandbox-id" - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - for desc, test := range map[string]struct { - pidNS runtime.NamespaceMode - expected runtimespec.LinuxNamespace - }{ - "node namespace mode": { - pidNS: runtime.NamespaceMode_NODE, - expected: runtimespec.LinuxNamespace{ - Type: runtimespec.PIDNamespace, - Path: opts.GetPIDNamespace(testPid), - }, - }, - "container namespace mode": { - pidNS: runtime.NamespaceMode_CONTAINER, - expected: runtimespec.LinuxNamespace{ - Type: runtimespec.PIDNamespace, - }, - }, - "pod namespace mode": { - pidNS: runtime.NamespaceMode_POD, - expected: runtimespec.LinuxNamespace{ - Type: runtimespec.PIDNamespace, - Path: opts.GetPIDNamespace(testPid), - }, - }, - } { - t.Logf("TestCase %q", desc) - containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{Pid: test.pidNS} - spec, err := c.generateContainerSpec(testID, testSandboxID, testPid, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - assert.Contains(t, spec.Linux.Namespaces, test.expected) - } -} - -func TestNoDefaultRunMount(t *testing.T) { - testID := "test-id" - testPid := uint32(1234) - testSandboxID := "sandbox-id" - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - - spec, err := c.generateContainerSpec(testID, testSandboxID, testPid, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - assert.NoError(t, err) - for _, mount := range spec.Mounts { - assert.NotEqual(t, "/run", mount.Destination) - } -} - -func TestGenerateSeccompSpecOpts(t *testing.T) { - for desc, test := range map[string]struct { - profile string - privileged bool - disable bool - specOpts oci.SpecOpts - expectErr bool - }{ - "should return error if seccomp is specified when 
seccomp is not supported": { - profile: runtimeDefault, - disable: true, - expectErr: true, - }, - "should not return error if seccomp is not specified when seccomp is not supported": { - profile: "", - disable: true, - }, - "should not return error if seccomp is unconfined when seccomp is not supported": { - profile: unconfinedProfile, - disable: true, - }, - "should not set seccomp when privileged is true": { - profile: seccompDefaultProfile, - privileged: true, - }, - "should not set seccomp when seccomp is unconfined": { - profile: unconfinedProfile, - }, - "should not set seccomp when seccomp is not specified": { - profile: "", - }, - "should set default seccomp when seccomp is runtime/default": { - profile: runtimeDefault, - specOpts: seccomp.WithDefaultProfile(), - }, - "should set default seccomp when seccomp is docker/default": { - profile: dockerDefault, - specOpts: seccomp.WithDefaultProfile(), - }, - "should set specified profile when local profile is specified": { - profile: profileNamePrefix + "test-profile", - specOpts: seccomp.WithProfile("test-profile"), - }, - "should return error if specified profile is invalid": { - profile: "test-profile", - expectErr: true, - }, - } { - t.Logf("TestCase %q", desc) - specOpts, err := generateSeccompSpecOpts(test.profile, test.privileged, !test.disable) - assert.Equal(t, - reflect.ValueOf(test.specOpts).Pointer(), - reflect.ValueOf(specOpts).Pointer()) - if test.expectErr { - assert.Error(t, err) - } else { - assert.NoError(t, err) - } - } -} - -func TestGenerateApparmorSpecOpts(t *testing.T) { - for desc, test := range map[string]struct { - profile string - privileged bool - disable bool - specOpts oci.SpecOpts - expectErr bool - }{ - "should return error if apparmor is specified when apparmor is not supported": { - profile: runtimeDefault, - disable: true, - expectErr: true, - }, - "should not return error if apparmor is not specified when apparmor is not supported": { - profile: "", - disable: true, - }, - 
"should set default apparmor when apparmor is not specified": { - profile: "", - specOpts: apparmor.WithDefaultProfile(appArmorDefaultProfileName), - }, - "should not apparmor when apparmor is not specified and privileged is true": { - profile: "", - privileged: true, - }, - "should not return error if apparmor is unconfined when apparmor is not supported": { - profile: unconfinedProfile, - disable: true, - }, - "should not apparmor when apparmor is unconfined": { - profile: unconfinedProfile, - }, - "should not apparmor when apparmor is unconfined and privileged is true": { - profile: unconfinedProfile, - privileged: true, - }, - "should set default apparmor when apparmor is runtime/default": { - profile: runtimeDefault, - specOpts: apparmor.WithDefaultProfile(appArmorDefaultProfileName), - }, - "should not apparmor when apparmor is default and privileged is true": { - profile: runtimeDefault, - privileged: true, - }, - "should set specified profile when local profile is specified": { - profile: profileNamePrefix + "test-profile", - specOpts: apparmor.WithProfile("test-profile"), - }, - "should set apparmor when local profile is specified and privileged is true": { - profile: profileNamePrefix + "test-profile", - privileged: true, - specOpts: apparmor.WithProfile("test-profile"), - }, - "should return error if specified profile is invalid": { - profile: "test-profile", - expectErr: true, - }, - } { - t.Logf("TestCase %q", desc) - specOpts, err := generateApparmorSpecOpts(test.profile, test.privileged, !test.disable) - assert.Equal(t, - reflect.ValueOf(test.specOpts).Pointer(), - reflect.ValueOf(specOpts).Pointer()) - if test.expectErr { - assert.Error(t, err) - } else { - assert.NoError(t, err) - } - } -} - -func TestMaskedAndReadonlyPaths(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := 
newTestCRIService() - - defaultSpec, err := oci.GenerateSpec(ctrdutil.NamespacedContext(), nil, &containers.Container{ID: testID}) - require.NoError(t, err) - - for desc, test := range map[string]struct { - disableProcMount bool - masked []string - readonly []string - expectedMasked []string - expectedReadonly []string - privileged bool - }{ - "should apply default if not specified when disable_proc_mount = true": { - disableProcMount: true, - masked: nil, - readonly: nil, - expectedMasked: defaultSpec.Linux.MaskedPaths, - expectedReadonly: defaultSpec.Linux.ReadonlyPaths, - privileged: false, - }, - "should always apply CRI specified paths when disable_proc_mount = false": { - disableProcMount: false, - masked: nil, - readonly: nil, - expectedMasked: nil, - expectedReadonly: nil, - privileged: false, - }, - "should be able to specify empty paths": { - masked: []string{}, - readonly: []string{}, - expectedMasked: []string{}, - expectedReadonly: []string{}, - privileged: false, - }, - "should apply CRI specified paths": { - masked: []string{"/proc"}, - readonly: []string{"/sys"}, - expectedMasked: []string{"/proc"}, - expectedReadonly: []string{"/sys"}, - privileged: false, - }, - "default should be nil for privileged": { - expectedMasked: nil, - expectedReadonly: nil, - privileged: true, - }, - "should be able to specify empty paths, esp. 
if privileged": { - masked: []string{}, - readonly: []string{}, - expectedMasked: nil, - expectedReadonly: nil, - privileged: true, - }, - "should not apply CRI specified paths if privileged": { - masked: []string{"/proc"}, - readonly: []string{"/sys"}, - expectedMasked: nil, - expectedReadonly: nil, - privileged: true, - }, - } { - t.Logf("TestCase %q", desc) - c.config.DisableProcMount = test.disableProcMount - containerConfig.Linux.SecurityContext.MaskedPaths = test.masked - containerConfig.Linux.SecurityContext.ReadonlyPaths = test.readonly - containerConfig.Linux.SecurityContext.Privileged = test.privileged - sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - Privileged: test.privileged, - } - spec, err := c.generateContainerSpec(testID, testSandboxID, testPid, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - if !test.privileged { // specCheck presumes an unprivileged container - specCheck(t, testID, testSandboxID, testPid, spec) - } - assert.Equal(t, test.expectedMasked, spec.Linux.MaskedPaths) - assert.Equal(t, test.expectedReadonly, spec.Linux.ReadonlyPaths) - } -} - -func TestHostname(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - c.os.(*ostesting.FakeOS).HostnameFn = func() (string, error) { - return "real-hostname", nil - } - for desc, test := range map[string]struct { - hostname string - networkNs runtime.NamespaceMode - expectedEnv string - }{ - "should add HOSTNAME=sandbox.Hostname for pod network namespace": { - hostname: "test-hostname", - networkNs: runtime.NamespaceMode_POD, - expectedEnv: "HOSTNAME=test-hostname", - }, - "should add HOSTNAME=sandbox.Hostname for host network namespace": { - hostname: "test-hostname", - networkNs: runtime.NamespaceMode_NODE, - expectedEnv: 
"HOSTNAME=test-hostname", - }, - "should add HOSTNAME=os.Hostname for host network namespace if sandbox.Hostname is not set": { - hostname: "", - networkNs: runtime.NamespaceMode_NODE, - expectedEnv: "HOSTNAME=real-hostname", - }, - } { - t.Logf("TestCase %q", desc) - sandboxConfig.Hostname = test.hostname - sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{Network: test.networkNs}, - } - spec, err := c.generateContainerSpec(testID, testSandboxID, testPid, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - specCheck(t, testID, testSandboxID, testPid, spec) - assert.Contains(t, spec.Process.Env, test.expectedEnv) - } -} - -func TestDisableCgroup(t *testing.T) { - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - c.config.DisableCgroup = true - spec, err := c.generateContainerSpec("test-id", "sandbox-id", 1234, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - - t.Log("resource limit should not be set") - assert.Nil(t, spec.Linux.Resources.Memory) - assert.Nil(t, spec.Linux.Resources.CPU) - - t.Log("cgroup path should be empty") - assert.Empty(t, spec.Linux.CgroupsPath) -} - -func TestGenerateUserString(t *testing.T) { - type testcase struct { - // the name of the test case - name string - - u string - uid, gid *runtime.Int64Value - - result string - expectedError bool - } - testcases := []testcase{ - { - name: "Empty", - result: "", - }, - { - name: "Username Only", - u: "testuser", - result: "testuser", - }, - { - name: "Username, UID", - u: "testuser", - uid: &runtime.Int64Value{Value: 1}, - result: "testuser", - }, - { - name: "Username, UID, GID", - u: "testuser", - uid: &runtime.Int64Value{Value: 1}, - gid: &runtime.Int64Value{Value: 10}, - result: "testuser:10", - }, - { - name: "Username, GID", - u: "testuser", - gid: 
&runtime.Int64Value{Value: 10}, - result: "testuser:10", - }, - { - name: "UID only", - uid: &runtime.Int64Value{Value: 1}, - result: "1", - }, - { - name: "UID, GID", - uid: &runtime.Int64Value{Value: 1}, - gid: &runtime.Int64Value{Value: 10}, - result: "1:10", - }, - { - name: "GID only", - gid: &runtime.Int64Value{Value: 10}, - result: "", - expectedError: true, - }, - } - for _, tc := range testcases { - t.Run(tc.name, func(t *testing.T) { - r, err := generateUserString(tc.u, tc.uid, tc.gid) - if tc.expectedError { - assert.Error(t, err) - } else { - assert.NoError(t, err) - } - assert.Equal(t, tc.result, r) - }) - } -} - -func TestPrivilegedDevices(t *testing.T) { - testPid := uint32(1234) - c := newTestCRIService() - testSandboxID := "sandbox-id" - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - - for desc, test := range map[string]struct { - privileged bool - privilegedWithoutHostDevices bool - expectHostDevices bool - }{ - "expect no host devices when privileged is false": { - privileged: false, - privilegedWithoutHostDevices: false, - expectHostDevices: false, - }, - "expect no host devices when privileged is false and privilegedWithoutHostDevices is true": { - privileged: false, - privilegedWithoutHostDevices: true, - expectHostDevices: false, - }, - "expect host devices when privileged is true": { - privileged: true, - privilegedWithoutHostDevices: false, - expectHostDevices: true, - }, - "expect no host devices when privileged is true and privilegedWithoutHostDevices is true": { - privileged: true, - privilegedWithoutHostDevices: true, - expectHostDevices: false, - }, - } { - t.Logf("TestCase %q", desc) - - containerConfig.Linux.SecurityContext.Privileged = test.privileged - sandboxConfig.Linux.SecurityContext.Privileged = test.privileged - - ociRuntime := config.Runtime{ - PrivilegedWithoutHostDevices: test.privilegedWithoutHostDevices, - } - spec, err := c.generateContainerSpec(t.Name(), testSandboxID, testPid, 
containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - assert.NoError(t, err) - - hostDevices, err := devices.HostDevices() - assert.NoError(t, err) - - if test.expectHostDevices { - assert.Len(t, spec.Linux.Devices, len(hostDevices)) - } else { - assert.Empty(t, spec.Linux.Devices) - } - } -} diff --git a/pkg/server/container_create_unix.go b/pkg/server/container_create_unix.go new file mode 100644 index 000000000..81b23943a --- /dev/null +++ b/pkg/server/container_create_unix.go @@ -0,0 +1,379 @@ +// +build !windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package server + +import ( + "strconv" + "strings" + + "github.com/containerd/containerd/contrib/apparmor" + "github.com/containerd/containerd/contrib/seccomp" + "github.com/containerd/containerd/oci" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + + "github.com/containerd/cri/pkg/annotations" + "github.com/containerd/cri/pkg/config" + customopts "github.com/containerd/cri/pkg/containerd/opts" +) + +const ( + // profileNamePrefix is the prefix for loading profiles on a localhost. Eg. AppArmor localhost/profileName. 
+ profileNamePrefix = "localhost/" // TODO (mikebrow): get localhost/ & runtime/default from CRI kubernetes/kubernetes#51747 + // runtimeDefault indicates that we should use or create a runtime default profile. + runtimeDefault = "runtime/default" + // dockerDefault indicates that we should use or create a docker default profile. + dockerDefault = "docker/default" + // appArmorDefaultProfileName is name to use when creating a default apparmor profile. + appArmorDefaultProfileName = "cri-containerd.apparmor.d" + // unconfinedProfile is a string indicating one should run a pod/containerd without a security profile + unconfinedProfile = "unconfined" + // seccompDefaultProfile is the default seccomp profile. + seccompDefaultProfile = dockerDefault +) + +// containerMounts sets up necessary container system file mounts +// including /dev/shm, /etc/hosts and /etc/resolv.conf. +func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount { + var mounts []*runtime.Mount + securityContext := config.GetLinux().GetSecurityContext() + if !isInCRIMounts(etcHostname, config.GetMounts()) { + // /etc/hostname is added since 1.1.6, 1.2.4 and 1.3. + // For in-place upgrade, the old sandbox doesn't have the hostname file, + // do not mount this in that case. + // TODO(random-liu): Remove the check and always mount this when + // containerd 1.1 and 1.2 are deprecated. + hostpath := c.getSandboxHostname(sandboxID) + if _, err := c.os.Stat(hostpath); err == nil { + mounts = append(mounts, &runtime.Mount{ + ContainerPath: etcHostname, + HostPath: hostpath, + Readonly: securityContext.GetReadonlyRootfs(), + }) + } + } + + if !isInCRIMounts(etcHosts, config.GetMounts()) { + mounts = append(mounts, &runtime.Mount{ + ContainerPath: etcHosts, + HostPath: c.getSandboxHosts(sandboxID), + Readonly: securityContext.GetReadonlyRootfs(), + }) + } + + // Mount sandbox resolv.config. 
+ // TODO: Need to figure out whether we should always mount it as read-only + if !isInCRIMounts(resolvConfPath, config.GetMounts()) { + mounts = append(mounts, &runtime.Mount{ + ContainerPath: resolvConfPath, + HostPath: c.getResolvPath(sandboxID), + Readonly: securityContext.GetReadonlyRootfs(), + }) + } + + if !isInCRIMounts(devShm, config.GetMounts()) { + sandboxDevShm := c.getSandboxDevShm(sandboxID) + if securityContext.GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE { + sandboxDevShm = devShm + } + mounts = append(mounts, &runtime.Mount{ + ContainerPath: devShm, + HostPath: sandboxDevShm, + Readonly: false, + }) + } + return mounts +} + +func (c *criService) containerSpec(id string, sandboxID string, sandboxPid uint32, netNSPath string, + config *runtime.ContainerConfig, sandboxConfig *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig, + extraMounts []*runtime.Mount, ociRuntime config.Runtime) (*runtimespec.Spec, error) { + + specOpts := []oci.SpecOpts{ + customopts.WithoutRunMount, + customopts.WithoutDefaultSecuritySettings, + customopts.WithRelativeRoot(relativeRootfsPath), + customopts.WithProcessArgs(config, imageConfig), + // this will be set based on the security context below + oci.WithNewPrivileges, + } + if config.GetWorkingDir() != "" { + specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir())) + } else if imageConfig.WorkingDir != "" { + specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) + } + + if config.GetTty() { + specOpts = append(specOpts, oci.WithTTY) + } + + // Add HOSTNAME env. + var ( + err error + hostname = sandboxConfig.GetHostname() + ) + if hostname == "" { + if hostname, err = c.os.Hostname(); err != nil { + return nil, err + } + } + specOpts = append(specOpts, oci.WithEnv([]string{hostnameEnv + "=" + hostname})) + + // Apply envs from image config first, so that envs from container config + // can override them. 
+ env := imageConfig.Env + for _, e := range config.GetEnvs() { + env = append(env, e.GetKey()+"="+e.GetValue()) + } + specOpts = append(specOpts, oci.WithEnv(env)) + + securityContext := config.GetLinux().GetSecurityContext() + selinuxOpt := securityContext.GetSelinuxOptions() + processLabel, mountLabel, err := initSelinuxOpts(selinuxOpt) + if err != nil { + return nil, errors.Wrapf(err, "failed to init selinux options %+v", securityContext.GetSelinuxOptions()) + } + specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel)) + + if !c.config.DisableProcMount { + // Apply masked paths if specified. + // If the container is privileged, this will be cleared later on. + specOpts = append(specOpts, oci.WithMaskedPaths(securityContext.GetMaskedPaths())) + + // Apply readonly paths if specified. + // If the container is privileged, this will be cleared later on. + specOpts = append(specOpts, oci.WithReadonlyPaths(securityContext.GetReadonlyPaths())) + } + + if securityContext.GetPrivileged() { + if !sandboxConfig.GetLinux().GetSecurityContext().GetPrivileged() { + return nil, errors.New("no privileged container allowed in sandbox") + } + specOpts = append(specOpts, oci.WithPrivileged) + if !ociRuntime.PrivilegedWithoutHostDevices { + specOpts = append(specOpts, customopts.WithPrivilegedDevices) + } + } else { // not privileged + specOpts = append(specOpts, customopts.WithDevices(c.os, config), customopts.WithCapabilities(securityContext)) + } + + // Clear all ambient capabilities. The implication of non-root + caps + // is not clearly defined in Kubernetes. + // See https://github.com/kubernetes/kubernetes/issues/56374 + // Keep docker's behavior for now. 
+ specOpts = append(specOpts, + customopts.WithoutAmbientCaps, + customopts.WithSelinuxLabels(processLabel, mountLabel), + ) + + // TODO: Figure out whether we should set no new privilege for sandbox container by default + if securityContext.GetNoNewPrivs() { + specOpts = append(specOpts, oci.WithNoNewPrivileges) + } + // TODO(random-liu): [P1] Set selinux options (privileged or not). + if securityContext.GetReadonlyRootfs() { + specOpts = append(specOpts, oci.WithRootFSReadonly()) + } + + if c.config.DisableCgroup { + specOpts = append(specOpts, customopts.WithDisabledCgroups) + } else { + specOpts = append(specOpts, customopts.WithResources(config.GetLinux().GetResources())) + if sandboxConfig.GetLinux().GetCgroupParent() != "" { + cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id) + specOpts = append(specOpts, oci.WithCgroup(cgroupsPath)) + } + } + + supplementalGroups := securityContext.GetSupplementalGroups() + + for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations, + ociRuntime.PodAnnotations) { + specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) + } + + specOpts = append(specOpts, + customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj), + customopts.WithPodNamespaces(securityContext, sandboxPid), + customopts.WithSupplementalGroups(supplementalGroups), + customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer), + customopts.WithAnnotation(annotations.SandboxID, sandboxID), + ) + + return runtimeSpec(id, specOpts...) +} + +func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { + var specOpts []oci.SpecOpts + securityContext := config.GetLinux().GetSecurityContext() + // Set container username. This could only be done by containerd, because it needs + // access to the container rootfs. Pass user name to containerd, and let it overwrite + // the spec for us. 
+ userstr, err := generateUserString( + securityContext.GetRunAsUsername(), + securityContext.GetRunAsUser(), + securityContext.GetRunAsGroup()) + if err != nil { + return nil, errors.Wrap(err, "failed to generate user string") + } + if userstr == "" { + // Lastly, since no user override was passed via CRI try to set via OCI + // Image + userstr = imageConfig.User + } + if userstr != "" { + specOpts = append(specOpts, oci.WithUser(userstr)) + } + + if securityContext.GetRunAsUsername() != "" { + userstr = securityContext.GetRunAsUsername() + } else { + // Even if RunAsUser is not set, we still call `GetValue` to get uid 0. + // Because it is still useful to get additional gids for uid 0. + userstr = strconv.FormatInt(securityContext.GetRunAsUser().GetValue(), 10) + } + specOpts = append(specOpts, customopts.WithAdditionalGIDs(userstr)) + + apparmorSpecOpts, err := generateApparmorSpecOpts( + securityContext.GetApparmorProfile(), + securityContext.GetPrivileged(), + c.apparmorEnabled()) + if err != nil { + return nil, errors.Wrap(err, "failed to generate apparmor spec opts") + } + if apparmorSpecOpts != nil { + specOpts = append(specOpts, apparmorSpecOpts) + } + + seccompSpecOpts, err := generateSeccompSpecOpts( + securityContext.GetSeccompProfilePath(), + securityContext.GetPrivileged(), + c.seccompEnabled()) + if err != nil { + return nil, errors.Wrap(err, "failed to generate seccomp spec opts") + } + if seccompSpecOpts != nil { + specOpts = append(specOpts, seccompSpecOpts) + } + return specOpts, nil +} + +// generateSeccompSpecOpts generates containerd SpecOpts for seccomp. +func generateSeccompSpecOpts(seccompProf string, privileged, seccompEnabled bool) (oci.SpecOpts, error) { + if privileged { + // Do not set seccomp profile when container is privileged + return nil, nil + } + // Set seccomp profile + if seccompProf == runtimeDefault || seccompProf == dockerDefault { + // use correct default profile (Eg. 
if not configured otherwise, the default is docker/default) + seccompProf = seccompDefaultProfile + } + if !seccompEnabled { + if seccompProf != "" && seccompProf != unconfinedProfile { + return nil, errors.New("seccomp is not supported") + } + return nil, nil + } + switch seccompProf { + case "", unconfinedProfile: + // Do not set seccomp profile. + return nil, nil + case dockerDefault: + // Note: WithDefaultProfile specOpts must be added after capabilities + return seccomp.WithDefaultProfile(), nil + default: + // Require and Trim default profile name prefix + if !strings.HasPrefix(seccompProf, profileNamePrefix) { + return nil, errors.Errorf("invalid seccomp profile %q", seccompProf) + } + return seccomp.WithProfile(strings.TrimPrefix(seccompProf, profileNamePrefix)), nil + } +} + +// generateApparmorSpecOpts generates containerd SpecOpts for apparmor. +func generateApparmorSpecOpts(apparmorProf string, privileged, apparmorEnabled bool) (oci.SpecOpts, error) { + if !apparmorEnabled { + // Should fail loudly if user try to specify apparmor profile + // but we don't support it. + if apparmorProf != "" && apparmorProf != unconfinedProfile { + return nil, errors.New("apparmor is not supported") + } + return nil, nil + } + switch apparmorProf { + // Based on kubernetes#51746, default apparmor profile should be applied + // for when apparmor is not specified. 
+ case runtimeDefault, "": + if privileged { + // Do not set apparmor profile when container is privileged + return nil, nil + } + // TODO (mikebrow): delete created apparmor default profile + return apparmor.WithDefaultProfile(appArmorDefaultProfileName), nil + case unconfinedProfile: + return nil, nil + default: + // Require and Trim default profile name prefix + if !strings.HasPrefix(apparmorProf, profileNamePrefix) { + return nil, errors.Errorf("invalid apparmor profile %q", apparmorProf) + } + return apparmor.WithProfile(strings.TrimPrefix(apparmorProf, profileNamePrefix)), nil + } +} + +// generateUserString generates valid user string based on OCI Image Spec +// v1.0.0. +// +// CRI defines that the following combinations are valid: +// +// (none) -> "" +// username -> username +// username, uid -> username +// username, uid, gid -> username:gid +// username, gid -> username:gid +// uid -> uid +// uid, gid -> uid:gid +// gid -> error +// +// TODO(random-liu): Add group name support in CRI. +func generateUserString(username string, uid, gid *runtime.Int64Value) (string, error) { + var userstr, groupstr string + if uid != nil { + userstr = strconv.FormatInt(uid.GetValue(), 10) + } + if username != "" { + userstr = username + } + if gid != nil { + groupstr = strconv.FormatInt(gid.GetValue(), 10) + } + if userstr == "" { + if groupstr != "" { + return "", errors.Errorf("user group %q is specified without user", groupstr) + } + return "", nil + } + if groupstr != "" { + userstr = userstr + ":" + groupstr + } + return userstr, nil +} diff --git a/pkg/server/container_create_unix_test.go b/pkg/server/container_create_unix_test.go new file mode 100644 index 000000000..96a623a32 --- /dev/null +++ b/pkg/server/container_create_unix_test.go @@ -0,0 +1,1314 @@ +// +build !windows + +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package server + +import ( + "context" + "os" + "path/filepath" + "reflect" + "strings" + "testing" + + "github.com/containerd/containerd/containers" + "github.com/containerd/containerd/contrib/apparmor" + "github.com/containerd/containerd/contrib/seccomp" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/oci" + "github.com/containerd/cri/pkg/annotations" + "github.com/containerd/cri/pkg/config" + "github.com/containerd/cri/pkg/containerd/opts" + ctrdutil "github.com/containerd/cri/pkg/containerd/util" + ostesting "github.com/containerd/cri/pkg/os/testing" + "github.com/containerd/cri/pkg/util" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/opencontainers/runc/libcontainer/devices" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" +) + +func checkMount(t *testing.T, mounts []runtimespec.Mount, src, dest, typ string, + contains, notcontains []string) { + found := false + for _, m := range mounts { + if m.Source == src && m.Destination == dest { + assert.Equal(t, m.Type, typ) + for _, c := range contains { + assert.Contains(t, m.Options, c) + } + for _, n := range notcontains { + assert.NotContains(t, m.Options, n) + } + found = true + break + } + } + assert.True(t, found, "mount from %q to %q not found", src, dest) +} + +func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig, + *imagespec.ImageConfig, 
func(*testing.T, string, string, uint32, *runtimespec.Spec)) { + config := &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: "test-name", + Attempt: 1, + }, + Image: &runtime.ImageSpec{ + Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799", + }, + Command: []string{"test", "command"}, + Args: []string{"test", "args"}, + WorkingDir: "test-cwd", + Envs: []*runtime.KeyValue{ + {Key: "k1", Value: "v1"}, + {Key: "k2", Value: "v2"}, + {Key: "k3", Value: "v3=v3bis"}, + {Key: "k4", Value: "v4=v4bis=foop"}, + }, + Mounts: []*runtime.Mount{ + // everything default + { + ContainerPath: "container-path-1", + HostPath: "host-path-1", + }, + // readOnly + { + ContainerPath: "container-path-2", + HostPath: "host-path-2", + Readonly: true, + }, + }, + Labels: map[string]string{"a": "b"}, + Annotations: map[string]string{"c": "d"}, + Linux: &runtime.LinuxContainerConfig{ + Resources: &runtime.LinuxContainerResources{ + CpuPeriod: 100, + CpuQuota: 200, + CpuShares: 300, + MemoryLimitInBytes: 400, + OomScoreAdj: 500, + CpusetCpus: "0-1", + CpusetMems: "2-3", + }, + SecurityContext: &runtime.LinuxContainerSecurityContext{ + SupplementalGroups: []int64{1111, 2222}, + NoNewPrivs: true, + }, + }, + } + sandboxConfig := &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: "test-sandbox-name", + Uid: "test-sandbox-uid", + Namespace: "test-sandbox-ns", + Attempt: 2, + }, + Annotations: map[string]string{"c": "d"}, + Linux: &runtime.LinuxPodSandboxConfig{ + CgroupParent: "/test/cgroup/parent", + SecurityContext: &runtime.LinuxSandboxSecurityContext{}, + }, + } + imageConfig := &imagespec.ImageConfig{ + Env: []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"}, + Entrypoint: []string{"/entrypoint"}, + Cmd: []string{"cmd"}, + WorkingDir: "/workspace", + } + specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) { + assert.Equal(t, relativeRootfsPath, 
spec.Root.Path) + assert.Equal(t, []string{"test", "command", "test", "args"}, spec.Process.Args) + assert.Equal(t, "test-cwd", spec.Process.Cwd) + assert.Contains(t, spec.Process.Env, "k1=v1", "k2=v2", "k3=v3=v3bis", "ik4=iv4=iv4bis=boop") + assert.Contains(t, spec.Process.Env, "ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "k4=v4=v4bis=foop") + + t.Logf("Check cgroups bind mount") + checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"ro"}, nil) + + t.Logf("Check bind mount") + checkMount(t, spec.Mounts, "host-path-1", "container-path-1", "bind", []string{"rbind", "rprivate", "rw"}, nil) + checkMount(t, spec.Mounts, "host-path-2", "container-path-2", "bind", []string{"rbind", "rprivate", "ro"}, nil) + + t.Logf("Check resource limits") + assert.EqualValues(t, *spec.Linux.Resources.CPU.Period, 100) + assert.EqualValues(t, *spec.Linux.Resources.CPU.Quota, 200) + assert.EqualValues(t, *spec.Linux.Resources.CPU.Shares, 300) + assert.EqualValues(t, spec.Linux.Resources.CPU.Cpus, "0-1") + assert.EqualValues(t, spec.Linux.Resources.CPU.Mems, "2-3") + assert.EqualValues(t, *spec.Linux.Resources.Memory.Limit, 400) + assert.EqualValues(t, *spec.Process.OOMScoreAdj, 500) + + t.Logf("Check supplemental groups") + assert.Contains(t, spec.Process.User.AdditionalGids, uint32(1111)) + assert.Contains(t, spec.Process.User.AdditionalGids, uint32(2222)) + + t.Logf("Check no_new_privs") + assert.Equal(t, spec.Process.NoNewPrivileges, true) + + t.Logf("Check cgroup path") + assert.Equal(t, getCgroupsPath("/test/cgroup/parent", id), spec.Linux.CgroupsPath) + + t.Logf("Check namespaces") + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.NetworkNamespace, + Path: opts.GetNetworkNamespace(sandboxPid), + }) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.IPCNamespace, + Path: opts.GetIPCNamespace(sandboxPid), + }) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + 
Type: runtimespec.UTSNamespace, + Path: opts.GetUTSNamespace(sandboxPid), + }) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.PIDNamespace, + Path: opts.GetPIDNamespace(sandboxPid), + }) + + t.Logf("Check PodSandbox annotations") + assert.Contains(t, spec.Annotations, annotations.SandboxID) + assert.EqualValues(t, spec.Annotations[annotations.SandboxID], sandboxID) + + assert.Contains(t, spec.Annotations, annotations.ContainerType) + assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeContainer) + } + return config, sandboxConfig, imageConfig, specCheck +} + +func TestGeneralContainerSpec(t *testing.T) { + testID := "test-id" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + testSandboxID := "sandbox-id" + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + specCheck(t, testID, testSandboxID, testPid, spec) +} + +func TestContainerCapabilities(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testPid := uint32(1234) + for desc, test := range map[string]struct { + capability *runtime.Capability + includes []string + excludes []string + }{ + "should be able to add/drop capabilities": { + capability: &runtime.Capability{ + AddCapabilities: []string{"SYS_ADMIN"}, + DropCapabilities: []string{"CHOWN"}, + }, + includes: []string{"CAP_SYS_ADMIN"}, + excludes: []string{"CAP_CHOWN"}, + }, + "should be able to add all capabilities": { + capability: &runtime.Capability{ + AddCapabilities: []string{"ALL"}, + }, + includes: oci.GetAllCapabilities(), + }, + "should be able to drop all capabilities": { + capability: &runtime.Capability{ + DropCapabilities: []string{"ALL"}, + }, + excludes: oci.GetAllCapabilities(), + }, + "should be able to drop 
capabilities with add all": { + capability: &runtime.Capability{ + AddCapabilities: []string{"ALL"}, + DropCapabilities: []string{"CHOWN"}, + }, + includes: util.SubtractStringSlice(oci.GetAllCapabilities(), "CAP_CHOWN"), + excludes: []string{"CAP_CHOWN"}, + }, + "should be able to add capabilities with drop all": { + capability: &runtime.Capability{ + AddCapabilities: []string{"SYS_ADMIN"}, + DropCapabilities: []string{"ALL"}, + }, + includes: []string{"CAP_SYS_ADMIN"}, + excludes: util.SubtractStringSlice(oci.GetAllCapabilities(), "CAP_SYS_ADMIN"), + }, + } { + t.Logf("TestCase %q", desc) + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + + containerConfig.Linux.SecurityContext.Capabilities = test.capability + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + specCheck(t, testID, testSandboxID, testPid, spec) + for _, include := range test.includes { + assert.Contains(t, spec.Process.Capabilities.Bounding, include) + assert.Contains(t, spec.Process.Capabilities.Effective, include) + assert.Contains(t, spec.Process.Capabilities.Inheritable, include) + assert.Contains(t, spec.Process.Capabilities.Permitted, include) + } + for _, exclude := range test.excludes { + assert.NotContains(t, spec.Process.Capabilities.Bounding, exclude) + assert.NotContains(t, spec.Process.Capabilities.Effective, exclude) + assert.NotContains(t, spec.Process.Capabilities.Inheritable, exclude) + assert.NotContains(t, spec.Process.Capabilities.Permitted, exclude) + } + assert.Empty(t, spec.Process.Capabilities.Ambient) + } +} + +func TestContainerSpecTty(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := 
newTestCRIService() + for _, tty := range []bool{true, false} { + containerConfig.Tty = tty + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + specCheck(t, testID, testSandboxID, testPid, spec) + assert.Equal(t, tty, spec.Process.Terminal) + if tty { + assert.Contains(t, spec.Process.Env, "TERM=xterm") + } else { + assert.NotContains(t, spec.Process.Env, "TERM=xterm") + } + } +} + +func TestPodAnnotationPassthroughContainerSpec(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testPid := uint32(1234) + + for desc, test := range map[string]struct { + podAnnotations []string + configChange func(*runtime.PodSandboxConfig) + specCheck func(*testing.T, *runtimespec.Spec) + }{ + "a passthrough annotation should be passed as an OCI annotation": { + podAnnotations: []string{"c"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + assert.Equal(t, spec.Annotations["c"], "d") + }, + }, + "a non-passthrough annotation should not be passed as an OCI annotation": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Annotations["d"] = "e" + }, + podAnnotations: []string{"c"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + assert.Equal(t, spec.Annotations["c"], "d") + _, ok := spec.Annotations["d"] + assert.False(t, ok) + }, + }, + "passthrough annotations should support wildcard match": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Annotations["t.f"] = "j" + c.Annotations["z.g"] = "o" + c.Annotations["z"] = "o" + c.Annotations["y.ca"] = "b" + c.Annotations["y"] = "b" + }, + podAnnotations: []string{"t*", "z.*", "y.c*"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + t.Logf("%+v", spec.Annotations) + assert.Equal(t, spec.Annotations["t.f"], "j") + assert.Equal(t, spec.Annotations["z.g"], "o") + assert.Equal(t, spec.Annotations["y.ca"], "b") + _, ok := spec.Annotations["y"] + assert.False(t, ok) + _, ok = 
spec.Annotations["z"] + assert.False(t, ok) + }, + }, + } { + t.Run(desc, func(t *testing.T) { + c := newTestCRIService() + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + if test.configChange != nil { + test.configChange(sandboxConfig) + } + + ociRuntime := config.Runtime{ + PodAnnotations: test.podAnnotations, + } + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", + containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + assert.NoError(t, err) + assert.NotNil(t, spec) + specCheck(t, testID, testSandboxID, testPid, spec) + if test.specCheck != nil { + test.specCheck(t, spec) + } + }) + } + +} + +func TestContainerSpecReadonlyRootfs(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + for _, readonly := range []bool{true, false} { + containerConfig.Linux.SecurityContext.ReadonlyRootfs = readonly + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + specCheck(t, testID, testSandboxID, testPid, spec) + assert.Equal(t, readonly, spec.Root.Readonly) + } +} + +func TestContainerSpecWithExtraMounts(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + mountInConfig := &runtime.Mount{ + // Test cleanpath + ContainerPath: "test-container-path/", + HostPath: "test-host-path", + Readonly: false, + } + containerConfig.Mounts = append(containerConfig.Mounts, mountInConfig) + extraMounts := []*runtime.Mount{ + { + ContainerPath: "test-container-path", + HostPath: "test-host-path-extra", + Readonly: true, + }, + { + ContainerPath: 
"/sys", + HostPath: "test-sys-extra", + Readonly: false, + }, + { + ContainerPath: "/dev", + HostPath: "test-dev-extra", + Readonly: false, + }, + } + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", containerConfig, sandboxConfig, imageConfig, extraMounts, ociRuntime) + require.NoError(t, err) + specCheck(t, testID, testSandboxID, testPid, spec) + var mounts, sysMounts, devMounts []runtimespec.Mount + for _, m := range spec.Mounts { + if strings.HasPrefix(m.Destination, "test-container-path") { + mounts = append(mounts, m) + } else if m.Destination == "/sys" { + sysMounts = append(sysMounts, m) + } else if strings.HasPrefix(m.Destination, "/dev") { + devMounts = append(devMounts, m) + } + } + t.Logf("CRI mount should override extra mount") + require.Len(t, mounts, 1) + assert.Equal(t, "test-host-path", mounts[0].Source) + assert.Contains(t, mounts[0].Options, "rw") + + t.Logf("Extra mount should override default mount") + require.Len(t, sysMounts, 1) + assert.Equal(t, "test-sys-extra", sysMounts[0].Source) + assert.Contains(t, sysMounts[0].Options, "rw") + + t.Logf("Dev mount should override all default dev mounts") + require.Len(t, devMounts, 1) + assert.Equal(t, "test-dev-extra", devMounts[0].Source) + assert.Contains(t, devMounts[0].Options, "rw") +} + +func TestContainerAndSandboxPrivileged(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + for desc, test := range map[string]struct { + containerPrivileged bool + sandboxPrivileged bool + expectError bool + }{ + "privileged container in non-privileged sandbox should fail": { + containerPrivileged: true, + sandboxPrivileged: false, + expectError: true, + }, + "privileged container in privileged sandbox should be fine": { + containerPrivileged: true, + sandboxPrivileged: true, + expectError: false, + }, + 
"non-privileged container in privileged sandbox should be fine": { + containerPrivileged: false, + sandboxPrivileged: true, + expectError: false, + }, + "non-privileged container in non-privileged sandbox should be fine": { + containerPrivileged: false, + sandboxPrivileged: false, + expectError: false, + }, + } { + t.Logf("TestCase %q", desc) + containerConfig.Linux.SecurityContext.Privileged = test.containerPrivileged + sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + Privileged: test.sandboxPrivileged, + } + _, err := c.containerSpec(testID, testSandboxID, testPid, "", containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + if test.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + } +} + +func TestContainerSpecCommand(t *testing.T) { + for desc, test := range map[string]struct { + criEntrypoint []string + criArgs []string + imageEntrypoint []string + imageArgs []string + expected []string + expectErr bool + }{ + "should use cri entrypoint if it's specified": { + criEntrypoint: []string{"a", "b"}, + imageEntrypoint: []string{"c", "d"}, + imageArgs: []string{"e", "f"}, + expected: []string{"a", "b"}, + }, + "should use cri entrypoint if it's specified even if it's empty": { + criEntrypoint: []string{}, + criArgs: []string{"a", "b"}, + imageEntrypoint: []string{"c", "d"}, + imageArgs: []string{"e", "f"}, + expected: []string{"a", "b"}, + }, + "should use cri entrypoint and args if they are specified": { + criEntrypoint: []string{"a", "b"}, + criArgs: []string{"c", "d"}, + imageEntrypoint: []string{"e", "f"}, + imageArgs: []string{"g", "h"}, + expected: []string{"a", "b", "c", "d"}, + }, + "should use image entrypoint if cri entrypoint is not specified": { + criArgs: []string{"a", "b"}, + imageEntrypoint: []string{"c", "d"}, + imageArgs: []string{"e", "f"}, + expected: []string{"c", "d", "a", "b"}, + }, + "should use image args if both cri entrypoint and args are not specified": { + imageEntrypoint: 
[]string{"c", "d"}, + imageArgs: []string{"e", "f"}, + expected: []string{"c", "d", "e", "f"}, + }, + "should return error if both entrypoint and args are empty": { + expectErr: true, + }, + } { + + config, _, imageConfig, _ := getCreateContainerTestData() + config.Command = test.criEntrypoint + config.Args = test.criArgs + imageConfig.Entrypoint = test.imageEntrypoint + imageConfig.Cmd = test.imageArgs + + var spec runtimespec.Spec + err := opts.WithProcessArgs(config, imageConfig)(context.Background(), nil, nil, &spec) + if test.expectErr { + assert.Error(t, err) + continue + } + assert.NoError(t, err) + assert.Equal(t, test.expected, spec.Process.Args, desc) + } +} + +func TestContainerMounts(t *testing.T) { + const testSandboxID = "test-id" + for desc, test := range map[string]struct { + statFn func(string) (os.FileInfo, error) + criMounts []*runtime.Mount + securityContext *runtime.LinuxContainerSecurityContext + expectedMounts []*runtime.Mount + }{ + "should setup ro mount when rootfs is read-only": { + securityContext: &runtime.LinuxContainerSecurityContext{ + ReadonlyRootfs: true, + }, + expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), + Readonly: true, + }, + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: true, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), + Readonly: true, + }, + { + ContainerPath: "/dev/shm", + HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), + Readonly: false, + }, + }, + }, + "should setup rw mount when rootfs is read-write": { + securityContext: &runtime.LinuxContainerSecurityContext{}, + expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), + Readonly: 
false, + }, + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: false, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), + Readonly: false, + }, + { + ContainerPath: "/dev/shm", + HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), + Readonly: false, + }, + }, + }, + "should use host /dev/shm when host ipc is set": { + securityContext: &runtime.LinuxContainerSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{Ipc: runtime.NamespaceMode_NODE}, + }, + expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), + Readonly: false, + }, + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: false, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), + Readonly: false, + }, + { + ContainerPath: "/dev/shm", + HostPath: "/dev/shm", + Readonly: false, + }, + }, + }, + "should skip container mounts if already mounted by CRI": { + criMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: "/test-etc-hostname", + }, + { + ContainerPath: "/etc/hosts", + HostPath: "/test-etc-host", + }, + { + ContainerPath: resolvConfPath, + HostPath: "test-resolv-conf", + }, + { + ContainerPath: "/dev/shm", + HostPath: "test-dev-shm", + }, + }, + securityContext: &runtime.LinuxContainerSecurityContext{}, + expectedMounts: nil, + }, + "should skip hostname mount if the old sandbox doesn't have hostname file": { + statFn: func(path string) (os.FileInfo, error) { + assert.Equal(t, filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), path) + return nil, errors.New("random error") + }, + securityContext: &runtime.LinuxContainerSecurityContext{}, + 
expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: false, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), + Readonly: false, + }, + { + ContainerPath: "/dev/shm", + HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), + Readonly: false, + }, + }, + }, + } { + config := &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: "test-name", + Attempt: 1, + }, + Mounts: test.criMounts, + Linux: &runtime.LinuxContainerConfig{ + SecurityContext: test.securityContext, + }, + } + c := newTestCRIService() + c.os.(*ostesting.FakeOS).StatFn = test.statFn + mounts := c.containerMounts(testSandboxID, config) + assert.Equal(t, test.expectedMounts, mounts, desc) + } +} + +func TestPrivilegedBindMount(t *testing.T) { + testPid := uint32(1234) + c := newTestCRIService() + testSandboxID := "sandbox-id" + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + ociRuntime := config.Runtime{} + + for desc, test := range map[string]struct { + privileged bool + expectedSysFSRO bool + expectedCgroupFSRO bool + }{ + "sysfs and cgroupfs should mount as 'ro' by default": { + expectedSysFSRO: true, + expectedCgroupFSRO: true, + }, + "sysfs and cgroupfs should not mount as 'ro' if privileged": { + privileged: true, + expectedSysFSRO: false, + expectedCgroupFSRO: false, + }, + } { + t.Logf("TestCase %q", desc) + + containerConfig.Linux.SecurityContext.Privileged = test.privileged + sandboxConfig.Linux.SecurityContext.Privileged = test.privileged + + spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + + assert.NoError(t, err) + if test.expectedSysFSRO { + checkMount(t, spec.Mounts, "sysfs", "/sys", "sysfs", []string{"ro"}, []string{"rw"}) + } else { + checkMount(t, 
spec.Mounts, "sysfs", "/sys", "sysfs", []string{"rw"}, []string{"ro"}) + } + if test.expectedCgroupFSRO { + checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"ro"}, []string{"rw"}) + } else { + checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"rw"}, []string{"ro"}) + } + } +} + +func TestMountPropagation(t *testing.T) { + + sharedLookupMountFn := func(string) (mount.Info, error) { + return mount.Info{ + Mountpoint: "host-path", + Optional: "shared:", + }, nil + } + + slaveLookupMountFn := func(string) (mount.Info, error) { + return mount.Info{ + Mountpoint: "host-path", + Optional: "master:", + }, nil + } + + othersLookupMountFn := func(string) (mount.Info, error) { + return mount.Info{ + Mountpoint: "host-path", + Optional: "others", + }, nil + } + + for desc, test := range map[string]struct { + criMount *runtime.Mount + fakeLookupMountFn func(string) (mount.Info, error) + optionsCheck []string + expectErr bool + }{ + "HostPath should mount as 'rprivate' if propagation is MountPropagation_PROPAGATION_PRIVATE": { + criMount: &runtime.Mount{ + ContainerPath: "container-path", + HostPath: "host-path", + Propagation: runtime.MountPropagation_PROPAGATION_PRIVATE, + }, + fakeLookupMountFn: nil, + optionsCheck: []string{"rbind", "rprivate"}, + expectErr: false, + }, + "HostPath should mount as 'rslave' if propagation is MountPropagation_PROPAGATION_HOST_TO_CONTAINER": { + criMount: &runtime.Mount{ + ContainerPath: "container-path", + HostPath: "host-path", + Propagation: runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER, + }, + fakeLookupMountFn: slaveLookupMountFn, + optionsCheck: []string{"rbind", "rslave"}, + expectErr: false, + }, + "HostPath should mount as 'rshared' if propagation is MountPropagation_PROPAGATION_BIDIRECTIONAL": { + criMount: &runtime.Mount{ + ContainerPath: "container-path", + HostPath: "host-path", + Propagation: runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL, + }, + fakeLookupMountFn: 
sharedLookupMountFn, + optionsCheck: []string{"rbind", "rshared"}, + expectErr: false, + }, + "HostPath should mount as 'rprivate' if propagation is illegal": { + criMount: &runtime.Mount{ + ContainerPath: "container-path", + HostPath: "host-path", + Propagation: runtime.MountPropagation(42), + }, + fakeLookupMountFn: nil, + optionsCheck: []string{"rbind", "rprivate"}, + expectErr: false, + }, + "Expect an error if HostPath isn't shared and mount propagation is MountPropagation_PROPAGATION_BIDIRECTIONAL": { + criMount: &runtime.Mount{ + ContainerPath: "container-path", + HostPath: "host-path", + Propagation: runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL, + }, + fakeLookupMountFn: slaveLookupMountFn, + expectErr: true, + }, + "Expect an error if HostPath isn't slave or shared and mount propagation is MountPropagation_PROPAGATION_HOST_TO_CONTAINER": { + criMount: &runtime.Mount{ + ContainerPath: "container-path", + HostPath: "host-path", + Propagation: runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER, + }, + fakeLookupMountFn: othersLookupMountFn, + expectErr: true, + }, + } { + t.Logf("TestCase %q", desc) + c := newTestCRIService() + c.os.(*ostesting.FakeOS).LookupMountFn = test.fakeLookupMountFn + config, _, _, _ := getCreateContainerTestData() + + var spec runtimespec.Spec + spec.Linux = &runtimespec.Linux{} + + err := opts.WithMounts(c.os, config, []*runtime.Mount{test.criMount}, "")(context.Background(), nil, nil, &spec) + if test.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + checkMount(t, spec.Mounts, test.criMount.HostPath, test.criMount.ContainerPath, "bind", test.optionsCheck, nil) + } + } +} + +func TestPidNamespace(t *testing.T) { + testID := "test-id" + testPid := uint32(1234) + testSandboxID := "sandbox-id" + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + for desc, test := range map[string]struct { + pidNS 
runtime.NamespaceMode + expected runtimespec.LinuxNamespace + }{ + "node namespace mode": { + pidNS: runtime.NamespaceMode_NODE, + expected: runtimespec.LinuxNamespace{ + Type: runtimespec.PIDNamespace, + Path: opts.GetPIDNamespace(testPid), + }, + }, + "container namespace mode": { + pidNS: runtime.NamespaceMode_CONTAINER, + expected: runtimespec.LinuxNamespace{ + Type: runtimespec.PIDNamespace, + }, + }, + "pod namespace mode": { + pidNS: runtime.NamespaceMode_POD, + expected: runtimespec.LinuxNamespace{ + Type: runtimespec.PIDNamespace, + Path: opts.GetPIDNamespace(testPid), + }, + }, + } { + t.Logf("TestCase %q", desc) + containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{Pid: test.pidNS} + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + assert.Contains(t, spec.Linux.Namespaces, test.expected) + } +} + +func TestNoDefaultRunMount(t *testing.T) { + testID := "test-id" + testPid := uint32(1234) + testSandboxID := "sandbox-id" + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + assert.NoError(t, err) + for _, mount := range spec.Mounts { + assert.NotEqual(t, "/run", mount.Destination) + } +} + +func TestGenerateSeccompSpecOpts(t *testing.T) { + for desc, test := range map[string]struct { + profile string + privileged bool + disable bool + specOpts oci.SpecOpts + expectErr bool + }{ + "should return error if seccomp is specified when seccomp is not supported": { + profile: runtimeDefault, + disable: true, + expectErr: true, + }, + "should not return error if seccomp is not specified when seccomp is not supported": { + profile: "", + disable: true, + }, + "should not return error if seccomp is unconfined when 
seccomp is not supported": { + profile: unconfinedProfile, + disable: true, + }, + "should not set seccomp when privileged is true": { + profile: seccompDefaultProfile, + privileged: true, + }, + "should not set seccomp when seccomp is unconfined": { + profile: unconfinedProfile, + }, + "should not set seccomp when seccomp is not specified": { + profile: "", + }, + "should set default seccomp when seccomp is runtime/default": { + profile: runtimeDefault, + specOpts: seccomp.WithDefaultProfile(), + }, + "should set default seccomp when seccomp is docker/default": { + profile: dockerDefault, + specOpts: seccomp.WithDefaultProfile(), + }, + "should set specified profile when local profile is specified": { + profile: profileNamePrefix + "test-profile", + specOpts: seccomp.WithProfile("test-profile"), + }, + "should return error if specified profile is invalid": { + profile: "test-profile", + expectErr: true, + }, + } { + t.Logf("TestCase %q", desc) + specOpts, err := generateSeccompSpecOpts(test.profile, test.privileged, !test.disable) + assert.Equal(t, + reflect.ValueOf(test.specOpts).Pointer(), + reflect.ValueOf(specOpts).Pointer()) + if test.expectErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + } +} + +func TestGenerateApparmorSpecOpts(t *testing.T) { + for desc, test := range map[string]struct { + profile string + privileged bool + disable bool + specOpts oci.SpecOpts + expectErr bool + }{ + "should return error if apparmor is specified when apparmor is not supported": { + profile: runtimeDefault, + disable: true, + expectErr: true, + }, + "should not return error if apparmor is not specified when apparmor is not supported": { + profile: "", + disable: true, + }, + "should set default apparmor when apparmor is not specified": { + profile: "", + specOpts: apparmor.WithDefaultProfile(appArmorDefaultProfileName), + }, + "should not apparmor when apparmor is not specified and privileged is true": { + profile: "", + privileged: true, + }, + 
"should not return error if apparmor is unconfined when apparmor is not supported": { + profile: unconfinedProfile, + disable: true, + }, + "should not apparmor when apparmor is unconfined": { + profile: unconfinedProfile, + }, + "should not apparmor when apparmor is unconfined and privileged is true": { + profile: unconfinedProfile, + privileged: true, + }, + "should set default apparmor when apparmor is runtime/default": { + profile: runtimeDefault, + specOpts: apparmor.WithDefaultProfile(appArmorDefaultProfileName), + }, + "should not apparmor when apparmor is default and privileged is true": { + profile: runtimeDefault, + privileged: true, + }, + "should set specified profile when local profile is specified": { + profile: profileNamePrefix + "test-profile", + specOpts: apparmor.WithProfile("test-profile"), + }, + "should set apparmor when local profile is specified and privileged is true": { + profile: profileNamePrefix + "test-profile", + privileged: true, + specOpts: apparmor.WithProfile("test-profile"), + }, + "should return error if specified profile is invalid": { + profile: "test-profile", + expectErr: true, + }, + } { + t.Logf("TestCase %q", desc) + specOpts, err := generateApparmorSpecOpts(test.profile, test.privileged, !test.disable) + assert.Equal(t, + reflect.ValueOf(test.specOpts).Pointer(), + reflect.ValueOf(specOpts).Pointer()) + if test.expectErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + } +} + +func TestMaskedAndReadonlyPaths(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + + defaultSpec, err := oci.GenerateSpec(ctrdutil.NamespacedContext(), nil, &containers.Container{ID: testID}) + require.NoError(t, err) + + for desc, test := range map[string]struct { + disableProcMount bool + masked []string + readonly []string + 
expectedMasked []string + expectedReadonly []string + privileged bool + }{ + "should apply default if not specified when disable_proc_mount = true": { + disableProcMount: true, + masked: nil, + readonly: nil, + expectedMasked: defaultSpec.Linux.MaskedPaths, + expectedReadonly: defaultSpec.Linux.ReadonlyPaths, + privileged: false, + }, + "should always apply CRI specified paths when disable_proc_mount = false": { + disableProcMount: false, + masked: nil, + readonly: nil, + expectedMasked: nil, + expectedReadonly: nil, + privileged: false, + }, + "should be able to specify empty paths": { + masked: []string{}, + readonly: []string{}, + expectedMasked: []string{}, + expectedReadonly: []string{}, + privileged: false, + }, + "should apply CRI specified paths": { + masked: []string{"/proc"}, + readonly: []string{"/sys"}, + expectedMasked: []string{"/proc"}, + expectedReadonly: []string{"/sys"}, + privileged: false, + }, + "default should be nil for privileged": { + expectedMasked: nil, + expectedReadonly: nil, + privileged: true, + }, + "should be able to specify empty paths, esp. 
if privileged": { + masked: []string{}, + readonly: []string{}, + expectedMasked: nil, + expectedReadonly: nil, + privileged: true, + }, + "should not apply CRI specified paths if privileged": { + masked: []string{"/proc"}, + readonly: []string{"/sys"}, + expectedMasked: nil, + expectedReadonly: nil, + privileged: true, + }, + } { + t.Logf("TestCase %q", desc) + c.config.DisableProcMount = test.disableProcMount + containerConfig.Linux.SecurityContext.MaskedPaths = test.masked + containerConfig.Linux.SecurityContext.ReadonlyPaths = test.readonly + containerConfig.Linux.SecurityContext.Privileged = test.privileged + sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + Privileged: test.privileged, + } + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + if !test.privileged { // specCheck presumes an unprivileged container + specCheck(t, testID, testSandboxID, testPid, spec) + } + assert.Equal(t, test.expectedMasked, spec.Linux.MaskedPaths) + assert.Equal(t, test.expectedReadonly, spec.Linux.ReadonlyPaths) + } +} + +func TestHostname(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + c.os.(*ostesting.FakeOS).HostnameFn = func() (string, error) { + return "real-hostname", nil + } + for desc, test := range map[string]struct { + hostname string + networkNs runtime.NamespaceMode + expectedEnv string + }{ + "should add HOSTNAME=sandbox.Hostname for pod network namespace": { + hostname: "test-hostname", + networkNs: runtime.NamespaceMode_POD, + expectedEnv: "HOSTNAME=test-hostname", + }, + "should add HOSTNAME=sandbox.Hostname for host network namespace": { + hostname: "test-hostname", + networkNs: runtime.NamespaceMode_NODE, + expectedEnv: 
"HOSTNAME=test-hostname", + }, + "should add HOSTNAME=os.Hostname for host network namespace if sandbox.Hostname is not set": { + hostname: "", + networkNs: runtime.NamespaceMode_NODE, + expectedEnv: "HOSTNAME=real-hostname", + }, + } { + t.Logf("TestCase %q", desc) + sandboxConfig.Hostname = test.hostname + sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{Network: test.networkNs}, + } + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + specCheck(t, testID, testSandboxID, testPid, spec) + assert.Contains(t, spec.Process.Env, test.expectedEnv) + } +} + +func TestDisableCgroup(t *testing.T) { + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + c.config.DisableCgroup = true + spec, err := c.containerSpec("test-id", "sandbox-id", 1234, "", containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + + t.Log("resource limit should not be set") + assert.Nil(t, spec.Linux.Resources.Memory) + assert.Nil(t, spec.Linux.Resources.CPU) + + t.Log("cgroup path should be empty") + assert.Empty(t, spec.Linux.CgroupsPath) +} + +func TestGenerateUserString(t *testing.T) { + type testcase struct { + // the name of the test case + name string + + u string + uid, gid *runtime.Int64Value + + result string + expectedError bool + } + testcases := []testcase{ + { + name: "Empty", + result: "", + }, + { + name: "Username Only", + u: "testuser", + result: "testuser", + }, + { + name: "Username, UID", + u: "testuser", + uid: &runtime.Int64Value{Value: 1}, + result: "testuser", + }, + { + name: "Username, UID, GID", + u: "testuser", + uid: &runtime.Int64Value{Value: 1}, + gid: &runtime.Int64Value{Value: 10}, + result: "testuser:10", + }, + { + name: "Username, GID", + u: "testuser", + gid: 
&runtime.Int64Value{Value: 10}, + result: "testuser:10", + }, + { + name: "UID only", + uid: &runtime.Int64Value{Value: 1}, + result: "1", + }, + { + name: "UID, GID", + uid: &runtime.Int64Value{Value: 1}, + gid: &runtime.Int64Value{Value: 10}, + result: "1:10", + }, + { + name: "GID only", + gid: &runtime.Int64Value{Value: 10}, + result: "", + expectedError: true, + }, + } + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + r, err := generateUserString(tc.u, tc.uid, tc.gid) + if tc.expectedError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + assert.Equal(t, tc.result, r) + }) + } +} + +func TestPrivilegedDevices(t *testing.T) { + testPid := uint32(1234) + c := newTestCRIService() + testSandboxID := "sandbox-id" + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + + for desc, test := range map[string]struct { + privileged bool + privilegedWithoutHostDevices bool + expectHostDevices bool + }{ + "expect no host devices when privileged is false": { + privileged: false, + privilegedWithoutHostDevices: false, + expectHostDevices: false, + }, + "expect no host devices when privileged is false and privilegedWithoutHostDevices is true": { + privileged: false, + privilegedWithoutHostDevices: true, + expectHostDevices: false, + }, + "expect host devices when privileged is true": { + privileged: true, + privilegedWithoutHostDevices: false, + expectHostDevices: true, + }, + "expect no host devices when privileged is true and privilegedWithoutHostDevices is true": { + privileged: true, + privilegedWithoutHostDevices: true, + expectHostDevices: false, + }, + } { + t.Logf("TestCase %q", desc) + + containerConfig.Linux.SecurityContext.Privileged = test.privileged + sandboxConfig.Linux.SecurityContext.Privileged = test.privileged + + ociRuntime := config.Runtime{ + PrivilegedWithoutHostDevices: test.privilegedWithoutHostDevices, + } + spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", 
containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + assert.NoError(t, err) + + hostDevices, err := devices.HostDevices() + assert.NoError(t, err) + + if test.expectHostDevices { + assert.Len(t, spec.Linux.Devices, len(hostDevices)) + } else { + assert.Empty(t, spec.Linux.Devices) + } + } +} diff --git a/pkg/server/container_create_windows.go b/pkg/server/container_create_windows.go new file mode 100644 index 000000000..af597f506 --- /dev/null +++ b/pkg/server/container_create_windows.go @@ -0,0 +1,46 @@ +// +build windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package server + +import ( + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/oci" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + + "github.com/containerd/cri/pkg/config" +) + +// No container mounts for windows. +func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount { + return nil +} + +// TODO(windows): Add windows container spec. 
+func (c *criService) containerSpec(id string, sandboxID string, sandboxPid uint32, netNSPath string, + config *runtime.ContainerConfig, sandboxConfig *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig, + extraMounts []*runtime.Mount, ociRuntime config.Runtime) (*runtimespec.Spec, error) { + return nil, errdefs.ErrNotImplemented +} + +// No extra spec options needed for windows. +func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { + return nil, nil +} diff --git a/pkg/server/container_start.go b/pkg/server/container_start.go index 402a77ae5..e3b6f173e 100644 --- a/pkg/server/container_start.go +++ b/pkg/server/container_start.go @@ -25,7 +25,6 @@ import ( containerdio "github.com/containerd/containerd/cio" "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/log" - "github.com/containerd/containerd/plugin" "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/net/context" @@ -99,11 +98,7 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain return nil, errors.Wrap(err, "failed to get container info") } - var taskOpts []containerd.NewTaskOpts - // TODO(random-liu): Remove this after shim v1 is deprecated. - if c.config.NoPivot && ctrInfo.Runtime.Name == plugin.RuntimeLinuxV1 { - taskOpts = append(taskOpts, containerd.WithNoPivotRoot) - } + taskOpts := c.taskOpts(ctrInfo.Runtime.Name) task, err := container.NewTask(ctx, ioCreation, taskOpts...) if err != nil { return nil, errors.Wrap(err, "failed to create containerd task") diff --git a/pkg/server/container_stats.go b/pkg/server/container_stats.go index 1580c0400..802134d3a 100644 --- a/pkg/server/container_stats.go +++ b/pkg/server/container_stats.go @@ -25,6 +25,8 @@ import ( // ContainerStats returns stats of the container. If the container does not // exist, the call returns an error. 
+// TODO(windows): hcsshim Stats is not implemented, add windows support after +// that is implemented. func (c *criService) ContainerStats(ctx context.Context, in *runtime.ContainerStatsRequest) (*runtime.ContainerStatsResponse, error) { cntr, err := c.containerStore.Get(in.GetContainerId()) if err != nil { @@ -39,7 +41,7 @@ func (c *criService) ContainerStats(ctx context.Context, in *runtime.ContainerSt return nil, errors.Errorf("unexpected metrics response: %+v", resp.Metrics) } - cs, err := c.getContainerMetrics(cntr.Metadata, resp.Metrics[0]) + cs, err := c.containerMetrics(cntr.Metadata, resp.Metrics[0]) if err != nil { return nil, errors.Wrap(err, "failed to decode container metrics") } diff --git a/pkg/server/container_stats_list.go b/pkg/server/container_stats_list.go index ed697c269..2b84f4777 100644 --- a/pkg/server/container_stats_list.go +++ b/pkg/server/container_stats_list.go @@ -17,10 +17,8 @@ limitations under the License. package server import ( - "github.com/containerd/cgroups" tasks "github.com/containerd/containerd/api/services/tasks/v1" "github.com/containerd/containerd/api/types" - "github.com/containerd/typeurl" "github.com/pkg/errors" "golang.org/x/net/context" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" @@ -58,7 +56,7 @@ func (c *criService) toCRIContainerStats( } containerStats := new(runtime.ListContainerStatsResponse) for _, cntr := range containers { - cs, err := c.getContainerMetrics(cntr.Metadata, statsMap[cntr.ID]) + cs, err := c.containerMetrics(cntr.Metadata, statsMap[cntr.ID]) if err != nil { return nil, errors.Wrapf(err, "failed to decode container metrics for %q", cntr.ID) } @@ -67,59 +65,6 @@ func (c *criService) toCRIContainerStats( return containerStats, nil } -func (c *criService) getContainerMetrics( - meta containerstore.Metadata, - stats *types.Metric, -) (*runtime.ContainerStats, error) { - var cs runtime.ContainerStats - var usedBytes, inodesUsed uint64 - sn, err := c.snapshotStore.Get(meta.ID) - // If 
snapshotstore doesn't have cached snapshot information - // set WritableLayer usage to zero - if err == nil { - usedBytes = sn.Size - inodesUsed = sn.Inodes - } - cs.WritableLayer = &runtime.FilesystemUsage{ - Timestamp: sn.Timestamp, - FsId: &runtime.FilesystemIdentifier{ - Mountpoint: c.imageFSPath, - }, - UsedBytes: &runtime.UInt64Value{Value: usedBytes}, - InodesUsed: &runtime.UInt64Value{Value: inodesUsed}, - } - cs.Attributes = &runtime.ContainerAttributes{ - Id: meta.ID, - Metadata: meta.Config.GetMetadata(), - Labels: meta.Config.GetLabels(), - Annotations: meta.Config.GetAnnotations(), - } - - if stats != nil { - s, err := typeurl.UnmarshalAny(stats.Data) - if err != nil { - return nil, errors.Wrap(err, "failed to extract container metrics") - } - metrics := s.(*cgroups.Metrics) - if metrics.CPU != nil && metrics.CPU.Usage != nil { - cs.Cpu = &runtime.CpuUsage{ - Timestamp: stats.Timestamp.UnixNano(), - UsageCoreNanoSeconds: &runtime.UInt64Value{Value: metrics.CPU.Usage.Total}, - } - } - if metrics.Memory != nil && metrics.Memory.Usage != nil { - cs.Memory = &runtime.MemoryUsage{ - Timestamp: stats.Timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{ - Value: getWorkingSet(metrics.Memory), - }, - } - } - } - - return &cs, nil -} - func (c *criService) normalizeContainerStatsFilter(filter *runtime.ContainerStatsFilter) { if cntr, err := c.containerStore.Get(filter.GetId()); err == nil { filter.Id = cntr.ID @@ -169,17 +114,3 @@ func matchLabelSelector(selector, labels map[string]string) bool { } return true } - -// getWorkingSet calculates workingset memory from cgroup memory stats. -// The caller should make sure memory is not nil. 
-// workingset = usage - total_inactive_file -func getWorkingSet(memory *cgroups.MemoryStat) uint64 { - if memory.Usage == nil { - return 0 - } - var workingSet uint64 - if memory.TotalInactiveFile < memory.Usage.Usage { - workingSet = memory.Usage.Usage - memory.TotalInactiveFile - } - return workingSet -} diff --git a/pkg/server/container_stats_list_unix.go b/pkg/server/container_stats_list_unix.go new file mode 100644 index 000000000..b1a2c5045 --- /dev/null +++ b/pkg/server/container_stats_list_unix.go @@ -0,0 +1,96 @@ +// +build !windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package server + +import ( + "github.com/containerd/cgroups" + "github.com/containerd/containerd/api/types" + "github.com/containerd/typeurl" + "github.com/pkg/errors" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + + containerstore "github.com/containerd/cri/pkg/store/container" +) + +func (c *criService) containerMetrics( + meta containerstore.Metadata, + stats *types.Metric, +) (*runtime.ContainerStats, error) { + var cs runtime.ContainerStats + var usedBytes, inodesUsed uint64 + sn, err := c.snapshotStore.Get(meta.ID) + // If snapshotstore doesn't have cached snapshot information + // set WritableLayer usage to zero + if err == nil { + usedBytes = sn.Size + inodesUsed = sn.Inodes + } + cs.WritableLayer = &runtime.FilesystemUsage{ + Timestamp: sn.Timestamp, + FsId: &runtime.FilesystemIdentifier{ + Mountpoint: c.imageFSPath, + }, + UsedBytes: &runtime.UInt64Value{Value: usedBytes}, + InodesUsed: &runtime.UInt64Value{Value: inodesUsed}, + } + cs.Attributes = &runtime.ContainerAttributes{ + Id: meta.ID, + Metadata: meta.Config.GetMetadata(), + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + } + + if stats != nil { + s, err := typeurl.UnmarshalAny(stats.Data) + if err != nil { + return nil, errors.Wrap(err, "failed to extract container metrics") + } + metrics := s.(*cgroups.Metrics) + if metrics.CPU != nil && metrics.CPU.Usage != nil { + cs.Cpu = &runtime.CpuUsage{ + Timestamp: stats.Timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: metrics.CPU.Usage.Total}, + } + } + if metrics.Memory != nil && metrics.Memory.Usage != nil { + cs.Memory = &runtime.MemoryUsage{ + Timestamp: stats.Timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{ + Value: getWorkingSet(metrics.Memory), + }, + } + } + } + + return &cs, nil +} + +// getWorkingSet calculates workingset memory from cgroup memory stats. +// The caller should make sure memory is not nil. 
+// workingset = usage - total_inactive_file +func getWorkingSet(memory *cgroups.MemoryStat) uint64 { + if memory.Usage == nil { + return 0 + } + var workingSet uint64 + if memory.TotalInactiveFile < memory.Usage.Usage { + workingSet = memory.Usage.Usage - memory.TotalInactiveFile + } + return workingSet +} diff --git a/pkg/server/container_stats_list_test.go b/pkg/server/container_stats_list_unix_test.go similarity index 98% rename from pkg/server/container_stats_list_test.go rename to pkg/server/container_stats_list_unix_test.go index 9774ee4b8..812d2f7f4 100644 --- a/pkg/server/container_stats_list_test.go +++ b/pkg/server/container_stats_list_unix_test.go @@ -1,3 +1,5 @@ +// +build !windows + /* Copyright 2018 The containerd Authors. diff --git a/pkg/server/container_stats_list_windows.go b/pkg/server/container_stats_list_windows.go new file mode 100644 index 000000000..05ed85355 --- /dev/null +++ b/pkg/server/container_stats_list_windows.go @@ -0,0 +1,35 @@ +// +build windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package server + +import ( + "github.com/containerd/containerd/api/types" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + + containerstore "github.com/containerd/cri/pkg/store/container" +) + +// TODO(windows): Implement a dummy version of this, and actually support this +// when stats is supported by the hcs containerd shim. 
+func (c *criService) containerMetrics( + meta containerstore.Metadata, + stats *types.Metric, +) (*runtime.ContainerStats, error) { + return nil, nil +} diff --git a/pkg/server/container_update_resources.go b/pkg/server/container_update_resources_unix.go similarity index 99% rename from pkg/server/container_update_resources.go rename to pkg/server/container_update_resources_unix.go index 2c9d03389..99fee93f2 100644 --- a/pkg/server/container_update_resources.go +++ b/pkg/server/container_update_resources_unix.go @@ -1,3 +1,5 @@ +// +build !windows + /* Copyright 2017 The Kubernetes Authors. diff --git a/pkg/server/container_update_resources_test.go b/pkg/server/container_update_resources_unix_test.go similarity index 99% rename from pkg/server/container_update_resources_test.go rename to pkg/server/container_update_resources_unix_test.go index 47650355d..0cf13e73d 100644 --- a/pkg/server/container_update_resources_test.go +++ b/pkg/server/container_update_resources_unix_test.go @@ -1,3 +1,5 @@ +// +build !windows + /* Copyright 2017 The Kubernetes Authors. diff --git a/pkg/server/container_update_resources_windows.go b/pkg/server/container_update_resources_windows.go new file mode 100644 index 000000000..ae3517ed2 --- /dev/null +++ b/pkg/server/container_update_resources_windows.go @@ -0,0 +1,31 @@ +// +build windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package server + +import ( + "github.com/containerd/containerd/errdefs" + "golang.org/x/net/context" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" +) + +// UpdateContainerResources updates ContainerConfig of the container. +// TODO(windows): Figure out whether windows support this. +func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (*runtime.UpdateContainerResourcesResponse, error) { + return nil, errdefs.ErrNotImplemented +} diff --git a/pkg/server/helpers.go b/pkg/server/helpers.go index f6a530472..8563a8921 100644 --- a/pkg/server/helpers.go +++ b/pkg/server/helpers.go @@ -20,7 +20,6 @@ import ( "fmt" "path" "path/filepath" - "regexp" "strconv" "strings" @@ -33,7 +32,6 @@ import ( "github.com/containerd/typeurl" "github.com/docker/distribution/reference" imagedigest "github.com/opencontainers/go-digest" - "github.com/opencontainers/selinux/go-selinux/label" "github.com/pkg/errors" "golang.org/x/net/context" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" @@ -51,6 +49,7 @@ const ( errorStartReason = "StartError" // errorStartExitCode is the exit code when fails to start container. // 128 is the same with Docker's behavior. + // TODO(windows): Figure out what should be used for windows. errorStartExitCode = 128 // completeExitReason is the exit reason when container exits with code 0. completeExitReason = "Completed" @@ -58,39 +57,16 @@ const ( errorExitReason = "Error" // oomExitReason is the exit reason when process in container is oom killed. oomExitReason = "OOMKilled" -) -const ( - // defaultSandboxOOMAdj is default omm adj for sandbox container. (kubernetes#47938). - defaultSandboxOOMAdj = -998 - // defaultShmSize is the default size of the sandbox shm. - defaultShmSize = int64(1024 * 1024 * 64) - // relativeRootfsPath is the rootfs path relative to bundle path. - relativeRootfsPath = "rootfs" // sandboxesDir contains all sandbox root. 
A sandbox root is the running // directory of the sandbox, all files created for the sandbox will be // placed under this directory. sandboxesDir = "sandboxes" // containersDir contains all container root. containersDir = "containers" - // According to http://man7.org/linux/man-pages/man5/resolv.conf.5.html: - // "The search list is currently limited to six domains with a total of 256 characters." - maxDNSSearches = 6 // Delimiter used to construct container/sandbox names. nameDelimiter = "_" - // devShm is the default path of /dev/shm. - devShm = "/dev/shm" - // etcHosts is the default path of /etc/hosts file. - etcHosts = "/etc/hosts" - // etcHostname is the default path of /etc/hostname file. - etcHostname = "/etc/hostname" - // resolvConfPath is the abs path of resolv.conf on host or container. - resolvConfPath = "/etc/resolv.conf" - // hostnameEnv is the key for HOSTNAME env. - hostnameEnv = "HOSTNAME" -) -const ( // criContainerdPrefix is common prefix for cri-containerd criContainerdPrefix = "io.cri-containerd" // containerKindLabel is a label key indicating container is sandbox container or application container @@ -107,14 +83,9 @@ const ( sandboxMetadataExtension = criContainerdPrefix + ".sandbox.metadata" // containerMetadataExtension is an extension name that identify metadata of container in CreateContainerRequest containerMetadataExtension = criContainerdPrefix + ".container.metadata" -) -const ( // defaultIfName is the default network interface for the pods defaultIfName = "eth0" - // networkAttachCount is the minimum number of networks the PodSandbox - // attaches to - networkAttachCount = 2 ) // makeSandboxName generates sandbox name from sandbox metadata. The name @@ -141,17 +112,6 @@ func makeContainerName(c *runtime.ContainerMetadata, s *runtime.PodSandboxMetada }, nameDelimiter) } -// getCgroupsPath generates container cgroups path. 
-func getCgroupsPath(cgroupsParent, id string) string { - base := path.Base(cgroupsParent) - if strings.HasSuffix(base, ".slice") { - // For a.slice/b.slice/c.slice, base is c.slice. - // runc systemd cgroup path format is "slice:prefix:name". - return strings.Join([]string{base, "cri-containerd", id}, ":") - } - return filepath.Join(cgroupsParent, id) -} - // getSandboxRootDir returns the root directory for managing sandbox files, // e.g. hosts files. func (c *criService) getSandboxRootDir(id string) string { @@ -176,26 +136,6 @@ func (c *criService) getVolatileContainerRootDir(id string) string { return filepath.Join(c.config.StateDir, containersDir, id) } -// getSandboxHostname returns the hostname file path inside the sandbox root directory. -func (c *criService) getSandboxHostname(id string) string { - return filepath.Join(c.getSandboxRootDir(id), "hostname") -} - -// getSandboxHosts returns the hosts file path inside the sandbox root directory. -func (c *criService) getSandboxHosts(id string) string { - return filepath.Join(c.getSandboxRootDir(id), "hosts") -} - -// getResolvPath returns resolv.conf filepath for specified sandbox. -func (c *criService) getResolvPath(id string) string { - return filepath.Join(c.getSandboxRootDir(id), "resolv.conf") -} - -// getSandboxDevShm returns the shm file path inside the sandbox root directory. -func (c *criService) getSandboxDevShm(id string) string { - return filepath.Join(c.getVolatileSandboxRootDir(id), "shm") -} - // criContainerStateToString formats CRI container state to string. func criContainerStateToString(state runtime.ContainerState) string { return runtime.ContainerState_name[int32(state)] @@ -298,49 +238,6 @@ func (c *criService) ensureImageExists(ctx context.Context, ref string, config * return &newImage, nil } -func initSelinuxOpts(selinuxOpt *runtime.SELinuxOption) (string, string, error) { - if selinuxOpt == nil { - return "", "", nil - } - - // Should ignored selinuxOpts if they are incomplete. 
- if selinuxOpt.GetUser() == "" || - selinuxOpt.GetRole() == "" || - selinuxOpt.GetType() == "" { - return "", "", nil - } - - // make sure the format of "level" is correct. - ok, err := checkSelinuxLevel(selinuxOpt.GetLevel()) - if err != nil || !ok { - return "", "", err - } - - labelOpts := fmt.Sprintf("%s:%s:%s:%s", - selinuxOpt.GetUser(), - selinuxOpt.GetRole(), - selinuxOpt.GetType(), - selinuxOpt.GetLevel()) - - options, err := label.DupSecOpt(labelOpts) - if err != nil { - return "", "", err - } - return label.InitLabels(options) -} - -func checkSelinuxLevel(level string) (bool, error) { - if len(level) == 0 { - return true, nil - } - - matched, err := regexp.MatchString(`^s\d(-s\d)??(:c\d{1,4}((.c\d{1,4})?,c\d{1,4})*(.c\d{1,4})?(,c\d{1,4}(.c\d{1,4})?)*)?$`, level) - if err != nil || !matched { - return false, errors.Wrapf(err, "the format of 'level' %q is not correct", level) - } - return true, nil -} - // isInCRIMounts checks whether a destination is in CRI mount list. func isInCRIMounts(dst string, mounts []*runtime.Mount) bool { for _, m := range mounts { diff --git a/pkg/server/helpers_test.go b/pkg/server/helpers_test.go index cafbd9909..f461fd40e 100644 --- a/pkg/server/helpers_test.go +++ b/pkg/server/helpers_test.go @@ -114,39 +114,6 @@ func TestGetRepoDigestAndTag(t *testing.T) { } } -func TestGetCgroupsPath(t *testing.T) { - testID := "test-id" - for desc, test := range map[string]struct { - cgroupsParent string - expected string - }{ - "should support regular cgroup path": { - cgroupsParent: "/a/b", - expected: "/a/b/test-id", - }, - "should support systemd cgroup path": { - cgroupsParent: "/a.slice/b.slice", - expected: "b.slice:cri-containerd:test-id", - }, - "should support tailing slash for regular cgroup path": { - cgroupsParent: "/a/b/", - expected: "/a/b/test-id", - }, - "should support tailing slash for systemd cgroup path": { - cgroupsParent: "/a.slice/b.slice/", - expected: "b.slice:cri-containerd:test-id", - }, - "should treat root 
cgroup as regular cgroup path": { - cgroupsParent: "/", - expected: "/test-id", - }, - } { - t.Logf("TestCase %q", desc) - got := getCgroupsPath(test.cgroupsParent, testID) - assert.Equal(t, test.expected, got) - } -} - func TestBuildLabels(t *testing.T) { configLabels := map[string]string{ "a": "b", diff --git a/pkg/server/helpers_unix.go b/pkg/server/helpers_unix.go new file mode 100644 index 000000000..37739a702 --- /dev/null +++ b/pkg/server/helpers_unix.go @@ -0,0 +1,137 @@ +// +build !windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package server + +import ( + "fmt" + "path" + "path/filepath" + "regexp" + "strings" + + runcapparmor "github.com/opencontainers/runc/libcontainer/apparmor" + runcseccomp "github.com/opencontainers/runc/libcontainer/seccomp" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/pkg/errors" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" +) + +const ( + // defaultSandboxOOMAdj is default oom adj for sandbox container. (kubernetes#47938). + defaultSandboxOOMAdj = -998 + // defaultShmSize is the default size of the sandbox shm. + defaultShmSize = int64(1024 * 1024 * 64) + // relativeRootfsPath is the rootfs path relative to bundle path. + relativeRootfsPath = "rootfs" + // According to http://man7.org/linux/man-pages/man5/resolv.conf.5.html: + // "The search list is currently limited to six domains with a total of 256 characters." 
+ maxDNSSearches = 6 + // devShm is the default path of /dev/shm. + devShm = "/dev/shm" + // etcHosts is the default path of /etc/hosts file. + etcHosts = "/etc/hosts" + // etcHostname is the default path of /etc/hostname file. + etcHostname = "/etc/hostname" + // resolvConfPath is the abs path of resolv.conf on host or container. + resolvConfPath = "/etc/resolv.conf" + // hostnameEnv is the key for HOSTNAME env. + hostnameEnv = "HOSTNAME" +) + +// getCgroupsPath generates container cgroups path. +func getCgroupsPath(cgroupsParent, id string) string { + base := path.Base(cgroupsParent) + if strings.HasSuffix(base, ".slice") { + // For a.slice/b.slice/c.slice, base is c.slice. + // runc systemd cgroup path format is "slice:prefix:name". + return strings.Join([]string{base, "cri-containerd", id}, ":") + } + return filepath.Join(cgroupsParent, id) +} + +// getSandboxHostname returns the hostname file path inside the sandbox root directory. +func (c *criService) getSandboxHostname(id string) string { + return filepath.Join(c.getSandboxRootDir(id), "hostname") +} + +// getSandboxHosts returns the hosts file path inside the sandbox root directory. +func (c *criService) getSandboxHosts(id string) string { + return filepath.Join(c.getSandboxRootDir(id), "hosts") +} + +// getResolvPath returns resolv.conf filepath for specified sandbox. +func (c *criService) getResolvPath(id string) string { + return filepath.Join(c.getSandboxRootDir(id), "resolv.conf") +} + +// getSandboxDevShm returns the shm file path inside the sandbox root directory. +func (c *criService) getSandboxDevShm(id string) string { + return filepath.Join(c.getVolatileSandboxRootDir(id), "shm") +} + +func initSelinuxOpts(selinuxOpt *runtime.SELinuxOption) (string, string, error) { + if selinuxOpt == nil { + return "", "", nil + } + + // Should ignore selinuxOpts if they are incomplete. 
+ if selinuxOpt.GetUser() == "" || + selinuxOpt.GetRole() == "" || + selinuxOpt.GetType() == "" { + return "", "", nil + } + + // make sure the format of "level" is correct. + ok, err := checkSelinuxLevel(selinuxOpt.GetLevel()) + if err != nil || !ok { + return "", "", err + } + + labelOpts := fmt.Sprintf("%s:%s:%s:%s", + selinuxOpt.GetUser(), + selinuxOpt.GetRole(), + selinuxOpt.GetType(), + selinuxOpt.GetLevel()) + + options, err := label.DupSecOpt(labelOpts) + if err != nil { + return "", "", err + } + return label.InitLabels(options) +} + +func checkSelinuxLevel(level string) (bool, error) { + if len(level) == 0 { + return true, nil + } + + matched, err := regexp.MatchString(`^s\d(-s\d)??(:c\d{1,4}((.c\d{1,4})?,c\d{1,4})*(.c\d{1,4})?(,c\d{1,4}(.c\d{1,4})?)*)?$`, level) + if err != nil || !matched { + return false, errors.Wrapf(err, "the format of 'level' %q is not correct", level) + } + return true, nil +} + +func (c *criService) apparmorEnabled() bool { + return runcapparmor.IsEnabled() && !c.config.DisableApparmor +} + +func (c *criService) seccompEnabled() bool { + return runcseccomp.IsEnabled() +} diff --git a/pkg/server/helpers_unix_test.go b/pkg/server/helpers_unix_test.go new file mode 100644 index 000000000..be1b92b12 --- /dev/null +++ b/pkg/server/helpers_unix_test.go @@ -0,0 +1,58 @@ +// +build !windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package server + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGetCgroupsPath(t *testing.T) { + testID := "test-id" + for desc, test := range map[string]struct { + cgroupsParent string + expected string + }{ + "should support regular cgroup path": { + cgroupsParent: "/a/b", + expected: "/a/b/test-id", + }, + "should support systemd cgroup path": { + cgroupsParent: "/a.slice/b.slice", + expected: "b.slice:cri-containerd:test-id", + }, + "should support tailing slash for regular cgroup path": { + cgroupsParent: "/a/b/", + expected: "/a/b/test-id", + }, + "should support tailing slash for systemd cgroup path": { + cgroupsParent: "/a.slice/b.slice/", + expected: "b.slice:cri-containerd:test-id", + }, + "should treat root cgroup as regular cgroup path": { + cgroupsParent: "/", + expected: "/test-id", + }, + } { + t.Logf("TestCase %q", desc) + got := getCgroupsPath(test.cgroupsParent, testID) + assert.Equal(t, test.expected, got) + } +} diff --git a/pkg/server/imagefs_info.go b/pkg/server/imagefs_info.go index 30cf9623c..1ee355154 100644 --- a/pkg/server/imagefs_info.go +++ b/pkg/server/imagefs_info.go @@ -25,6 +25,7 @@ import ( ) // ImageFsInfo returns information of the filesystem that is used to store images. +// TODO(windows): Usage for windows is always 0 right now. Support this for windows. 
func (c *criService) ImageFsInfo(ctx context.Context, r *runtime.ImageFsInfoRequest) (*runtime.ImageFsInfoResponse, error) { snapshots := c.snapshotStore.List() timestamp := time.Now().UnixNano() diff --git a/pkg/server/io/helpers.go b/pkg/server/io/helpers.go index 6ca5931f2..4e797c9ef 100644 --- a/pkg/server/io/helpers.go +++ b/pkg/server/io/helpers.go @@ -24,7 +24,6 @@ import ( "syscall" "github.com/containerd/containerd/cio" - "github.com/containerd/fifo" "golang.org/x/net/context" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" ) @@ -113,7 +112,7 @@ func newStdioPipes(fifos *cio.FIFOSet) (_ *stdioPipes, _ *wgCloser, err error) { }() if fifos.Stdin != "" { - if f, err = fifo.OpenFifo(ctx, fifos.Stdin, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil { + if f, err = openFifo(ctx, fifos.Stdin, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil { return nil, nil, err } p.stdin = f @@ -121,7 +120,7 @@ func newStdioPipes(fifos *cio.FIFOSet) (_ *stdioPipes, _ *wgCloser, err error) { } if fifos.Stdout != "" { - if f, err = fifo.OpenFifo(ctx, fifos.Stdout, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil { + if f, err = openFifo(ctx, fifos.Stdout, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil { return nil, nil, err } p.stdout = f @@ -129,7 +128,7 @@ func newStdioPipes(fifos *cio.FIFOSet) (_ *stdioPipes, _ *wgCloser, err error) { } if fifos.Stderr != "" { - if f, err = fifo.OpenFifo(ctx, fifos.Stderr, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil { + if f, err = openFifo(ctx, fifos.Stderr, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil { return nil, nil, err } p.stderr = f diff --git a/pkg/server/io/helpers_unix.go b/pkg/server/io/helpers_unix.go new file mode 100644 index 000000000..ee5dc252c --- /dev/null +++ b/pkg/server/io/helpers_unix.go @@ -0,0 +1,31 @@ +// +build !windows + +/* +Copyright The containerd Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package io + +import ( + "io" + "os" + + "github.com/containerd/fifo" + "golang.org/x/net/context" +) + +func openFifo(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error) { + return fifo.OpenFifo(ctx, fn, flag, perm) +} diff --git a/pkg/server/io/helpers_windows.go b/pkg/server/io/helpers_windows.go new file mode 100644 index 000000000..9ea6a4f2b --- /dev/null +++ b/pkg/server/io/helpers_windows.go @@ -0,0 +1,31 @@ +// +build windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package io + +import ( + "io" + "os" + + "golang.org/x/net/context" +) + +// TODO(windows): Add windows FIFO support. 
+func openFifo(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error) { + return nil, nil +} diff --git a/pkg/server/restart.go b/pkg/server/restart.go index 30b1fa625..80578dd02 100644 --- a/pkg/server/restart.go +++ b/pkg/server/restart.go @@ -20,6 +20,7 @@ import ( "io/ioutil" "os" "path/filepath" + goruntime "runtime" "time" "github.com/containerd/containerd" @@ -407,7 +408,8 @@ func (c *criService) loadSandbox(ctx context.Context, cntr containerd.Container) sandbox.Container = cntr // Load network namespace. - if meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE { + if goruntime.GOOS != "windows" && + meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE { // Don't need to load netns for host network sandbox. return sandbox, nil } diff --git a/pkg/server/sandbox_portforward.go b/pkg/server/sandbox_portforward_unix.go similarity index 97% rename from pkg/server/sandbox_portforward.go rename to pkg/server/sandbox_portforward_unix.go index f972ce207..26dbc4075 100644 --- a/pkg/server/sandbox_portforward.go +++ b/pkg/server/sandbox_portforward_unix.go @@ -1,3 +1,5 @@ +// +build !windows + /* Copyright 2017 The Kubernetes Authors. @@ -47,7 +49,7 @@ func (c *criService) PortForward(ctx context.Context, r *runtime.PortForwardRequ } // portForward requires `socat` on the node. It uses netns to enter the sandbox namespace, -// and run `socat` insidethe namespace to forward stream for a specific port. The `socat` +// and run `socat` inside the namespace to forward stream for a specific port. The `socat` // command keeps running until it exits or client disconnect. 
func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriter) error { s, err := c.sandboxStore.Get(id) diff --git a/pkg/server/sandbox_portforward_windows.go b/pkg/server/sandbox_portforward_windows.go new file mode 100644 index 000000000..52ff9cd59 --- /dev/null +++ b/pkg/server/sandbox_portforward_windows.go @@ -0,0 +1,37 @@ +// +build windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package server + +import ( + "io" + + "github.com/containerd/containerd/errdefs" + "golang.org/x/net/context" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" +) + +// PortForward prepares a streaming endpoint to forward ports from a PodSandbox, and returns the address. +// TODO(windows): Implement this for windows. 
+func (c *criService) PortForward(ctx context.Context, r *runtime.PortForwardRequest) (*runtime.PortForwardResponse, error) { + return nil, errdefs.ErrNotImplemented +} + +func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriter) error { + return errdefs.ErrNotImplemented +} diff --git a/pkg/server/sandbox_run.go b/pkg/server/sandbox_run.go index 761af9e46..fc6ff0786 100644 --- a/pkg/server/sandbox_run.go +++ b/pkg/server/sandbox_run.go @@ -18,26 +18,20 @@ package server import ( "encoding/json" - "fmt" "math" - "os" + goruntime "runtime" "strings" "github.com/containerd/containerd" containerdio "github.com/containerd/containerd/cio" "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/log" - "github.com/containerd/containerd/oci" - "github.com/containerd/containerd/plugin" cni "github.com/containerd/go-cni" "github.com/containerd/typeurl" "github.com/davecgh/go-spew/spew" - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/net/context" - "golang.org/x/sys/unix" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" "k8s.io/kubernetes/pkg/util/bandwidth" @@ -110,10 +104,14 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox } log.G(ctx).Debugf("Use OCI %+v for sandbox %q", ociRuntime, id) - securityContext := config.GetLinux().GetSecurityContext() - //Create Network Namespace if it is not in host network - hostNet := securityContext.GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE - if !hostNet { + podNetwork := true + // Pod network is always needed on windows. + if goruntime.GOOS != "windows" && + config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE { + // Pod network is not needed on linux with host network. 
+ podNetwork = false + } + if podNetwork { // If it is not in host network namespace then create a namespace and set the sandbox // handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network // namespaces. If the pod is in host network namespace then both are empty and should not @@ -154,39 +152,19 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox } // Create sandbox container. - spec, err := c.generateSandboxContainerSpec(id, config, &image.ImageSpec.Config, sandbox.NetNSPath, ociRuntime.PodAnnotations) + // NOTE: sandboxContainerSpec SHOULD NOT have side + // effect, e.g. accessing/creating files, so that we can test + // it safely. + spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, sandbox.NetNSPath, ociRuntime.PodAnnotations) if err != nil { return nil, errors.Wrap(err, "failed to generate sandbox container spec") } log.G(ctx).Debugf("Sandbox container %q spec: %#+v", id, spew.NewFormatter(spec)) - var specOpts []oci.SpecOpts - userstr, err := generateUserString( - "", - securityContext.GetRunAsUser(), - securityContext.GetRunAsGroup(), - ) + // Generate spec options that will be applied to the spec later. 
+ specOpts, err := c.sandboxContainerSpecOpts(config, &image.ImageSpec.Config) if err != nil { - return nil, errors.Wrap(err, "failed to generate user string") - } - if userstr == "" { - // Lastly, since no user override was passed via CRI try to set via OCI - // Image - userstr = image.ImageSpec.Config.User - } - if userstr != "" { - specOpts = append(specOpts, oci.WithUser(userstr)) - } - - seccompSpecOpts, err := generateSeccompSpecOpts( - securityContext.GetSeccompProfilePath(), - securityContext.GetPrivileged(), - c.seccompEnabled) - if err != nil { - return nil, errors.Wrap(err, "failed to generate seccomp spec opts") - } - if seccompSpecOpts != nil { - specOpts = append(specOpts, seccompSpecOpts) + return nil, errors.Wrap(err, "failed to generate sanbdox container spec options") } sandboxLabels := buildLabels(config.Labels, containerKindSandbox) @@ -247,14 +225,14 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox } }() - // Setup sandbox /dev/shm, /etc/hosts, /etc/resolv.conf and /etc/hostname. + // Setup files required for the sandbox. if err = c.setupSandboxFiles(id, config); err != nil { return nil, errors.Wrapf(err, "failed to setup sandbox files") } defer func() { if retErr != nil { - if err = c.unmountSandboxFiles(id, config); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to unmount sandbox files in %q", + if err = c.cleanupSandboxFiles(id, config); err != nil { + log.G(ctx).WithError(err).Errorf("Failed to cleanup sandbox files in %q", sandboxRootDir) } } @@ -270,11 +248,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).", id, name) - var taskOpts []containerd.NewTaskOpts - // TODO(random-liu): Remove this after shim v1 is deprecated. 
- if c.config.NoPivot && ociRuntime.Type == plugin.RuntimeRuncV1 { - taskOpts = append(taskOpts, containerd.WithNoPivotRoot) - } + taskOpts := c.taskOpts(ociRuntime.Type) // We don't need stdio for sandbox container. task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...) if err != nil { @@ -328,222 +302,6 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil } -func (c *criService) generateSandboxContainerSpec(id string, config *runtime.PodSandboxConfig, - imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (*runtimespec.Spec, error) { - // Creates a spec Generator with the default spec. - // TODO(random-liu): [P1] Compare the default settings with docker and containerd default. - specOpts := []oci.SpecOpts{ - customopts.WithoutRunMount, - customopts.WithoutDefaultSecuritySettings, - customopts.WithRelativeRoot(relativeRootfsPath), - oci.WithEnv(imageConfig.Env), - oci.WithRootFSReadonly(), - oci.WithHostname(config.GetHostname()), - } - if imageConfig.WorkingDir != "" { - specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) - } - - if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 { - // Pause image must have entrypoint or cmd. - return nil, errors.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig) - } - specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...)) - - // TODO(random-liu): [P2] Consider whether to add labels and annotations to the container. - - // Set cgroups parent. 
- if c.config.DisableCgroup { - specOpts = append(specOpts, customopts.WithDisabledCgroups) - } else { - if config.GetLinux().GetCgroupParent() != "" { - cgroupsPath := getCgroupsPath(config.GetLinux().GetCgroupParent(), id) - specOpts = append(specOpts, oci.WithCgroup(cgroupsPath)) - } - } - - // When cgroup parent is not set, containerd-shim will create container in a child cgroup - // of the cgroup itself is in. - // TODO(random-liu): [P2] Set default cgroup path if cgroup parent is not specified. - - // Set namespace options. - var ( - securityContext = config.GetLinux().GetSecurityContext() - nsOptions = securityContext.GetNamespaceOptions() - ) - if nsOptions.GetNetwork() == runtime.NamespaceMode_NODE { - specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.NetworkNamespace)) - specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UTSNamespace)) - } else { - //TODO(Abhi): May be move this to containerd spec opts (WithLinuxSpaceOption) - specOpts = append(specOpts, oci.WithLinuxNamespace( - runtimespec.LinuxNamespace{ - Type: runtimespec.NetworkNamespace, - Path: nsPath, - })) - } - if nsOptions.GetPid() == runtime.NamespaceMode_NODE { - specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.PIDNamespace)) - } - if nsOptions.GetIpc() == runtime.NamespaceMode_NODE { - specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace)) - } - - // It's fine to generate the spec before the sandbox /dev/shm - // is actually created. 
- sandboxDevShm := c.getSandboxDevShm(id) - if nsOptions.GetIpc() == runtime.NamespaceMode_NODE { - sandboxDevShm = devShm - } - specOpts = append(specOpts, oci.WithMounts([]runtimespec.Mount{ - { - Source: sandboxDevShm, - Destination: devShm, - Type: "bind", - Options: []string{"rbind", "ro"}, - }, - })) - - selinuxOpt := securityContext.GetSelinuxOptions() - processLabel, mountLabel, err := initSelinuxOpts(selinuxOpt) - if err != nil { - return nil, errors.Wrapf(err, "failed to init selinux options %+v", securityContext.GetSelinuxOptions()) - } - - supplementalGroups := securityContext.GetSupplementalGroups() - specOpts = append(specOpts, - customopts.WithSelinuxLabels(processLabel, mountLabel), - customopts.WithSupplementalGroups(supplementalGroups), - ) - - // Add sysctls - sysctls := config.GetLinux().GetSysctls() - specOpts = append(specOpts, customopts.WithSysctls(sysctls)) - - // Note: LinuxSandboxSecurityContext does not currently provide an apparmor profile - - if !c.config.DisableCgroup { - specOpts = append(specOpts, customopts.WithDefaultSandboxShares) - } - specOpts = append(specOpts, customopts.WithPodOOMScoreAdj(int(defaultSandboxOOMAdj), c.config.RestrictOOMScoreAdj)) - - for pKey, pValue := range getPassthroughAnnotations(config.Annotations, - runtimePodAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - specOpts = append(specOpts, - customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox), - customopts.WithAnnotation(annotations.SandboxID, id), - customopts.WithAnnotation(annotations.SandboxLogDir, config.GetLogDirectory()), - ) - - return runtimeSpec(id, specOpts...) -} - -// setupSandboxFiles sets up necessary sandbox files including /dev/shm, /etc/hosts, -// /etc/resolv.conf and /etc/hostname. 
-func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { - sandboxEtcHostname := c.getSandboxHostname(id) - hostname := config.GetHostname() - if hostname == "" { - var err error - hostname, err = c.os.Hostname() - if err != nil { - return errors.Wrap(err, "failed to get hostname") - } - } - if err := c.os.WriteFile(sandboxEtcHostname, []byte(hostname+"\n"), 0644); err != nil { - return errors.Wrapf(err, "failed to write hostname to %q", sandboxEtcHostname) - } - - // TODO(random-liu): Consider whether we should maintain /etc/hosts and /etc/resolv.conf in kubelet. - sandboxEtcHosts := c.getSandboxHosts(id) - if err := c.os.CopyFile(etcHosts, sandboxEtcHosts, 0644); err != nil { - return errors.Wrapf(err, "failed to generate sandbox hosts file %q", sandboxEtcHosts) - } - - // Set DNS options. Maintain a resolv.conf for the sandbox. - var err error - resolvContent := "" - if dnsConfig := config.GetDnsConfig(); dnsConfig != nil { - resolvContent, err = parseDNSOptions(dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options) - if err != nil { - return errors.Wrapf(err, "failed to parse sandbox DNSConfig %+v", dnsConfig) - } - } - resolvPath := c.getResolvPath(id) - if resolvContent == "" { - // copy host's resolv.conf to resolvPath - err = c.os.CopyFile(resolvConfPath, resolvPath, 0644) - if err != nil { - return errors.Wrapf(err, "failed to copy host's resolv.conf to %q", resolvPath) - } - } else { - err = c.os.WriteFile(resolvPath, []byte(resolvContent), 0644) - if err != nil { - return errors.Wrapf(err, "failed to write resolv content to %q", resolvPath) - } - } - - // Setup sandbox /dev/shm. 
- if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE { - if _, err := c.os.Stat(devShm); err != nil { - return errors.Wrapf(err, "host %q is not available for host ipc", devShm) - } - } else { - sandboxDevShm := c.getSandboxDevShm(id) - if err := c.os.MkdirAll(sandboxDevShm, 0700); err != nil { - return errors.Wrap(err, "failed to create sandbox shm") - } - shmproperty := fmt.Sprintf("mode=1777,size=%d", defaultShmSize) - if err := c.os.Mount("shm", sandboxDevShm, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), shmproperty); err != nil { - return errors.Wrap(err, "failed to mount sandbox shm") - } - } - - return nil -} - -// parseDNSOptions parse DNS options into resolv.conf format content, -// if none option is specified, will return empty with no error. -func parseDNSOptions(servers, searches, options []string) (string, error) { - resolvContent := "" - - if len(searches) > maxDNSSearches { - return "", errors.Errorf("DNSOption.Searches has more than %d domains", maxDNSSearches) - } - - if len(searches) > 0 { - resolvContent += fmt.Sprintf("search %s\n", strings.Join(searches, " ")) - } - - if len(servers) > 0 { - resolvContent += fmt.Sprintf("nameserver %s\n", strings.Join(servers, "\nnameserver ")) - } - - if len(options) > 0 { - resolvContent += fmt.Sprintf("options %s\n", strings.Join(options, " ")) - } - - return resolvContent, nil -} - -// unmountSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to -// remove these files. Unmount should *NOT* return error if the mount point is already unmounted. 
-func (c *criService) unmountSandboxFiles(id string, config *runtime.PodSandboxConfig) error { - if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() != runtime.NamespaceMode_NODE { - path, err := c.os.FollowSymlinkInScope(c.getSandboxDevShm(id), "/") - if err != nil { - return errors.Wrap(err, "failed to follow symlink") - } - if err := c.os.Unmount(path); err != nil && !os.IsNotExist(err) { - return errors.Wrapf(err, "failed to unmount %q", path) - } - } - return nil -} - // setupPod setups up the network for a pod func (c *criService) setupPod(ctx context.Context, id string, path string, config *runtime.PodSandboxConfig) (string, *cni.CNIResult, error) { if c.netPlugin == nil { @@ -713,6 +471,7 @@ func (c *criService) getSandboxRuntime(config *runtime.PodSandboxConfig, runtime // Note: If the workload is marked untrusted but requests privileged, this can be granted, as the // runtime may support this. For example, in a virtual-machine isolated runtime, privileged // is a supported option, granting the workload to access the entire guest VM instead of host. + // TODO(windows): Deprecate this so that we don't need to handle it for windows. 
if hostAccessingSandbox(config) { return criconfig.Runtime{}, errors.New("untrusted workload with host access is not allowed") } diff --git a/pkg/server/sandbox_run_test.go b/pkg/server/sandbox_run_test.go index 7398a2d0d..c40ca3ed4 100644 --- a/pkg/server/sandbox_run_test.go +++ b/pkg/server/sandbox_run_test.go @@ -18,449 +18,16 @@ package server import ( "net" - "os" - "path/filepath" "testing" cni "github.com/containerd/go-cni" - "github.com/containerd/typeurl" - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" "github.com/containerd/cri/pkg/annotations" criconfig "github.com/containerd/cri/pkg/config" - "github.com/containerd/cri/pkg/containerd/opts" - ostesting "github.com/containerd/cri/pkg/os/testing" - sandboxstore "github.com/containerd/cri/pkg/store/sandbox" ) -func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) { - config := &runtime.PodSandboxConfig{ - Metadata: &runtime.PodSandboxMetadata{ - Name: "test-name", - Uid: "test-uid", - Namespace: "test-ns", - Attempt: 1, - }, - Hostname: "test-hostname", - LogDirectory: "test-log-directory", - Labels: map[string]string{"a": "b"}, - Annotations: map[string]string{"c": "d"}, - Linux: &runtime.LinuxPodSandboxConfig{ - CgroupParent: "/test/cgroup/parent", - }, - } - imageConfig := &imagespec.ImageConfig{ - Env: []string{"a=b", "c=d"}, - Entrypoint: []string{"/pause"}, - Cmd: []string{"forever"}, - WorkingDir: "/workspace", - } - specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) { - assert.Equal(t, "test-hostname", spec.Hostname) - assert.Equal(t, getCgroupsPath("/test/cgroup/parent", id), spec.Linux.CgroupsPath) - assert.Equal(t, relativeRootfsPath, spec.Root.Path) - assert.Equal(t, true, spec.Root.Readonly) - 
assert.Contains(t, spec.Process.Env, "a=b", "c=d") - assert.Equal(t, []string{"/pause", "forever"}, spec.Process.Args) - assert.Equal(t, "/workspace", spec.Process.Cwd) - assert.EqualValues(t, *spec.Linux.Resources.CPU.Shares, opts.DefaultSandboxCPUshares) - assert.EqualValues(t, *spec.Process.OOMScoreAdj, defaultSandboxOOMAdj) - - t.Logf("Check PodSandbox annotations") - assert.Contains(t, spec.Annotations, annotations.SandboxID) - assert.EqualValues(t, spec.Annotations[annotations.SandboxID], id) - - assert.Contains(t, spec.Annotations, annotations.ContainerType) - assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeSandbox) - - assert.Contains(t, spec.Annotations, annotations.SandboxLogDir) - assert.EqualValues(t, spec.Annotations[annotations.SandboxLogDir], "test-log-directory") - } - return config, imageConfig, specCheck -} - -func TestGenerateSandboxContainerSpec(t *testing.T) { - testID := "test-id" - nsPath := "test-cni" - for desc, test := range map[string]struct { - configChange func(*runtime.PodSandboxConfig) - podAnnotations []string - imageConfigChange func(*imagespec.ImageConfig) - specCheck func(*testing.T, *runtimespec.Spec) - expectErr bool - }{ - "spec should reflect original config": { - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - // runtime spec should have expected namespaces enabled by default. 
- require.NotNil(t, spec.Linux) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.NetworkNamespace, - Path: nsPath, - }) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.UTSNamespace, - }) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.PIDNamespace, - }) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.IPCNamespace, - }) - }, - }, - "host namespace": { - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - Network: runtime.NamespaceMode_NODE, - Pid: runtime.NamespaceMode_NODE, - Ipc: runtime.NamespaceMode_NODE, - }, - } - }, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - // runtime spec should disable expected namespaces in host mode. - require.NotNil(t, spec.Linux) - assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.NetworkNamespace, - }) - assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.UTSNamespace, - }) - assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.PIDNamespace, - }) - assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.IPCNamespace, - }) - }, - }, - "should return error when entrypoint and cmd are empty": { - imageConfigChange: func(c *imagespec.ImageConfig) { - c.Entrypoint = nil - c.Cmd = nil - }, - expectErr: true, - }, - "should set supplemental groups correctly": { - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - SupplementalGroups: []int64{1111, 2222}, - } - }, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - require.NotNil(t, spec.Process) - assert.Contains(t, spec.Process.User.AdditionalGids, uint32(1111)) 
- assert.Contains(t, spec.Process.User.AdditionalGids, uint32(2222)) - }, - }, - "a passthrough annotation should be passed as an OCI annotation": { - podAnnotations: []string{"c"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - assert.Equal(t, spec.Annotations["c"], "d") - }, - }, - "a non-passthrough annotation should not be passed as an OCI annotation": { - configChange: func(c *runtime.PodSandboxConfig) { - c.Annotations["d"] = "e" - }, - podAnnotations: []string{"c"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - assert.Equal(t, spec.Annotations["c"], "d") - _, ok := spec.Annotations["d"] - assert.False(t, ok) - }, - }, - "passthrough annotations should support wildcard match": { - configChange: func(c *runtime.PodSandboxConfig) { - c.Annotations["t.f"] = "j" - c.Annotations["z.g"] = "o" - c.Annotations["z"] = "o" - c.Annotations["y.ca"] = "b" - c.Annotations["y"] = "b" - }, - podAnnotations: []string{"t*", "z.*", "y.c*"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - assert.Equal(t, spec.Annotations["t.f"], "j") - assert.Equal(t, spec.Annotations["z.g"], "o") - assert.Equal(t, spec.Annotations["y.ca"], "b") - _, ok := spec.Annotations["y"] - assert.False(t, ok) - _, ok = spec.Annotations["z"] - assert.False(t, ok) - }, - }, - } { - t.Logf("TestCase %q", desc) - c := newTestCRIService() - config, imageConfig, specCheck := getRunPodSandboxTestData() - if test.configChange != nil { - test.configChange(config) - } - - if test.imageConfigChange != nil { - test.imageConfigChange(imageConfig) - } - spec, err := c.generateSandboxContainerSpec(testID, config, imageConfig, nsPath, - test.podAnnotations) - if test.expectErr { - assert.Error(t, err) - assert.Nil(t, spec) - continue - } - assert.NoError(t, err) - assert.NotNil(t, spec) - specCheck(t, testID, spec) - if test.specCheck != nil { - test.specCheck(t, spec) - } - } -} - -func TestSetupSandboxFiles(t *testing.T) { - const ( - testID = "test-id" - realhostname = 
"test-real-hostname" - ) - for desc, test := range map[string]struct { - dnsConfig *runtime.DNSConfig - hostname string - ipcMode runtime.NamespaceMode - expectedCalls []ostesting.CalledDetail - }{ - "should check host /dev/shm existence when ipc mode is NODE": { - ipcMode: runtime.NamespaceMode_NODE, - expectedCalls: []ostesting.CalledDetail{ - { - Name: "Hostname", - }, - { - Name: "WriteFile", - Arguments: []interface{}{ - filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), - []byte(realhostname + "\n"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/hosts", - filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/resolv.conf", - filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), - os.FileMode(0644), - }, - }, - { - Name: "Stat", - Arguments: []interface{}{"/dev/shm"}, - }, - }, - }, - "should create new /etc/resolv.conf if DNSOptions is set": { - dnsConfig: &runtime.DNSConfig{ - Servers: []string{"8.8.8.8"}, - Searches: []string{"114.114.114.114"}, - Options: []string{"timeout:1"}, - }, - ipcMode: runtime.NamespaceMode_NODE, - expectedCalls: []ostesting.CalledDetail{ - { - Name: "Hostname", - }, - { - Name: "WriteFile", - Arguments: []interface{}{ - filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), - []byte(realhostname + "\n"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/hosts", - filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), - os.FileMode(0644), - }, - }, - { - Name: "WriteFile", - Arguments: []interface{}{ - filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), - []byte(`search 114.114.114.114 -nameserver 8.8.8.8 -options timeout:1 -`), os.FileMode(0644), - }, - }, - { - Name: "Stat", - Arguments: []interface{}{"/dev/shm"}, - }, - }, - }, - "should create sandbox shm when ipc namespace mode is not NODE": { - ipcMode: 
runtime.NamespaceMode_POD, - expectedCalls: []ostesting.CalledDetail{ - { - Name: "Hostname", - }, - { - Name: "WriteFile", - Arguments: []interface{}{ - filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), - []byte(realhostname + "\n"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/hosts", - filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/resolv.conf", - filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), - os.FileMode(0644), - }, - }, - { - Name: "MkdirAll", - Arguments: []interface{}{ - filepath.Join(testStateDir, sandboxesDir, testID, "shm"), - os.FileMode(0700), - }, - }, - { - Name: "Mount", - // Ignore arguments which are too complex to check. - }, - }, - }, - "should create /etc/hostname when hostname is set": { - hostname: "test-hostname", - ipcMode: runtime.NamespaceMode_NODE, - expectedCalls: []ostesting.CalledDetail{ - { - Name: "WriteFile", - Arguments: []interface{}{ - filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), - []byte("test-hostname\n"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/hosts", - filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/resolv.conf", - filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), - os.FileMode(0644), - }, - }, - { - Name: "Stat", - Arguments: []interface{}{"/dev/shm"}, - }, - }, - }, - } { - t.Logf("TestCase %q", desc) - c := newTestCRIService() - c.os.(*ostesting.FakeOS).HostnameFn = func() (string, error) { - return realhostname, nil - } - cfg := &runtime.PodSandboxConfig{ - Hostname: test.hostname, - DnsConfig: test.dnsConfig, - Linux: &runtime.LinuxPodSandboxConfig{ - SecurityContext: &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - Ipc: 
test.ipcMode, - }, - }, - }, - } - c.setupSandboxFiles(testID, cfg) - calls := c.os.(*ostesting.FakeOS).GetCalls() - assert.Len(t, calls, len(test.expectedCalls)) - for i, expected := range test.expectedCalls { - if expected.Arguments == nil { - // Ignore arguments. - expected.Arguments = calls[i].Arguments - } - assert.Equal(t, expected, calls[i]) - } - } -} - -func TestParseDNSOption(t *testing.T) { - for desc, test := range map[string]struct { - servers []string - searches []string - options []string - expectedContent string - expectErr bool - }{ - "empty dns options should return empty content": {}, - "non-empty dns options should return correct content": { - servers: []string{"8.8.8.8", "server.google.com"}, - searches: []string{"114.114.114.114"}, - options: []string{"timeout:1"}, - expectedContent: `search 114.114.114.114 -nameserver 8.8.8.8 -nameserver server.google.com -options timeout:1 -`, - }, - "should return error if dns search exceeds limit(6)": { - searches: []string{ - "server0.google.com", - "server1.google.com", - "server2.google.com", - "server3.google.com", - "server4.google.com", - "server5.google.com", - "server6.google.com", - }, - expectErr: true, - }, - } { - t.Logf("TestCase %q", desc) - resolvContent, err := parseDNSOptions(test.servers, test.searches, test.options) - if test.expectErr { - assert.Error(t, err) - continue - } - assert.NoError(t, err) - assert.Equal(t, resolvContent, test.expectedContent) - } -} - func TestToCNIPortMappings(t *testing.T) { for desc, test := range map[string]struct { criPortMappings []*runtime.PortMapping @@ -575,46 +142,6 @@ func TestSelectPodIP(t *testing.T) { } } -func TestTypeurlMarshalUnmarshalSandboxMeta(t *testing.T) { - for desc, test := range map[string]struct { - configChange func(*runtime.PodSandboxConfig) - }{ - "should marshal original config": {}, - "should marshal Linux": { - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - 
NamespaceOptions: &runtime.NamespaceOption{ - Network: runtime.NamespaceMode_NODE, - Pid: runtime.NamespaceMode_NODE, - Ipc: runtime.NamespaceMode_NODE, - }, - SupplementalGroups: []int64{1111, 2222}, - } - }, - }, - } { - t.Logf("TestCase %q", desc) - meta := &sandboxstore.Metadata{ - ID: "1", - Name: "sandbox_1", - NetNSPath: "/home/cloud", - } - meta.Config, _, _ = getRunPodSandboxTestData() - if test.configChange != nil { - test.configChange(meta.Config) - } - - any, err := typeurl.MarshalAny(meta) - assert.NoError(t, err) - data, err := typeurl.UnmarshalAny(any) - assert.NoError(t, err) - assert.IsType(t, &sandboxstore.Metadata{}, data) - curMeta, ok := data.(*sandboxstore.Metadata) - assert.True(t, ok) - assert.Equal(t, meta, curMeta) - } -} - func TestHostAccessingSandbox(t *testing.T) { privilegedContext := &runtime.PodSandboxConfig{ Linux: &runtime.LinuxPodSandboxConfig{ @@ -823,21 +350,3 @@ func TestGetSandboxRuntime(t *testing.T) { }) } } - -func TestSandboxDisableCgroup(t *testing.T) { - config, imageConfig, _ := getRunPodSandboxTestData() - c := newTestCRIService() - c.config.DisableCgroup = true - spec, err := c.generateSandboxContainerSpec("test-id", config, imageConfig, "test-cni", []string{}) - require.NoError(t, err) - - t.Log("resource limit should not be set") - assert.Nil(t, spec.Linux.Resources.Memory) - assert.Nil(t, spec.Linux.Resources.CPU) - - t.Log("cgroup path should be empty") - assert.Empty(t, spec.Linux.CgroupsPath) -} - -// TODO(random-liu): [P1] Add unit test for different error cases to make sure -// the function cleans up on error properly. diff --git a/pkg/server/sandbox_run_unix.go b/pkg/server/sandbox_run_unix.go new file mode 100644 index 000000000..9bcd94d1b --- /dev/null +++ b/pkg/server/sandbox_run_unix.go @@ -0,0 +1,298 @@ +// +build !windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package server + +import ( + "fmt" + "os" + "strings" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/oci" + "github.com/containerd/containerd/plugin" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "golang.org/x/sys/unix" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + + "github.com/containerd/cri/pkg/annotations" + customopts "github.com/containerd/cri/pkg/containerd/opts" + osinterface "github.com/containerd/cri/pkg/os" +) + +func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig, + imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (*runtimespec.Spec, error) { + // Creates a spec Generator with the default spec. + // TODO(random-liu): [P1] Compare the default settings with docker and containerd default. + specOpts := []oci.SpecOpts{ + customopts.WithoutRunMount, + customopts.WithoutDefaultSecuritySettings, + customopts.WithRelativeRoot(relativeRootfsPath), + oci.WithEnv(imageConfig.Env), + oci.WithRootFSReadonly(), + oci.WithHostname(config.GetHostname()), + } + if imageConfig.WorkingDir != "" { + specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) + } + + if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 { + // Pause image must have entrypoint or cmd. 
+ return nil, errors.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig) + } + specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...)) + + // Set cgroups parent. + if c.config.DisableCgroup { + specOpts = append(specOpts, customopts.WithDisabledCgroups) + } else { + if config.GetLinux().GetCgroupParent() != "" { + cgroupsPath := getCgroupsPath(config.GetLinux().GetCgroupParent(), id) + specOpts = append(specOpts, oci.WithCgroup(cgroupsPath)) + } + } + + // When cgroup parent is not set, containerd-shim will create container in a child cgroup + // of the cgroup itself is in. + // TODO(random-liu): [P2] Set default cgroup path if cgroup parent is not specified. + + // Set namespace options. + var ( + securityContext = config.GetLinux().GetSecurityContext() + nsOptions = securityContext.GetNamespaceOptions() + ) + if nsOptions.GetNetwork() == runtime.NamespaceMode_NODE { + specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.NetworkNamespace)) + specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UTSNamespace)) + } else { + specOpts = append(specOpts, oci.WithLinuxNamespace( + runtimespec.LinuxNamespace{ + Type: runtimespec.NetworkNamespace, + Path: nsPath, + })) + } + if nsOptions.GetPid() == runtime.NamespaceMode_NODE { + specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.PIDNamespace)) + } + if nsOptions.GetIpc() == runtime.NamespaceMode_NODE { + specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace)) + } + + // It's fine to generate the spec before the sandbox /dev/shm + // is actually created. 
+ sandboxDevShm := c.getSandboxDevShm(id) + if nsOptions.GetIpc() == runtime.NamespaceMode_NODE { + sandboxDevShm = devShm + } + specOpts = append(specOpts, oci.WithMounts([]runtimespec.Mount{ + { + Source: sandboxDevShm, + Destination: devShm, + Type: "bind", + Options: []string{"rbind", "ro"}, + }, + })) + + selinuxOpt := securityContext.GetSelinuxOptions() + processLabel, mountLabel, err := initSelinuxOpts(selinuxOpt) + if err != nil { + return nil, errors.Wrapf(err, "failed to init selinux options %+v", securityContext.GetSelinuxOptions()) + } + + supplementalGroups := securityContext.GetSupplementalGroups() + specOpts = append(specOpts, + customopts.WithSelinuxLabels(processLabel, mountLabel), + customopts.WithSupplementalGroups(supplementalGroups), + ) + + // Add sysctls + sysctls := config.GetLinux().GetSysctls() + specOpts = append(specOpts, customopts.WithSysctls(sysctls)) + + // Note: LinuxSandboxSecurityContext does not currently provide an apparmor profile + + if !c.config.DisableCgroup { + specOpts = append(specOpts, customopts.WithDefaultSandboxShares) + } + specOpts = append(specOpts, customopts.WithPodOOMScoreAdj(int(defaultSandboxOOMAdj), c.config.RestrictOOMScoreAdj)) + + for pKey, pValue := range getPassthroughAnnotations(config.Annotations, + runtimePodAnnotations) { + specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) + } + + specOpts = append(specOpts, + customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox), + customopts.WithAnnotation(annotations.SandboxID, id), + customopts.WithAnnotation(annotations.SandboxLogDir, config.GetLogDirectory()), + ) + + return runtimeSpec(id, specOpts...) +} + +// sandboxContainerSpecOpts generates OCI spec options for +// the sandbox container. 
+func (c *criService) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { + var ( + securityContext = config.GetLinux().GetSecurityContext() + specOpts []oci.SpecOpts + ) + seccompSpecOpts, err := generateSeccompSpecOpts( + securityContext.GetSeccompProfilePath(), + securityContext.GetPrivileged(), + c.seccompEnabled()) + if err != nil { + return nil, errors.Wrap(err, "failed to generate seccomp spec opts") + } + if seccompSpecOpts != nil { + specOpts = append(specOpts, seccompSpecOpts) + } + + userstr, err := generateUserString( + "", + securityContext.GetRunAsUser(), + securityContext.GetRunAsGroup(), + ) + if err != nil { + return nil, errors.Wrap(err, "failed to generate user string") + } + if userstr == "" { + // Lastly, since no user override was passed via CRI try to set via OCI + // Image + userstr = imageConfig.User + } + if userstr != "" { + specOpts = append(specOpts, oci.WithUser(userstr)) + } + return specOpts, nil +} + +// setupSandboxFiles sets up necessary sandbox files including /dev/shm, /etc/hosts, +// /etc/resolv.conf and /etc/hostname. +func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { + sandboxEtcHostname := c.getSandboxHostname(id) + hostname := config.GetHostname() + if hostname == "" { + var err error + hostname, err = c.os.Hostname() + if err != nil { + return errors.Wrap(err, "failed to get hostname") + } + } + if err := c.os.WriteFile(sandboxEtcHostname, []byte(hostname+"\n"), 0644); err != nil { + return errors.Wrapf(err, "failed to write hostname to %q", sandboxEtcHostname) + } + + // TODO(random-liu): Consider whether we should maintain /etc/hosts and /etc/resolv.conf in kubelet. + sandboxEtcHosts := c.getSandboxHosts(id) + if err := c.os.CopyFile(etcHosts, sandboxEtcHosts, 0644); err != nil { + return errors.Wrapf(err, "failed to generate sandbox hosts file %q", sandboxEtcHosts) + } + + // Set DNS options. 
Maintain a resolv.conf for the sandbox. + var err error + resolvContent := "" + if dnsConfig := config.GetDnsConfig(); dnsConfig != nil { + resolvContent, err = parseDNSOptions(dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options) + if err != nil { + return errors.Wrapf(err, "failed to parse sandbox DNSConfig %+v", dnsConfig) + } + } + resolvPath := c.getResolvPath(id) + if resolvContent == "" { + // copy host's resolv.conf to resolvPath + err = c.os.CopyFile(resolvConfPath, resolvPath, 0644) + if err != nil { + return errors.Wrapf(err, "failed to copy host's resolv.conf to %q", resolvPath) + } + } else { + err = c.os.WriteFile(resolvPath, []byte(resolvContent), 0644) + if err != nil { + return errors.Wrapf(err, "failed to write resolv content to %q", resolvPath) + } + } + + // Setup sandbox /dev/shm. + if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE { + if _, err := c.os.Stat(devShm); err != nil { + return errors.Wrapf(err, "host %q is not available for host ipc", devShm) + } + } else { + sandboxDevShm := c.getSandboxDevShm(id) + if err := c.os.MkdirAll(sandboxDevShm, 0700); err != nil { + return errors.Wrap(err, "failed to create sandbox shm") + } + shmproperty := fmt.Sprintf("mode=1777,size=%d", defaultShmSize) + if err := c.os.(osinterface.UNIX).Mount("shm", sandboxDevShm, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), shmproperty); err != nil { + return errors.Wrap(err, "failed to mount sandbox shm") + } + } + + return nil +} + +// parseDNSOptions parse DNS options into resolv.conf format content, +// if none option is specified, will return empty with no error. 
+func parseDNSOptions(servers, searches, options []string) (string, error) { + resolvContent := "" + + if len(searches) > maxDNSSearches { + return "", errors.Errorf("DNSOption.Searches has more than %d domains", maxDNSSearches) + } + + if len(searches) > 0 { + resolvContent += fmt.Sprintf("search %s\n", strings.Join(searches, " ")) + } + + if len(servers) > 0 { + resolvContent += fmt.Sprintf("nameserver %s\n", strings.Join(servers, "\nnameserver ")) + } + + if len(options) > 0 { + resolvContent += fmt.Sprintf("options %s\n", strings.Join(options, " ")) + } + + return resolvContent, nil +} + +// cleanupSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to +// remove these files. Unmount should *NOT* return error if the mount point is already unmounted. +func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { + if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() != runtime.NamespaceMode_NODE { + path, err := c.os.FollowSymlinkInScope(c.getSandboxDevShm(id), "/") + if err != nil { + return errors.Wrap(err, "failed to follow symlink") + } + if err := c.os.(osinterface.UNIX).Unmount(path); err != nil && !os.IsNotExist(err) { + return errors.Wrapf(err, "failed to unmount %q", path) + } + } + return nil +} + +// taskOpts generates task options for a (sandbox) container. +func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts { + // TODO(random-liu): Remove this after shim v1 is deprecated. + var taskOpts []containerd.NewTaskOpts + if c.config.NoPivot && runtimeType == plugin.RuntimeRuncV1 { + taskOpts = append(taskOpts, containerd.WithNoPivotRoot) + } + return taskOpts +} diff --git a/pkg/server/sandbox_run_unix_test.go b/pkg/server/sandbox_run_unix_test.go new file mode 100644 index 000000000..151850564 --- /dev/null +++ b/pkg/server/sandbox_run_unix_test.go @@ -0,0 +1,520 @@ +// +build !windows + +/* +Copyright 2017 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package server + +import ( + "os" + "path/filepath" + "testing" + + "github.com/containerd/typeurl" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + + "github.com/containerd/cri/pkg/annotations" + "github.com/containerd/cri/pkg/containerd/opts" + ostesting "github.com/containerd/cri/pkg/os/testing" + sandboxstore "github.com/containerd/cri/pkg/store/sandbox" +) + +func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) { + config := &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: "test-name", + Uid: "test-uid", + Namespace: "test-ns", + Attempt: 1, + }, + Hostname: "test-hostname", + LogDirectory: "test-log-directory", + Labels: map[string]string{"a": "b"}, + Annotations: map[string]string{"c": "d"}, + Linux: &runtime.LinuxPodSandboxConfig{ + CgroupParent: "/test/cgroup/parent", + }, + } + imageConfig := &imagespec.ImageConfig{ + Env: []string{"a=b", "c=d"}, + Entrypoint: []string{"/pause"}, + Cmd: []string{"forever"}, + WorkingDir: "/workspace", + } + specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) { + assert.Equal(t, "test-hostname", spec.Hostname) + assert.Equal(t, getCgroupsPath("/test/cgroup/parent", 
id), spec.Linux.CgroupsPath) + assert.Equal(t, relativeRootfsPath, spec.Root.Path) + assert.Equal(t, true, spec.Root.Readonly) + assert.Contains(t, spec.Process.Env, "a=b", "c=d") + assert.Equal(t, []string{"/pause", "forever"}, spec.Process.Args) + assert.Equal(t, "/workspace", spec.Process.Cwd) + assert.EqualValues(t, *spec.Linux.Resources.CPU.Shares, opts.DefaultSandboxCPUshares) + assert.EqualValues(t, *spec.Process.OOMScoreAdj, defaultSandboxOOMAdj) + + t.Logf("Check PodSandbox annotations") + assert.Contains(t, spec.Annotations, annotations.SandboxID) + assert.EqualValues(t, spec.Annotations[annotations.SandboxID], id) + + assert.Contains(t, spec.Annotations, annotations.ContainerType) + assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeSandbox) + + assert.Contains(t, spec.Annotations, annotations.SandboxLogDir) + assert.EqualValues(t, spec.Annotations[annotations.SandboxLogDir], "test-log-directory") + } + return config, imageConfig, specCheck +} + +func TestSandboxContainerSpec(t *testing.T) { + testID := "test-id" + nsPath := "test-cni" + for desc, test := range map[string]struct { + configChange func(*runtime.PodSandboxConfig) + podAnnotations []string + imageConfigChange func(*imagespec.ImageConfig) + specCheck func(*testing.T, *runtimespec.Spec) + expectErr bool + }{ + "spec should reflect original config": { + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + // runtime spec should have expected namespaces enabled by default. 
+ require.NotNil(t, spec.Linux) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.NetworkNamespace, + Path: nsPath, + }) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.UTSNamespace, + }) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.PIDNamespace, + }) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.IPCNamespace, + }) + }, + }, + "host namespace": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + Network: runtime.NamespaceMode_NODE, + Pid: runtime.NamespaceMode_NODE, + Ipc: runtime.NamespaceMode_NODE, + }, + } + }, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + // runtime spec should disable expected namespaces in host mode. + require.NotNil(t, spec.Linux) + assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.NetworkNamespace, + }) + assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.UTSNamespace, + }) + assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.PIDNamespace, + }) + assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.IPCNamespace, + }) + }, + }, + "should return error when entrypoint and cmd are empty": { + imageConfigChange: func(c *imagespec.ImageConfig) { + c.Entrypoint = nil + c.Cmd = nil + }, + expectErr: true, + }, + "should set supplemental groups correctly": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + SupplementalGroups: []int64{1111, 2222}, + } + }, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + require.NotNil(t, spec.Process) + assert.Contains(t, spec.Process.User.AdditionalGids, uint32(1111)) 
+ assert.Contains(t, spec.Process.User.AdditionalGids, uint32(2222)) + }, + }, + "a passthrough annotation should be passed as an OCI annotation": { + podAnnotations: []string{"c"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + assert.Equal(t, spec.Annotations["c"], "d") + }, + }, + "a non-passthrough annotation should not be passed as an OCI annotation": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Annotations["d"] = "e" + }, + podAnnotations: []string{"c"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + assert.Equal(t, spec.Annotations["c"], "d") + _, ok := spec.Annotations["d"] + assert.False(t, ok) + }, + }, + "passthrough annotations should support wildcard match": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Annotations["t.f"] = "j" + c.Annotations["z.g"] = "o" + c.Annotations["z"] = "o" + c.Annotations["y.ca"] = "b" + c.Annotations["y"] = "b" + }, + podAnnotations: []string{"t*", "z.*", "y.c*"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + assert.Equal(t, spec.Annotations["t.f"], "j") + assert.Equal(t, spec.Annotations["z.g"], "o") + assert.Equal(t, spec.Annotations["y.ca"], "b") + _, ok := spec.Annotations["y"] + assert.False(t, ok) + _, ok = spec.Annotations["z"] + assert.False(t, ok) + }, + }, + } { + t.Logf("TestCase %q", desc) + c := newTestCRIService() + config, imageConfig, specCheck := getRunPodSandboxTestData() + if test.configChange != nil { + test.configChange(config) + } + + if test.imageConfigChange != nil { + test.imageConfigChange(imageConfig) + } + spec, err := c.sandboxContainerSpec(testID, config, imageConfig, nsPath, + test.podAnnotations) + if test.expectErr { + assert.Error(t, err) + assert.Nil(t, spec) + continue + } + assert.NoError(t, err) + assert.NotNil(t, spec) + specCheck(t, testID, spec) + if test.specCheck != nil { + test.specCheck(t, spec) + } + } +} + +func TestSetupSandboxFiles(t *testing.T) { + const ( + testID = "test-id" + realhostname = 
"test-real-hostname" + ) + for desc, test := range map[string]struct { + dnsConfig *runtime.DNSConfig + hostname string + ipcMode runtime.NamespaceMode + expectedCalls []ostesting.CalledDetail + }{ + "should check host /dev/shm existence when ipc mode is NODE": { + ipcMode: runtime.NamespaceMode_NODE, + expectedCalls: []ostesting.CalledDetail{ + { + Name: "Hostname", + }, + { + Name: "WriteFile", + Arguments: []interface{}{ + filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), + []byte(realhostname + "\n"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/hosts", + filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/resolv.conf", + filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), + os.FileMode(0644), + }, + }, + { + Name: "Stat", + Arguments: []interface{}{"/dev/shm"}, + }, + }, + }, + "should create new /etc/resolv.conf if DNSOptions is set": { + dnsConfig: &runtime.DNSConfig{ + Servers: []string{"8.8.8.8"}, + Searches: []string{"114.114.114.114"}, + Options: []string{"timeout:1"}, + }, + ipcMode: runtime.NamespaceMode_NODE, + expectedCalls: []ostesting.CalledDetail{ + { + Name: "Hostname", + }, + { + Name: "WriteFile", + Arguments: []interface{}{ + filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), + []byte(realhostname + "\n"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/hosts", + filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), + os.FileMode(0644), + }, + }, + { + Name: "WriteFile", + Arguments: []interface{}{ + filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), + []byte(`search 114.114.114.114 +nameserver 8.8.8.8 +options timeout:1 +`), os.FileMode(0644), + }, + }, + { + Name: "Stat", + Arguments: []interface{}{"/dev/shm"}, + }, + }, + }, + "should create sandbox shm when ipc namespace mode is not NODE": { + ipcMode: 
runtime.NamespaceMode_POD, + expectedCalls: []ostesting.CalledDetail{ + { + Name: "Hostname", + }, + { + Name: "WriteFile", + Arguments: []interface{}{ + filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), + []byte(realhostname + "\n"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/hosts", + filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/resolv.conf", + filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), + os.FileMode(0644), + }, + }, + { + Name: "MkdirAll", + Arguments: []interface{}{ + filepath.Join(testStateDir, sandboxesDir, testID, "shm"), + os.FileMode(0700), + }, + }, + { + Name: "Mount", + // Ignore arguments which are too complex to check. + }, + }, + }, + "should create /etc/hostname when hostname is set": { + hostname: "test-hostname", + ipcMode: runtime.NamespaceMode_NODE, + expectedCalls: []ostesting.CalledDetail{ + { + Name: "WriteFile", + Arguments: []interface{}{ + filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), + []byte("test-hostname\n"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/hosts", + filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/resolv.conf", + filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), + os.FileMode(0644), + }, + }, + { + Name: "Stat", + Arguments: []interface{}{"/dev/shm"}, + }, + }, + }, + } { + t.Logf("TestCase %q", desc) + c := newTestCRIService() + c.os.(*ostesting.FakeOS).HostnameFn = func() (string, error) { + return realhostname, nil + } + cfg := &runtime.PodSandboxConfig{ + Hostname: test.hostname, + DnsConfig: test.dnsConfig, + Linux: &runtime.LinuxPodSandboxConfig{ + SecurityContext: &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + Ipc: 
test.ipcMode, + }, + }, + }, + } + c.setupSandboxFiles(testID, cfg) + calls := c.os.(*ostesting.FakeOS).GetCalls() + assert.Len(t, calls, len(test.expectedCalls)) + for i, expected := range test.expectedCalls { + if expected.Arguments == nil { + // Ignore arguments. + expected.Arguments = calls[i].Arguments + } + assert.Equal(t, expected, calls[i]) + } + } +} + +func TestParseDNSOption(t *testing.T) { + for desc, test := range map[string]struct { + servers []string + searches []string + options []string + expectedContent string + expectErr bool + }{ + "empty dns options should return empty content": {}, + "non-empty dns options should return correct content": { + servers: []string{"8.8.8.8", "server.google.com"}, + searches: []string{"114.114.114.114"}, + options: []string{"timeout:1"}, + expectedContent: `search 114.114.114.114 +nameserver 8.8.8.8 +nameserver server.google.com +options timeout:1 +`, + }, + "should return error if dns search exceeds limit(6)": { + searches: []string{ + "server0.google.com", + "server1.google.com", + "server2.google.com", + "server3.google.com", + "server4.google.com", + "server5.google.com", + "server6.google.com", + }, + expectErr: true, + }, + } { + t.Logf("TestCase %q", desc) + resolvContent, err := parseDNSOptions(test.servers, test.searches, test.options) + if test.expectErr { + assert.Error(t, err) + continue + } + assert.NoError(t, err) + assert.Equal(t, resolvContent, test.expectedContent) + } +} + +// TODO(windows): Move this to sandbox_run_test.go +func TestTypeurlMarshalUnmarshalSandboxMeta(t *testing.T) { + for desc, test := range map[string]struct { + configChange func(*runtime.PodSandboxConfig) + }{ + "should marshal original config": {}, + "should marshal Linux": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + Network: runtime.NamespaceMode_NODE, + Pid: runtime.NamespaceMode_NODE, + Ipc: 
runtime.NamespaceMode_NODE, + }, + SupplementalGroups: []int64{1111, 2222}, + } + }, + }, + } { + t.Logf("TestCase %q", desc) + meta := &sandboxstore.Metadata{ + ID: "1", + Name: "sandbox_1", + NetNSPath: "/home/cloud", + } + meta.Config, _, _ = getRunPodSandboxTestData() + if test.configChange != nil { + test.configChange(meta.Config) + } + + any, err := typeurl.MarshalAny(meta) + assert.NoError(t, err) + data, err := typeurl.UnmarshalAny(any) + assert.NoError(t, err) + assert.IsType(t, &sandboxstore.Metadata{}, data) + curMeta, ok := data.(*sandboxstore.Metadata) + assert.True(t, ok) + assert.Equal(t, meta, curMeta) + } +} + +func TestSandboxDisableCgroup(t *testing.T) { + config, imageConfig, _ := getRunPodSandboxTestData() + c := newTestCRIService() + c.config.DisableCgroup = true + spec, err := c.sandboxContainerSpec("test-id", config, imageConfig, "test-cni", []string{}) + require.NoError(t, err) + + t.Log("resource limit should not be set") + assert.Nil(t, spec.Linux.Resources.Memory) + assert.Nil(t, spec.Linux.Resources.CPU) + + t.Log("cgroup path should be empty") + assert.Empty(t, spec.Linux.CgroupsPath) +} + +// TODO(random-liu): [P1] Add unit test for different error cases to make sure +// the function cleans up on error properly. diff --git a/pkg/server/sandbox_run_windows.go b/pkg/server/sandbox_run_windows.go new file mode 100644 index 000000000..c265e1941 --- /dev/null +++ b/pkg/server/sandbox_run_windows.go @@ -0,0 +1,55 @@ +// +build windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package server + +import ( + "github.com/containerd/containerd" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/oci" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" +) + +// TODO(windows): Add windows support. +// TODO(windows): Configure windows sandbox shares +func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig, + imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (*runtimespec.Spec, error) { + return nil, errdefs.ErrNotImplemented +} + +// No sandbox container spec options for windows yet. +func (c *criService) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { + return nil, nil +} + +// No sandbox files needed for windows. +func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { + return nil +} + +// No sandbox files needed for windows. +func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { + return nil +} + +// No task options needed for windows. 
+func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts { + return nil +} diff --git a/pkg/server/sandbox_status.go b/pkg/server/sandbox_status.go index 977ef83c8..78947c120 100644 --- a/pkg/server/sandbox_status.go +++ b/pkg/server/sandbox_status.go @@ -18,6 +18,7 @@ package server import ( "encoding/json" + goruntime "runtime" "github.com/containerd/containerd" "github.com/containerd/containerd/errdefs" @@ -69,7 +70,8 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox func (c *criService) getIP(sandbox sandboxstore.Sandbox) (string, error) { config := sandbox.Config - if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE { + if goruntime.GOOS != "windows" && + config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE { // For sandboxes using the node network we are not // responsible for reporting the IP. return "", nil diff --git a/pkg/server/sandbox_stop.go b/pkg/server/sandbox_stop.go index 60756fee0..8a53673d8 100644 --- a/pkg/server/sandbox_stop.go +++ b/pkg/server/sandbox_stop.go @@ -57,8 +57,8 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb } } - if err := c.unmountSandboxFiles(id, sandbox.Config); err != nil { - return nil, errors.Wrap(err, "failed to unmount sandbox files") + if err := c.cleanupSandboxFiles(id, sandbox.Config); err != nil { + return nil, errors.Wrap(err, "failed to cleanup sandbox files") } // Only stop sandbox container when it's running or unknown. 
diff --git a/pkg/server/service.go b/pkg/server/service.go index 87b2b3ea2..b7fd1c36a 100644 --- a/pkg/server/service.go +++ b/pkg/server/service.go @@ -26,10 +26,6 @@ import ( "github.com/containerd/containerd" "github.com/containerd/containerd/plugin" cni "github.com/containerd/go-cni" - runcapparmor "github.com/opencontainers/runc/libcontainer/apparmor" - runcseccomp "github.com/opencontainers/runc/libcontainer/seccomp" - runcsystem "github.com/opencontainers/runc/libcontainer/system" - "github.com/opencontainers/selinux/go-selinux" "github.com/pkg/errors" "github.com/sirupsen/logrus" "google.golang.org/grpc" @@ -68,10 +64,6 @@ type criService struct { config criconfig.Config // imageFSPath is the path to image filesystem. imageFSPath string - // apparmorEnabled indicates whether apparmor is enabled. - apparmorEnabled bool - // seccompEnabled indicates whether seccomp is enabled. - seccompEnabled bool // os is an interface for all required os operations. os osinterface.OS // sandboxStore stores all resources associated with sandboxes. 
@@ -107,8 +99,6 @@ func NewCRIService(config criconfig.Config, client *containerd.Client) (CRIServi c := &criService{ config: config, client: client, - apparmorEnabled: runcapparmor.IsEnabled() && !config.DisableApparmor, - seccompEnabled: runcseccomp.IsEnabled(), os: osinterface.RealOS{}, sandboxStore: sandboxstore.NewStore(), containerStore: containerstore.NewStore(), @@ -119,20 +109,6 @@ func NewCRIService(config criconfig.Config, client *containerd.Client) (CRIServi initialized: atomic.NewBool(false), } - if runcsystem.RunningInUserNS() { - if !(config.DisableCgroup && !c.apparmorEnabled && config.RestrictOOMScoreAdj) { - logrus.Warn("Running containerd in a user namespace typically requires disable_cgroup, disable_apparmor, restrict_oom_score_adj set to be true") - } - } - - if c.config.EnableSelinux { - if !selinux.GetEnabled() { - logrus.Warn("Selinux is not supported") - } - } else { - selinux.SetDisabled() - } - if client.SnapshotService(c.config.ContainerdConfig.Snapshotter) == nil { return nil, errors.Errorf("failed to find snapshotter %q", c.config.ContainerdConfig.Snapshotter) } @@ -140,23 +116,10 @@ func NewCRIService(config criconfig.Config, client *containerd.Client) (CRIServi c.imageFSPath = imageFSPath(config.ContainerdRootDir, config.ContainerdConfig.Snapshotter) logrus.Infof("Get image filesystem path %q", c.imageFSPath) - // Pod needs to attach to atleast loopback network and a non host network, - // hence networkAttachCount is 2. If there are more network configs the - // pod will be attached to all the networks but we will only use the ip - // of the default network interface as the pod IP. 
- c.netPlugin, err = cni.New(cni.WithMinNetworkCount(networkAttachCount), - cni.WithPluginConfDir(config.NetworkPluginConfDir), - cni.WithPluginMaxConfNum(config.NetworkPluginMaxConfNum), - cni.WithPluginDir([]string{config.NetworkPluginBinDir})) - if err != nil { - return nil, errors.Wrap(err, "failed to initialize cni") + if err := c.initPlatform(); err != nil { + return nil, errors.Wrap(err, "initialize platform") } - // Try to load the config if it exists. Just log the error if load fails - // This is not disruptive for containerd to panic - if err := c.netPlugin.Load(cni.WithLoNetwork, cni.WithDefaultConf); err != nil { - logrus.WithError(err).Error("Failed to load cni during init, please check CRI plugin status before setting up network for pods") - } // prepare streaming server c.streamServer, err = newStreamServer(c, config.StreamServerAddress, config.StreamServerPort, config.StreamIdleTimeout) if err != nil { diff --git a/pkg/server/service_unix.go b/pkg/server/service_unix.go new file mode 100644 index 000000000..51753eb78 --- /dev/null +++ b/pkg/server/service_unix.go @@ -0,0 +1,74 @@ +// +build !windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package server + +import ( + cni "github.com/containerd/go-cni" + runcsystem "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/selinux/go-selinux" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +// networkAttachCount is the minimum number of networks the PodSandbox +// attaches to +const networkAttachCount = 2 + +// initPlatform handles linux specific initialization for the CRI service. +func (c *criService) initPlatform() error { + var err error + + if runcsystem.RunningInUserNS() { + if !(c.config.DisableCgroup && !c.apparmorEnabled() && c.config.RestrictOOMScoreAdj) { + logrus.Warn("Running containerd in a user namespace typically requires disable_cgroup, disable_apparmor, restrict_oom_score_adj set to be true") + } + } + + if c.config.EnableSelinux { + if !selinux.GetEnabled() { + logrus.Warn("Selinux is not supported") + } + } else { + selinux.SetDisabled() + } + + // Pod needs to attach to at least loopback network and a non host network, + // hence networkAttachCount is 2. If there are more network configs the + // pod will be attached to all the networks but we will only use the ip + // of the default network interface as the pod IP. + c.netPlugin, err = cni.New(cni.WithMinNetworkCount(networkAttachCount), + cni.WithPluginConfDir(c.config.NetworkPluginConfDir), + cni.WithPluginMaxConfNum(c.config.NetworkPluginMaxConfNum), + cni.WithPluginDir([]string{c.config.NetworkPluginBinDir})) + if err != nil { + return errors.Wrap(err, "failed to initialize cni") + } + + // Try to load the config if it exists. Just log the error if load fails + // This is not disruptive for containerd to panic + if err := c.netPlugin.Load(c.cniLoadOptions()...); err != nil { + logrus.WithError(err).Error("Failed to load cni during init, please check CRI plugin status before setting up network for pods") + } + return nil +} + +// cniLoadOptions returns cni load options for the linux. 
+func (c *criService) cniLoadOptions() []cni.CNIOpt { + return []cni.CNIOpt{cni.WithLoNetwork, cni.WithDefaultConf} +} diff --git a/pkg/server/service_windows.go b/pkg/server/service_windows.go new file mode 100644 index 000000000..edeb91054 --- /dev/null +++ b/pkg/server/service_windows.go @@ -0,0 +1,35 @@ +// +build windows + +/* +Copyright The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package server + +import ( + cni "github.com/containerd/go-cni" +) + +// initPlatform handles windows specific initialization for the CRI service. +// TODO(windows): Initialize CRI plugin for windows +func (c *criService) initPlatform() error { + return nil +} + +// cniLoadOptions returns cni load options for windows. +// TODO(windows): Implement CNI options for windows.
+func (c *criService) cniLoadOptions() []cni.CNIOpt { + return nil +} diff --git a/pkg/server/status.go b/pkg/server/status.go index a55f3d3a4..af2521781 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -22,7 +22,6 @@ import ( goruntime "runtime" "github.com/containerd/containerd/log" - cni "github.com/containerd/go-cni" "golang.org/x/net/context" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" ) @@ -42,9 +41,8 @@ func (c *criService) Status(ctx context.Context, r *runtime.StatusRequest) (*run Type: runtime.NetworkReady, Status: true, } - // Load the latest cni configuration to be in sync with the latest network configuration - if err := c.netPlugin.Load(cni.WithLoNetwork, cni.WithDefaultConf); err != nil { + if err := c.netPlugin.Load(c.cniLoadOptions()...); err != nil { log.G(ctx).WithError(err).Errorf("Failed to load cni configuration") } // Check the status of the cni initialization diff --git a/pkg/server/update_runtime_config.go b/pkg/server/update_runtime_config.go index ca2684f3b..79727a5fd 100644 --- a/pkg/server/update_runtime_config.go +++ b/pkg/server/update_runtime_config.go @@ -22,7 +22,6 @@ import ( "text/template" "github.com/containerd/containerd/log" - cni "github.com/containerd/go-cni" "github.com/pkg/errors" "golang.org/x/net/context" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" @@ -52,7 +51,7 @@ func (c *criService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateR if err := c.netPlugin.Status(); err == nil { log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) return &runtime.UpdateRuntimeConfigResponse{}, nil - } else if err := c.netPlugin.Load(cni.WithLoNetwork, cni.WithDefaultConf); err == nil { + } else if err := c.netPlugin.Load(c.cniLoadOptions()...); err == nil { log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate) return &runtime.UpdateRuntimeConfigResponse{}, nil }