diff --git a/pkg/cri/sbserver/blockio_linux.go b/pkg/cri/sbserver/blockio_linux.go
new file mode 100644
index 000000000..627e5b694
--- /dev/null
+++ b/pkg/cri/sbserver/blockio_linux.go
@@ -0,0 +1,54 @@
+//go:build linux
+// +build linux
+
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package sbserver
+
+import (
+	"fmt"
+
+	"github.com/containerd/containerd/services/tasks"
+	"github.com/intel/goresctrl/pkg/blockio"
+	runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/sirupsen/logrus"
+)
+
+// blockIOClassFromAnnotations examines container and pod annotations of a
+// container and returns its effective blockio class.
+//
+// An empty class together with a nil error means that no class is to be
+// applied. When a class is requested while blockio is not enabled, the request
+// is dropped if IgnoreBlockIONotEnabledErrors is set and rejected with an
+// error otherwise.
+func (c *criService) blockIOClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) {
+	cls, err := blockio.ContainerClassFromAnnotations(containerName, containerAnnotations, podAnnotations)
+	if err != nil {
+		return "", err
+	}
+
+	if cls != "" && !tasks.BlockIOEnabled() {
+		if !c.config.ContainerdConfig.IgnoreBlockIONotEnabledErrors {
+			return "", fmt.Errorf("blockio disabled, refusing to set blockio class of container %q to %q", containerName, cls)
+		}
+		// err is always nil at this point, so log the class that is being
+		// dropped instead of a meaningless "<nil>".
+		logrus.Debugf("continuing create container %s, ignoring blockio not enabled (requested class %q)", containerName, cls)
+		cls = ""
+	}
+	return cls, nil
+}
+
+// blockIOToLinuxOci converts blockio class name into the LinuxBlockIO
+// structure in the OCI runtime spec.
+func blockIOToLinuxOci(className string) (*runtimespec.LinuxBlockIO, error) {
+	return blockio.OciLinuxBlockIO(className)
+}
diff --git a/pkg/cri/sbserver/blockio_stub_linux.go b/pkg/cri/sbserver/blockio_stub_linux.go
new file mode 100644
index 000000000..154875a6a
--- /dev/null
+++ b/pkg/cri/sbserver/blockio_stub_linux.go
@@ -0,0 +1,32 @@
+//go:build !linux
+// +build !linux
+
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package sbserver
+
+import (
+	runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// NOTE(review): this file is named *_linux.go but carries a "!linux" build
+// constraint. The file name suffix and the constraint are combined, so these
+// stubs look like they are never compiled - confirm and consider renaming
+// the file.
+
+// blockIOClassFromAnnotations is the stub variant: it ignores its arguments
+// and always reports an empty blockio class without an error.
+func (c *criService) blockIOClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) {
+	return "", nil
+}
+
+// blockIOToLinuxOci is the stub variant: it always returns a nil LinuxBlockIO
+// and a nil error.
+func blockIOToLinuxOci(className string) (*runtimespec.LinuxBlockIO, error) {
+	return nil, nil
+}
diff --git a/pkg/cri/sbserver/cni_conf_syncer.go b/pkg/cri/sbserver/cni_conf_syncer.go
new file mode 100644
index 000000000..45adfa518
--- /dev/null
+++ b/pkg/cri/sbserver/cni_conf_syncer.go
@@ -0,0 +1,125 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "fmt" + "os" + "sync" + + "github.com/containerd/go-cni" + "github.com/fsnotify/fsnotify" + "github.com/sirupsen/logrus" +) + +// cniNetConfSyncer is used to reload cni network conf triggered by fs change +// events. +type cniNetConfSyncer struct { + // only used for lastSyncStatus + sync.RWMutex + lastSyncStatus error + + watcher *fsnotify.Watcher + confDir string + netPlugin cni.CNI + loadOpts []cni.Opt +} + +// newCNINetConfSyncer creates cni network conf syncer. +func newCNINetConfSyncer(confDir string, netPlugin cni.CNI, loadOpts []cni.Opt) (*cniNetConfSyncer, error) { + watcher, err := fsnotify.NewWatcher() + if err != nil { + return nil, fmt.Errorf("failed to create fsnotify watcher: %w", err) + } + + if err := os.MkdirAll(confDir, 0700); err != nil { + return nil, fmt.Errorf("failed to create cni conf dir=%s for watch: %w", confDir, err) + } + + if err := watcher.Add(confDir); err != nil { + return nil, fmt.Errorf("failed to watch cni conf dir %s: %w", confDir, err) + } + + syncer := &cniNetConfSyncer{ + watcher: watcher, + confDir: confDir, + netPlugin: netPlugin, + loadOpts: loadOpts, + } + + if err := syncer.netPlugin.Load(syncer.loadOpts...); err != nil { + logrus.WithError(err).Error("failed to load cni during init, please check CRI plugin status before setting up network for pods") + syncer.updateLastStatus(err) + } + return syncer, nil +} + +// syncLoop monitors any fs change events from cni conf dir and tries to reload +// cni configuration. 
+func (syncer *cniNetConfSyncer) syncLoop() error { + for { + select { + case event, ok := <-syncer.watcher.Events: + if !ok { + logrus.Debugf("cni watcher channel is closed") + return nil + } + // Only reload config when receiving write/rename/remove + // events + // + // TODO(fuweid): Might only reload target cni config + // files to prevent no-ops. + if event.Op&(fsnotify.Chmod|fsnotify.Create) > 0 { + logrus.Debugf("ignore event from cni conf dir: %s", event) + continue + } + logrus.Debugf("receiving change event from cni conf dir: %s", event) + + lerr := syncer.netPlugin.Load(syncer.loadOpts...) + if lerr != nil { + logrus.WithError(lerr). + Errorf("failed to reload cni configuration after receiving fs change event(%s)", event) + } + syncer.updateLastStatus(lerr) + + case err := <-syncer.watcher.Errors: + if err != nil { + logrus.WithError(err).Error("failed to continue sync cni conf change") + return err + } + } + } +} + +// lastStatus retrieves last sync status. +func (syncer *cniNetConfSyncer) lastStatus() error { + syncer.RLock() + defer syncer.RUnlock() + return syncer.lastSyncStatus +} + +// updateLastStatus will be called after every single cni load. +func (syncer *cniNetConfSyncer) updateLastStatus(err error) { + syncer.Lock() + defer syncer.Unlock() + syncer.lastSyncStatus = err +} + +// stop stops watcher in the syncLoop. +func (syncer *cniNetConfSyncer) stop() error { + return syncer.watcher.Close() +} diff --git a/pkg/cri/sbserver/container_attach.go b/pkg/cri/sbserver/container_attach.go new file mode 100644 index 000000000..56f69c6a5 --- /dev/null +++ b/pkg/cri/sbserver/container_attach.go @@ -0,0 +1,84 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + "io" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/log" + "k8s.io/client-go/tools/remotecommand" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + cio "github.com/containerd/containerd/pkg/cri/io" +) + +// Attach prepares a streaming endpoint to attach to a running container, and returns the address. +func (c *criService) Attach(ctx context.Context, r *runtime.AttachRequest) (*runtime.AttachResponse, error) { + cntr, err := c.containerStore.Get(r.GetContainerId()) + if err != nil { + return nil, fmt.Errorf("failed to find container in store: %w", err) + } + state := cntr.Status.Get().State() + if state != runtime.ContainerState_CONTAINER_RUNNING { + return nil, fmt.Errorf("container is in %s state", criContainerStateToString(state)) + } + return c.streamServer.GetAttach(r) +} + +func (c *criService) attachContainer(ctx context.Context, id string, stdin io.Reader, stdout, stderr io.WriteCloser, + tty bool, resize <-chan remotecommand.TerminalSize) error { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + // Get container from our container store. 
+ cntr, err := c.containerStore.Get(id) + if err != nil { + return fmt.Errorf("failed to find container %q in store: %w", id, err) + } + id = cntr.ID + + state := cntr.Status.Get().State() + if state != runtime.ContainerState_CONTAINER_RUNNING { + return fmt.Errorf("container is in %s state", criContainerStateToString(state)) + } + + task, err := cntr.Container.Task(ctx, nil) + if err != nil { + return fmt.Errorf("failed to load task: %w", err) + } + handleResizing(ctx, resize, func(size remotecommand.TerminalSize) { + if err := task.Resize(ctx, uint32(size.Width), uint32(size.Height)); err != nil { + log.G(ctx).WithError(err).Errorf("Failed to resize task %q console", id) + } + }) + + opts := cio.AttachOptions{ + Stdin: stdin, + Stdout: stdout, + Stderr: stderr, + Tty: tty, + StdinOnce: cntr.Config.StdinOnce, + CloseStdin: func() error { + return task.CloseIO(ctx, containerd.WithStdinCloser) + }, + } + // TODO(random-liu): Figure out whether we need to support historical output. + cntr.IO.Attach(opts) + return nil +} diff --git a/pkg/cri/sbserver/container_create.go b/pkg/cri/sbserver/container_create.go new file mode 100644 index 000000000..ac5cc3417 --- /dev/null +++ b/pkg/cri/sbserver/container_create.go @@ -0,0 +1,363 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/
+
+package sbserver
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"path/filepath"
+	"time"
+
+	"github.com/containerd/typeurl"
+	"github.com/davecgh/go-spew/spew"
+	imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+	runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/opencontainers/selinux/go-selinux"
+	runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
+
+	"github.com/containerd/containerd"
+	"github.com/containerd/containerd/containers"
+	"github.com/containerd/containerd/log"
+	"github.com/containerd/containerd/oci"
+	criconfig "github.com/containerd/containerd/pkg/cri/config"
+	cio "github.com/containerd/containerd/pkg/cri/io"
+	customopts "github.com/containerd/containerd/pkg/cri/opts"
+	containerstore "github.com/containerd/containerd/pkg/cri/store/container"
+	"github.com/containerd/containerd/pkg/cri/util"
+	ctrdutil "github.com/containerd/containerd/pkg/cri/util"
+)
+
+// init registers the container metadata type with typeurl under the
+// "github.com/containerd/cri/pkg/store/container" / "Metadata" URL.
+func init() {
+	typeurl.Register(&containerstore.Metadata{},
+		"github.com/containerd/cri/pkg/store/container", "Metadata")
+}
+
+// CreateContainer creates a new container in the given PodSandbox.
+//
+// It reserves a name for the container, resolves the (already pulled) image,
+// creates the container's root directories, generates the OCI spec, creates
+// the containerd container and finally adds the internal container object to
+// the container store. On success the response carries the generated
+// container ID.
+//
+// Every step that acquires something registers a deferred cleanup guarded by
+// retErr, so a failure at any later step rolls the earlier steps back in
+// reverse order.
+func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateContainerRequest) (_ *runtime.CreateContainerResponse, retErr error) {
+	config := r.GetConfig()
+	log.G(ctx).Debugf("Container config %+v", config)
+	sandboxConfig := r.GetSandboxConfig()
+	sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
+	if err != nil {
+		return nil, fmt.Errorf("failed to find sandbox id %q: %w", r.GetPodSandboxId(), err)
+	}
+	sandboxID := sandbox.ID
+	s, err := sandbox.Container.Task(ctx, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get sandbox container task: %w", err)
+	}
+	sandboxPid := s.Pid()
+
+	// Generate unique id and name for the container and reserve the name.
+	// Reserve the container name to avoid concurrent `CreateContainer` request creating
+	// the same container.
+	id := util.GenerateID()
+	metadata := config.GetMetadata()
+	if metadata == nil {
+		return nil, errors.New("container config must include metadata")
+	}
+	containerName := metadata.Name
+	name := makeContainerName(metadata, sandboxConfig.GetMetadata())
+	log.G(ctx).Debugf("Generated id %q for container %q", id, name)
+	if err = c.containerNameIndex.Reserve(name, id); err != nil {
+		return nil, fmt.Errorf("failed to reserve container name %q: %w", name, err)
+	}
+	defer func() {
+		// Release the name if the function returns with an error.
+		if retErr != nil {
+			c.containerNameIndex.ReleaseByName(name)
+		}
+	}()
+
+	// Create initial internal container metadata.
+	meta := containerstore.Metadata{
+		ID:        id,
+		Name:      name,
+		SandboxID: sandboxID,
+		Config:    config,
+	}
+
+	// Prepare container image snapshot. For container, the image should have
+	// been pulled before creating the container, so do not ensure the image.
+	image, err := c.localResolve(config.GetImage().GetImage())
+	if err != nil {
+		return nil, fmt.Errorf("failed to resolve image %q: %w", config.GetImage().GetImage(), err)
+	}
+	containerdImage, err := c.toContainerdImage(ctx, image)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get image from containerd %q: %w", image.ID, err)
+	}
+
+	// start feeds the create timer updated right before the successful return.
+	start := time.Now()
+	// Run container using the same runtime with sandbox.
+	sandboxInfo, err := sandbox.Container.Info(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get sandbox %q info: %w", sandboxID, err)
+	}
+
+	// Create container root directory.
+	containerRootDir := c.getContainerRootDir(id)
+	if err = c.os.MkdirAll(containerRootDir, 0755); err != nil {
+		return nil, fmt.Errorf("failed to create container root directory %q: %w",
+			containerRootDir, err)
+	}
+	defer func() {
+		if retErr != nil {
+			// Cleanup the container root directory.
+			if err = c.os.RemoveAll(containerRootDir); err != nil {
+				log.G(ctx).WithError(err).Errorf("Failed to remove container root directory %q",
+					containerRootDir)
+			}
+		}
+	}()
+	volatileContainerRootDir := c.getVolatileContainerRootDir(id)
+	if err = c.os.MkdirAll(volatileContainerRootDir, 0755); err != nil {
+		return nil, fmt.Errorf("failed to create volatile container root directory %q: %w",
+			volatileContainerRootDir, err)
+	}
+	defer func() {
+		if retErr != nil {
+			// Cleanup the volatile container root directory.
+			if err = c.os.RemoveAll(volatileContainerRootDir); err != nil {
+				log.G(ctx).WithError(err).Errorf("Failed to remove volatile container root directory %q",
+					volatileContainerRootDir)
+			}
+		}
+	}()
+
+	var volumeMounts []*runtime.Mount
+	if !c.config.IgnoreImageDefinedVolumes {
+		// Create container image volumes mounts.
+		volumeMounts = c.volumeMounts(containerRootDir, config.GetMounts(), &image.ImageSpec.Config)
+	} else if len(image.ImageSpec.Config.Volumes) != 0 {
+		log.G(ctx).Debugf("Ignoring volumes defined in image %v because IgnoreImageDefinedVolumes is set", image.ID)
+	}
+
+	// Generate container mounts.
+	mounts := c.containerMounts(sandboxID, config)
+
+	ociRuntime, err := c.getSandboxRuntime(sandboxConfig, sandbox.Metadata.RuntimeHandler)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get sandbox runtime: %w", err)
+	}
+	log.G(ctx).Debugf("Use OCI runtime %+v for sandbox %q and container %q", ociRuntime, sandboxID, id)
+
+	spec, err := c.containerSpec(id, sandboxID, sandboxPid, sandbox.NetNSPath, containerName, containerdImage.Name(), config, sandboxConfig,
+		&image.ImageSpec.Config, append(mounts, volumeMounts...), ociRuntime)
+	if err != nil {
+		return nil, fmt.Errorf("failed to generate container %q spec: %w", id, err)
+	}
+
+	// Record the label before it may be modified or cleared below.
+	meta.ProcessLabel = spec.Process.SelinuxLabel
+
+	// handle any KVM based runtime
+	if err := modifyProcessLabel(ociRuntime.Type, spec); err != nil {
+		return nil, err
+	}
+
+	if config.GetLinux().GetSecurityContext().GetPrivileged() {
+		// If privileged don't set the SELinux label but still record it on the container so
+		// the unused MCS label can be release later
+		spec.Process.SelinuxLabel = ""
+	}
+	// NOTE(review): for a privileged container the spec label was cleared just
+	// above, so this cleanup appears to release an empty label while the
+	// recorded one stays in meta.ProcessLabel - confirm that is intended.
+	defer func() {
+		if retErr != nil {
+			selinux.ReleaseLabel(spec.Process.SelinuxLabel)
+		}
+	}()
+
+	log.G(ctx).Debugf("Container %q spec: %#+v", id, spew.NewFormatter(spec))
+
+	// Grab any platform specific snapshotter opts.
+	sOpts := snapshotterOpts(c.config.ContainerdConfig.Snapshotter, config)
+
+	// Set snapshotter before any other options.
+	opts := []containerd.NewContainerOpts{
+		containerd.WithSnapshotter(c.runtimeSnapshotter(ctx, ociRuntime)),
+		// Prepare container rootfs. This is always writeable even if
+		// the container wants a readonly rootfs since we want to give
+		// the runtime (runc) a chance to modify (e.g. to create mount
+		// points corresponding to spec.Mounts) before making the
+		// rootfs readonly (requested by spec.Root.Readonly).
+		customopts.WithNewSnapshot(id, containerdImage, sOpts...),
+	}
+	if len(volumeMounts) > 0 {
+		// Map cleaned host path -> container path for the image volumes.
+		mountMap := make(map[string]string)
+		for _, v := range volumeMounts {
+			mountMap[filepath.Clean(v.HostPath)] = v.ContainerPath
+		}
+		opts = append(opts, customopts.WithVolumes(mountMap))
+	}
+	meta.ImageRef = image.ID
+	meta.StopSignal = image.ImageSpec.Config.StopSignal
+
+	// Validate log paths and compose full container log path.
+	if sandboxConfig.GetLogDirectory() != "" && config.GetLogPath() != "" {
+		meta.LogPath = filepath.Join(sandboxConfig.GetLogDirectory(), config.GetLogPath())
+		log.G(ctx).Debugf("Composed container full log path %q using sandbox log dir %q and container log path %q",
+			meta.LogPath, sandboxConfig.GetLogDirectory(), config.GetLogPath())
+	} else {
+		log.G(ctx).Infof("Logging will be disabled due to empty log paths for sandbox (%q) or container (%q)",
+			sandboxConfig.GetLogDirectory(), config.GetLogPath())
+	}
+
+	containerIO, err := cio.NewContainerIO(id,
+		cio.WithNewFIFOs(volatileContainerRootDir, config.GetTty(), config.GetStdin()))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create container io: %w", err)
+	}
+	defer func() {
+		if retErr != nil {
+			if err := containerIO.Close(); err != nil {
+				log.G(ctx).WithError(err).Errorf("Failed to close container io %q", id)
+			}
+		}
+	}()
+
+	specOpts, err := c.containerSpecOpts(config, &image.ImageSpec.Config)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get container spec opts: %w", err)
+	}
+
+	containerLabels := buildLabels(config.Labels, image.ImageSpec.Config.Labels, containerKindContainer)
+
+	runtimeOptions, err := getRuntimeOptions(sandboxInfo)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get runtime options: %w", err)
+	}
+
+	opts = append(opts,
+		containerd.WithSpec(spec, specOpts...),
+		containerd.WithRuntime(sandboxInfo.Runtime.Name, runtimeOptions),
+		containerd.WithContainerLabels(containerLabels),
+		containerd.WithContainerExtension(containerMetadataExtension, &meta))
+	var cntr containerd.Container
+	if cntr, err = c.client.NewContainer(ctx, id, opts...); err != nil {
+		return nil, fmt.Errorf("failed to create containerd container: %w", err)
+	}
+	defer func() {
+		if retErr != nil {
+			// The delete runs with the context from ctrdutil.DeferContext
+			// rather than the request ctx.
+			deferCtx, deferCancel := ctrdutil.DeferContext()
+			defer deferCancel()
+			if err := cntr.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil {
+				log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id)
+			}
+		}
+	}()
+
+	status := containerstore.Status{CreatedAt: time.Now().UnixNano()}
+	container, err := containerstore.NewContainer(meta,
+		containerstore.WithStatus(status, containerRootDir),
+		containerstore.WithContainer(cntr),
+		containerstore.WithContainerIO(containerIO),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create internal container object for %q: %w", id, err)
+	}
+	defer func() {
+		if retErr != nil {
+			// Cleanup container checkpoint on error.
+			if err := container.Delete(); err != nil {
+				log.G(ctx).WithError(err).Errorf("Failed to cleanup container checkpoint for %q", id)
+			}
+		}
+	}()
+
+	// Add container into container store.
+	if err := c.containerStore.Add(container); err != nil {
+		return nil, fmt.Errorf("failed to add container %q into store: %w", id, err)
+	}
+
+	containerCreateTimer.WithValues(ociRuntime.Type).UpdateSince(start)
+
+	return &runtime.CreateContainerResponse{ContainerId: id}, nil
+}
+
+// volumeMounts sets up image volumes for container. Rely on the removal of container
+// root directory to do cleanup. Note that image volume will be skipped, if there is criMounts
+// specified with the same destination.
+func (c *criService) volumeMounts(containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount { + if len(config.Volumes) == 0 { + return nil + } + var mounts []*runtime.Mount + for dst := range config.Volumes { + if isInCRIMounts(dst, criMounts) { + // Skip the image volume, if there is CRI defined volume mapping. + // TODO(random-liu): This should be handled by Kubelet in the future. + // Kubelet should decide what to use for image volume, and also de-duplicate + // the image volume and user mounts. + continue + } + volumeID := util.GenerateID() + src := filepath.Join(containerRootDir, "volumes", volumeID) + // addOCIBindMounts will create these volumes. + mounts = append(mounts, &runtime.Mount{ + ContainerPath: dst, + HostPath: src, + SelinuxRelabel: true, + }) + } + return mounts +} + +// runtimeSpec returns a default runtime spec used in cri-containerd. +func (c *criService) runtimeSpec(id string, baseSpecFile string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) { + // GenerateSpec needs namespace. + ctx := ctrdutil.NamespacedContext() + container := &containers.Container{ID: id} + + if baseSpecFile != "" { + baseSpec, ok := c.baseOCISpecs[baseSpecFile] + if !ok { + return nil, fmt.Errorf("can't find base OCI spec %q", baseSpecFile) + } + + spec := oci.Spec{} + if err := util.DeepCopy(&spec, &baseSpec); err != nil { + return nil, fmt.Errorf("failed to clone OCI spec: %w", err) + } + + // Fix up cgroups path + applyOpts := append([]oci.SpecOpts{oci.WithNamespacedCgroup()}, opts...) + + if err := oci.ApplyOpts(ctx, nil, container, &spec, applyOpts...); err != nil { + return nil, fmt.Errorf("failed to apply OCI options: %w", err) + } + + return &spec, nil + } + + spec, err := oci.GenerateSpec(ctx, nil, container, opts...) + if err != nil { + return nil, fmt.Errorf("failed to generate spec: %w", err) + } + + return spec, nil +} + +// Overrides the default snapshotter if Snapshotter is set for this runtime. 
+// See See https://github.com/containerd/containerd/issues/6657 +func (c *criService) runtimeSnapshotter(ctx context.Context, ociRuntime criconfig.Runtime) string { + if ociRuntime.Snapshotter == "" { + return c.config.ContainerdConfig.Snapshotter + } + + log.G(ctx).Debugf("Set snapshotter for runtime %s to %s", ociRuntime.Type, ociRuntime.Snapshotter) + return ociRuntime.Snapshotter +} diff --git a/pkg/cri/sbserver/container_create_linux.go b/pkg/cri/sbserver/container_create_linux.go new file mode 100644 index 000000000..9482b6341 --- /dev/null +++ b/pkg/cri/sbserver/container_create_linux.go @@ -0,0 +1,605 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/
+
+package sbserver
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+	"strings"
+
+	"github.com/containerd/cgroups"
+	"github.com/containerd/containerd/contrib/apparmor"
+	"github.com/containerd/containerd/contrib/seccomp"
+	"github.com/containerd/containerd/oci"
+	"github.com/containerd/containerd/snapshots"
+	imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+	runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/opencontainers/selinux/go-selinux"
+	"github.com/opencontainers/selinux/go-selinux/label"
+	runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
+
+	"github.com/containerd/containerd/pkg/cri/annotations"
+	"github.com/containerd/containerd/pkg/cri/config"
+	customopts "github.com/containerd/containerd/pkg/cri/opts"
+)
+
+const (
+	// profileNamePrefix is the prefix for loading profiles on a localhost. Eg. AppArmor localhost/profileName.
+	profileNamePrefix = "localhost/" // TODO (mikebrow): get localhost/ & runtime/default from CRI kubernetes/kubernetes#51747
+	// runtimeDefault indicates that we should use or create a runtime default profile.
+	runtimeDefault = "runtime/default"
+	// dockerDefault indicates that we should use or create a docker default profile.
+	dockerDefault = "docker/default"
+	// appArmorDefaultProfileName is name to use when creating a default apparmor profile.
+	appArmorDefaultProfileName = "cri-containerd.apparmor.d"
+	// unconfinedProfile is a string indicating one should run a pod/containerd without a security profile
+	unconfinedProfile = "unconfined"
+	// seccompDefaultProfile is the default seccomp profile.
+	seccompDefaultProfile = dockerDefault
+)
+
+// containerMounts sets up necessary container system file mounts
+// including /dev/shm, /etc/hosts and /etc/resolv.conf.
+//
+// A system file is skipped when the container config already defines a CRI
+// mount for the same container path.
+func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount {
+	var mounts []*runtime.Mount
+	securityContext := config.GetLinux().GetSecurityContext()
+	if !isInCRIMounts(etcHostname, config.GetMounts()) {
+		// /etc/hostname is added since 1.1.6, 1.2.4 and 1.3.
+		// For in-place upgrade, the old sandbox doesn't have the hostname file,
+		// do not mount this in that case.
+		// TODO(random-liu): Remove the check and always mount this when
+		// containerd 1.1 and 1.2 are deprecated.
+		hostpath := c.getSandboxHostname(sandboxID)
+		if _, err := c.os.Stat(hostpath); err == nil {
+			mounts = append(mounts, &runtime.Mount{
+				ContainerPath:  etcHostname,
+				HostPath:       hostpath,
+				Readonly:       securityContext.GetReadonlyRootfs(),
+				SelinuxRelabel: true,
+			})
+		}
+	}
+
+	if !isInCRIMounts(etcHosts, config.GetMounts()) {
+		mounts = append(mounts, &runtime.Mount{
+			ContainerPath:  etcHosts,
+			HostPath:       c.getSandboxHosts(sandboxID),
+			Readonly:       securityContext.GetReadonlyRootfs(),
+			SelinuxRelabel: true,
+		})
+	}
+
+	// Mount sandbox resolv.conf.
+	// TODO: Need to figure out whether we should always mount it as read-only
+	if !isInCRIMounts(resolvConfPath, config.GetMounts()) {
+		mounts = append(mounts, &runtime.Mount{
+			ContainerPath:  resolvConfPath,
+			HostPath:       c.getResolvPath(sandboxID),
+			Readonly:       securityContext.GetReadonlyRootfs(),
+			SelinuxRelabel: true,
+		})
+	}
+
+	if !isInCRIMounts(devShm, config.GetMounts()) {
+		sandboxDevShm := c.getSandboxDevShm(sandboxID)
+		// With the node IPC namespace the devShm path itself is used as the
+		// mount source instead of the sandbox specific one.
+		if securityContext.GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE {
+			sandboxDevShm = devShm
+		}
+		mounts = append(mounts, &runtime.Mount{
+			ContainerPath: devShm,
+			HostPath:      sandboxDevShm,
+			Readonly:      false,
+			// Relabel only the sandbox specific directory, never devShm itself.
+			SelinuxRelabel: sandboxDevShm != devShm,
+		})
+	}
+	return mounts
+}
+
+// containerSpec assembles the list of OCI spec options for a container from
+// the container config, the sandbox config, the image config and the runtime
+// configuration, and returns the spec produced by runtimeSpec for them.
+//
+// The options are appended in a deliberate order, since several later entries
+// target settings that earlier ones also set (see the inline comments). The
+// process label obtained from label.InitLabels is released again if the
+// function returns an error.
+func (c *criService) containerSpec(
+	id string,
+	sandboxID string,
+	sandboxPid uint32,
+	netNSPath string,
+	containerName string,
+	imageName string,
+	config *runtime.ContainerConfig,
+	sandboxConfig *runtime.PodSandboxConfig,
+	imageConfig *imagespec.ImageConfig,
+	extraMounts []*runtime.Mount,
+	ociRuntime config.Runtime,
+) (_ *runtimespec.Spec, retErr error) {
+	specOpts := []oci.SpecOpts{
+		oci.WithoutRunMount,
+	}
+	// only clear the default security settings if the runtime does not have a custom
+	// base runtime spec. Admins can use this functionality to define
+	// default ulimits, seccomp, or other default settings.
+	if ociRuntime.BaseRuntimeSpec == "" {
+		specOpts = append(specOpts, customopts.WithoutDefaultSecuritySettings)
+	}
+	specOpts = append(specOpts,
+		customopts.WithRelativeRoot(relativeRootfsPath),
+		customopts.WithProcessArgs(config, imageConfig),
+		oci.WithDefaultPathEnv,
+		// this will be set based on the security context below
+		oci.WithNewPrivileges,
+	)
+	// The working dir from the container config wins over the image's.
+	if config.GetWorkingDir() != "" {
+		specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir()))
+	} else if imageConfig.WorkingDir != "" {
+		specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
+	}
+
+	if config.GetTty() {
+		specOpts = append(specOpts, oci.WithTTY)
+	}
+
+	// Add HOSTNAME env.
+	var (
+		err      error
+		hostname = sandboxConfig.GetHostname()
+	)
+	if hostname == "" {
+		// Fall back to the hostname reported by c.os when the sandbox has none.
+		if hostname, err = c.os.Hostname(); err != nil {
+			return nil, err
+		}
+	}
+	specOpts = append(specOpts, oci.WithEnv([]string{hostnameEnv + "=" + hostname}))
+
+	// Apply envs from image config first, so that envs from container config
+	// can override them.
+	env := append([]string{}, imageConfig.Env...)
+	for _, e := range config.GetEnvs() {
+		env = append(env, e.GetKey()+"="+e.GetValue())
+	}
+	specOpts = append(specOpts, oci.WithEnv(env))
+
+	securityContext := config.GetLinux().GetSecurityContext()
+	labelOptions, err := toLabel(securityContext.GetSelinuxOptions())
+	if err != nil {
+		return nil, err
+	}
+	if len(labelOptions) == 0 {
+		// Use pod level SELinux config
+		// A failed sandbox lookup is ignored here; labelOptions then stays empty.
+		// Note that err inside this block shadows the outer err.
+		if sandbox, err := c.sandboxStore.Get(sandboxID); err == nil {
+			labelOptions, err = selinux.DupSecOpt(sandbox.ProcessLabel)
+			if err != nil {
+				return nil, err
+			}
+		}
+	}
+
+	processLabel, mountLabel, err := label.InitLabels(labelOptions)
+	if err != nil {
+		return nil, fmt.Errorf("failed to init selinux options %+v: %w", securityContext.GetSelinuxOptions(), err)
+	}
+	defer func() {
+		// Give the process label back if spec generation fails later on.
+		if retErr != nil {
+			_ = label.ReleaseLabel(processLabel)
+		}
+	}()
+
+	specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel))
+
+	if !c.config.DisableProcMount {
+		// Change the default masked/readonly paths to empty slices
+		// See https://github.com/containerd/containerd/issues/5029
+		// TODO: Provide an option to set default paths to the ones in oci.populateDefaultUnixSpec()
+		specOpts = append(specOpts, oci.WithMaskedPaths([]string{}), oci.WithReadonlyPaths([]string{}))
+
+		// Apply masked paths if specified.
+		// If the container is privileged, this will be cleared later on.
+		if maskedPaths := securityContext.GetMaskedPaths(); maskedPaths != nil {
+			specOpts = append(specOpts, oci.WithMaskedPaths(maskedPaths))
+		}
+
+		// Apply readonly paths if specified.
+		// If the container is privileged, this will be cleared later on.
+		if readonlyPaths := securityContext.GetReadonlyPaths(); readonlyPaths != nil {
+			specOpts = append(specOpts, oci.WithReadonlyPaths(readonlyPaths))
+		}
+	}
+
+	specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext),
+		customopts.WithCapabilities(securityContext, c.allCaps))
+
+	if securityContext.GetPrivileged() {
+		// A privileged container is only accepted inside a privileged sandbox.
+		if !sandboxConfig.GetLinux().GetSecurityContext().GetPrivileged() {
+			return nil, errors.New("no privileged container allowed in sandbox")
+		}
+		specOpts = append(specOpts, oci.WithPrivileged)
+		if !ociRuntime.PrivilegedWithoutHostDevices {
+			specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed)
+		} else if ociRuntime.PrivilegedWithoutHostDevicesAllDevicesAllowed {
+			// allow rwm on all devices for the container
+			specOpts = append(specOpts, oci.WithAllDevicesAllowed)
+		}
+	}
+
+	// Clear all ambient capabilities. The implication of non-root + caps
+	// is not clearly defined in Kubernetes.
+	// See https://github.com/kubernetes/kubernetes/issues/56374
+	// Keep docker's behavior for now.
+	specOpts = append(specOpts,
+		customopts.WithoutAmbientCaps,
+		customopts.WithSelinuxLabels(processLabel, mountLabel),
+	)
+
+	// TODO: Figure out whether we should set no new privilege for sandbox container by default
+	if securityContext.GetNoNewPrivs() {
+		specOpts = append(specOpts, oci.WithNoNewPrivileges)
+	}
+	// TODO(random-liu): [P1] Set selinux options (privileged or not).
+	if securityContext.GetReadonlyRootfs() {
+		specOpts = append(specOpts, oci.WithRootFSReadonly())
+	}
+
+	if c.config.DisableCgroup {
+		specOpts = append(specOpts, customopts.WithDisabledCgroups)
+	} else {
+		specOpts = append(specOpts, customopts.WithResources(config.GetLinux().GetResources(), c.config.TolerateMissingHugetlbController, c.config.DisableHugetlbController))
+		if sandboxConfig.GetLinux().GetCgroupParent() != "" {
+			cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id)
+			specOpts = append(specOpts, oci.WithCgroup(cgroupsPath))
+		}
+	}
+
+	supplementalGroups := securityContext.GetSupplementalGroups()
+
+	// Get blockio class
+	blockIOClass, err := c.blockIOClassFromAnnotations(config.GetMetadata().GetName(), config.Annotations, sandboxConfig.Annotations)
+	if err != nil {
+		return nil, fmt.Errorf("failed to set blockio class: %w", err)
+	}
+	if blockIOClass != "" {
+		if linuxBlockIO, err := blockIOToLinuxOci(blockIOClass); err == nil {
+			specOpts = append(specOpts, oci.WithBlockIO(linuxBlockIO))
+		} else {
+			return nil, err
+		}
+	}
+
+	// Get RDT class
+	rdtClass, err := c.rdtClassFromAnnotations(config.GetMetadata().GetName(), config.Annotations, sandboxConfig.Annotations)
+	if err != nil {
+		return nil, fmt.Errorf("failed to set RDT class: %w", err)
+	}
+	if rdtClass != "" {
+		specOpts = append(specOpts, oci.WithRdt(rdtClass, "", ""))
+	}
+
+	// Pass through the pod annotations selected by the runtime configuration,
+	// followed by the selected container annotations.
+	for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations,
+		ociRuntime.PodAnnotations) {
+		specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
+	}
+
+	for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
+		ociRuntime.ContainerAnnotations) {
+		specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
+	}
+
+	// Default target PID namespace is the sandbox PID.
+	targetPid := sandboxPid
+	// If the container targets another container's PID namespace,
+	// set targetPid to the PID of that container.
+	nsOpts := securityContext.GetNamespaceOptions()
+	if nsOpts.GetPid() == runtime.NamespaceMode_TARGET {
+		targetContainer, err := c.validateTargetContainer(sandboxID, nsOpts.TargetId)
+		if err != nil {
+			return nil, fmt.Errorf("invalid target container: %w", err)
+		}
+
+		status := targetContainer.Status.Get()
+		targetPid = status.Pid
+	}
+
+	specOpts = append(specOpts,
+		customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
+		customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid),
+		customopts.WithSupplementalGroups(supplementalGroups),
+		customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
+		customopts.WithAnnotation(annotations.SandboxID, sandboxID),
+		customopts.WithAnnotation(annotations.SandboxNamespace, sandboxConfig.GetMetadata().GetNamespace()),
+		customopts.WithAnnotation(annotations.SandboxName, sandboxConfig.GetMetadata().GetName()),
+		customopts.WithAnnotation(annotations.ContainerName, containerName),
+		customopts.WithAnnotation(annotations.ImageName, imageName),
+	)
+	// cgroupns is used for hiding /sys/fs/cgroup from containers.
+	// For compatibility, cgroupns is not used when running in cgroup v1 mode or in privileged.
+	// https://github.com/containers/libpod/issues/4363
+	// https://github.com/kubernetes/enhancements/blob/0e409b47497e398b369c281074485c8de129694f/keps/sig-node/20191118-cgroups-v2.md#cgroup-namespace
+	if cgroups.Mode() == cgroups.Unified && !securityContext.GetPrivileged() {
+		specOpts = append(specOpts, oci.WithLinuxNamespace(
+			runtimespec.LinuxNamespace{
+				Type: runtimespec.CgroupNamespace,
+			}))
+	}
+	return c.runtimeSpec(id, ociRuntime.BaseRuntimeSpec, specOpts...)
+}
+
+func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
+	var specOpts []oci.SpecOpts
+	securityContext := config.GetLinux().GetSecurityContext()
+	// Set container username.
This could only be done by containerd, because it needs + // access to the container rootfs. Pass user name to containerd, and let it overwrite + // the spec for us. + userstr, err := generateUserString( + securityContext.GetRunAsUsername(), + securityContext.GetRunAsUser(), + securityContext.GetRunAsGroup()) + if err != nil { + return nil, fmt.Errorf("failed to generate user string: %w", err) + } + if userstr == "" { + // Lastly, since no user override was passed via CRI try to set via OCI + // Image + userstr = imageConfig.User + } + if userstr != "" { + specOpts = append(specOpts, oci.WithUser(userstr)) + } + + if securityContext.GetRunAsUsername() != "" { + userstr = securityContext.GetRunAsUsername() + } else { + // Even if RunAsUser is not set, we still call `GetValue` to get uid 0. + // Because it is still useful to get additional gids for uid 0. + userstr = strconv.FormatInt(securityContext.GetRunAsUser().GetValue(), 10) + } + specOpts = append(specOpts, customopts.WithAdditionalGIDs(userstr)) + + asp := securityContext.GetApparmor() + if asp == nil { + asp, err = generateApparmorSecurityProfile(securityContext.GetApparmorProfile()) //nolint:staticcheck // Deprecated but we don't want to remove yet + if err != nil { + return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err) + } + } + apparmorSpecOpts, err := generateApparmorSpecOpts( + asp, + securityContext.GetPrivileged(), + c.apparmorEnabled()) + if err != nil { + return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err) + } + if apparmorSpecOpts != nil { + specOpts = append(specOpts, apparmorSpecOpts) + } + + ssp := securityContext.GetSeccomp() + if ssp == nil { + ssp, err = generateSeccompSecurityProfile( + securityContext.GetSeccompProfilePath(), //nolint:staticcheck // Deprecated but we don't want to remove yet + c.config.UnsetSeccompProfile) + if err != nil { + return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err) + } + } + seccompSpecOpts, err := 
c.generateSeccompSpecOpts( + ssp, + securityContext.GetPrivileged(), + c.seccompEnabled()) + if err != nil { + return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err) + } + if seccompSpecOpts != nil { + specOpts = append(specOpts, seccompSpecOpts) + } + if c.config.EnableCDI { + specOpts = append(specOpts, oci.WithCDI(config.Annotations, c.config.CDISpecDirs)) + } + return specOpts, nil +} + +func generateSeccompSecurityProfile(profilePath string, unsetProfilePath string) (*runtime.SecurityProfile, error) { + if profilePath != "" { + return generateSecurityProfile(profilePath) + } + if unsetProfilePath != "" { + return generateSecurityProfile(unsetProfilePath) + } + return nil, nil +} +func generateApparmorSecurityProfile(profilePath string) (*runtime.SecurityProfile, error) { + if profilePath != "" { + return generateSecurityProfile(profilePath) + } + return nil, nil +} + +func generateSecurityProfile(profilePath string) (*runtime.SecurityProfile, error) { + switch profilePath { + case runtimeDefault, dockerDefault, "": + return &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_RuntimeDefault, + }, nil + case unconfinedProfile: + return &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_Unconfined, + }, nil + default: + // Require and Trim default profile name prefix + if !strings.HasPrefix(profilePath, profileNamePrefix) { + return nil, fmt.Errorf("invalid profile %q", profilePath) + } + return &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_Localhost, + LocalhostRef: strings.TrimPrefix(profilePath, profileNamePrefix), + }, nil + } +} + +// generateSeccompSpecOpts generates containerd SpecOpts for seccomp. 
+func (c *criService) generateSeccompSpecOpts(sp *runtime.SecurityProfile, privileged, seccompEnabled bool) (oci.SpecOpts, error) { + if privileged { + // Do not set seccomp profile when container is privileged + return nil, nil + } + if !seccompEnabled { + if sp != nil { + if sp.ProfileType != runtime.SecurityProfile_Unconfined { + return nil, errors.New("seccomp is not supported") + } + } + return nil, nil + } + + if sp == nil { + return nil, nil + } + + if sp.ProfileType != runtime.SecurityProfile_Localhost && sp.LocalhostRef != "" { + return nil, errors.New("seccomp config invalid LocalhostRef must only be set if ProfileType is Localhost") + } + switch sp.ProfileType { + case runtime.SecurityProfile_Unconfined: + // Do not set seccomp profile. + return nil, nil + case runtime.SecurityProfile_RuntimeDefault: + return seccomp.WithDefaultProfile(), nil + case runtime.SecurityProfile_Localhost: + // trimming the localhost/ prefix just in case even though it should not + // be necessary with the new SecurityProfile struct + return seccomp.WithProfile(strings.TrimPrefix(sp.LocalhostRef, profileNamePrefix)), nil + default: + return nil, errors.New("seccomp unknown ProfileType") + } +} + +// generateApparmorSpecOpts generates containerd SpecOpts for apparmor. +func generateApparmorSpecOpts(sp *runtime.SecurityProfile, privileged, apparmorEnabled bool) (oci.SpecOpts, error) { + if !apparmorEnabled { + // Should fail loudly if user try to specify apparmor profile + // but we don't support it. + if sp != nil { + if sp.ProfileType != runtime.SecurityProfile_Unconfined { + return nil, errors.New("apparmor is not supported") + } + } + return nil, nil + } + + if sp == nil { + // Based on kubernetes#51746, default apparmor profile should be applied + // for when apparmor is not specified. 
// appArmorProfileExists reports whether the named AppArmor profile is listed
// in /sys/kernel/security/apparmor/profiles.
//
// A profile is considered present when some line of that file starts with the
// profile name followed by " (". An empty profile name is rejected with an
// error; failures to open or read the file are returned to the caller.
func appArmorProfileExists(profile string) (bool, error) {
	if profile == "" {
		return false, errors.New("nil apparmor profile is not supported")
	}
	profiles, err := os.Open("/sys/kernel/security/apparmor/profiles")
	if err != nil {
		return false, err
	}
	defer profiles.Close()

	// Build the match prefix once rather than on every line.
	prefix := profile + " ("
	rbuff := bufio.NewReader(profiles)
	for {
		line, err := rbuff.ReadString('\n')
		if err != nil && err != io.EOF {
			return false, err
		}
		// ReadString hands back the data read so far together with io.EOF when
		// the last line has no trailing newline, so the line must be examined
		// before the EOF is acted upon.
		if strings.HasPrefix(line, prefix) {
			return true, nil
		}
		if err == io.EOF {
			return false, nil
		}
	}
}
+// +// CRI defines that the following combinations are valid: +// +// (none) -> "" +// username -> username +// username, uid -> username +// username, uid, gid -> username:gid +// username, gid -> username:gid +// uid -> uid +// uid, gid -> uid:gid +// gid -> error +// +// TODO(random-liu): Add group name support in CRI. +func generateUserString(username string, uid, gid *runtime.Int64Value) (string, error) { + var userstr, groupstr string + if uid != nil { + userstr = strconv.FormatInt(uid.GetValue(), 10) + } + if username != "" { + userstr = username + } + if gid != nil { + groupstr = strconv.FormatInt(gid.GetValue(), 10) + } + if userstr == "" { + if groupstr != "" { + return "", fmt.Errorf("user group %q is specified without user", groupstr) + } + return "", nil + } + if groupstr != "" { + userstr = userstr + ":" + groupstr + } + return userstr, nil +} + +// snapshotterOpts returns any Linux specific snapshotter options for the rootfs snapshot +func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt { + return []snapshots.Opt{} +} diff --git a/pkg/cri/sbserver/container_create_linux_test.go b/pkg/cri/sbserver/container_create_linux_test.go new file mode 100644 index 000000000..e4d424892 --- /dev/null +++ b/pkg/cri/sbserver/container_create_linux_test.go @@ -0,0 +1,1669 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "errors" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "reflect" + "strings" + "testing" + + "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" + "github.com/containerd/containerd/containers" + "github.com/containerd/containerd/contrib/apparmor" + "github.com/containerd/containerd/contrib/seccomp" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/oci" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/pkg/cap" + "github.com/containerd/containerd/pkg/cri/annotations" + "github.com/containerd/containerd/pkg/cri/config" + "github.com/containerd/containerd/pkg/cri/opts" + "github.com/containerd/containerd/pkg/cri/util" + ctrdutil "github.com/containerd/containerd/pkg/cri/util" + ostesting "github.com/containerd/containerd/pkg/os/testing" +) + +func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig, + *imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) { + config := &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: "test-name", + Attempt: 1, + }, + Image: &runtime.ImageSpec{ + Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799", + }, + Command: []string{"test", "command"}, + Args: []string{"test", "args"}, + WorkingDir: "test-cwd", + Envs: []*runtime.KeyValue{ + {Key: "k1", Value: "v1"}, + {Key: "k2", Value: "v2"}, + {Key: "k3", Value: "v3=v3bis"}, + {Key: "k4", Value: "v4=v4bis=foop"}, + }, + Mounts: []*runtime.Mount{ + // everything default + { + ContainerPath: "container-path-1", + HostPath: "host-path-1", + }, + // readOnly + { + ContainerPath: 
"container-path-2", + HostPath: "host-path-2", + Readonly: true, + }, + }, + Labels: map[string]string{"a": "b"}, + Annotations: map[string]string{"ca-c": "ca-d"}, + Linux: &runtime.LinuxContainerConfig{ + Resources: &runtime.LinuxContainerResources{ + CpuPeriod: 100, + CpuQuota: 200, + CpuShares: 300, + MemoryLimitInBytes: 400, + OomScoreAdj: 500, + CpusetCpus: "0-1", + CpusetMems: "2-3", + Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}, + }, + SecurityContext: &runtime.LinuxContainerSecurityContext{ + SupplementalGroups: []int64{1111, 2222}, + NoNewPrivs: true, + }, + }, + } + sandboxConfig := &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: "test-sandbox-name", + Uid: "test-sandbox-uid", + Namespace: "test-sandbox-ns", + Attempt: 2, + }, + Annotations: map[string]string{"c": "d"}, + Linux: &runtime.LinuxPodSandboxConfig{ + CgroupParent: "/test/cgroup/parent", + SecurityContext: &runtime.LinuxSandboxSecurityContext{}, + }, + } + imageConfig := &imagespec.ImageConfig{ + Env: []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"}, + Entrypoint: []string{"/entrypoint"}, + Cmd: []string{"cmd"}, + WorkingDir: "/workspace", + } + specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) { + assert.Equal(t, relativeRootfsPath, spec.Root.Path) + assert.Equal(t, []string{"test", "command", "test", "args"}, spec.Process.Args) + assert.Equal(t, "test-cwd", spec.Process.Cwd) + assert.Contains(t, spec.Process.Env, "k1=v1", "k2=v2", "k3=v3=v3bis", "ik4=iv4=iv4bis=boop") + assert.Contains(t, spec.Process.Env, "ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "k4=v4=v4bis=foop") + + t.Logf("Check cgroups bind mount") + checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"ro"}, nil) + + t.Logf("Check bind mount") + checkMount(t, spec.Mounts, "host-path-1", "container-path-1", "bind", []string{"rbind", "rprivate", "rw"}, nil) + checkMount(t, 
spec.Mounts, "host-path-2", "container-path-2", "bind", []string{"rbind", "rprivate", "ro"}, nil) + + t.Logf("Check resource limits") + assert.EqualValues(t, *spec.Linux.Resources.CPU.Period, 100) + assert.EqualValues(t, *spec.Linux.Resources.CPU.Quota, 200) + assert.EqualValues(t, *spec.Linux.Resources.CPU.Shares, 300) + assert.EqualValues(t, spec.Linux.Resources.CPU.Cpus, "0-1") + assert.EqualValues(t, spec.Linux.Resources.CPU.Mems, "2-3") + assert.EqualValues(t, spec.Linux.Resources.Unified, map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}) + assert.EqualValues(t, *spec.Linux.Resources.Memory.Limit, 400) + assert.EqualValues(t, *spec.Process.OOMScoreAdj, 500) + + t.Logf("Check supplemental groups") + assert.Contains(t, spec.Process.User.AdditionalGids, uint32(1111)) + assert.Contains(t, spec.Process.User.AdditionalGids, uint32(2222)) + + t.Logf("Check no_new_privs") + assert.Equal(t, spec.Process.NoNewPrivileges, true) + + t.Logf("Check cgroup path") + assert.Equal(t, getCgroupsPath("/test/cgroup/parent", id), spec.Linux.CgroupsPath) + + t.Logf("Check namespaces") + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.NetworkNamespace, + Path: opts.GetNetworkNamespace(sandboxPid), + }) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.IPCNamespace, + Path: opts.GetIPCNamespace(sandboxPid), + }) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.UTSNamespace, + Path: opts.GetUTSNamespace(sandboxPid), + }) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.PIDNamespace, + Path: opts.GetPIDNamespace(sandboxPid), + }) + + t.Logf("Check PodSandbox annotations") + assert.Contains(t, spec.Annotations, annotations.SandboxID) + assert.EqualValues(t, spec.Annotations[annotations.SandboxID], sandboxID) + + assert.Contains(t, spec.Annotations, annotations.ContainerType) + assert.EqualValues(t, 
spec.Annotations[annotations.ContainerType], annotations.ContainerTypeContainer) + + assert.Contains(t, spec.Annotations, annotations.SandboxNamespace) + assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-sandbox-ns") + + assert.Contains(t, spec.Annotations, annotations.SandboxName) + assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-sandbox-name") + + assert.Contains(t, spec.Annotations, annotations.ImageName) + assert.EqualValues(t, spec.Annotations[annotations.ImageName], testImageName) + } + return config, sandboxConfig, imageConfig, specCheck +} + +func TestContainerCapabilities(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + allCaps := cap.Known() + for desc, test := range map[string]struct { + capability *runtime.Capability + includes []string + excludes []string + }{ + "should be able to add/drop capabilities": { + capability: &runtime.Capability{ + AddCapabilities: []string{"SYS_ADMIN"}, + DropCapabilities: []string{"CHOWN"}, + }, + includes: []string{"CAP_SYS_ADMIN"}, + excludes: []string{"CAP_CHOWN"}, + }, + "should be able to add all capabilities": { + capability: &runtime.Capability{ + AddCapabilities: []string{"ALL"}, + }, + includes: allCaps, + }, + "should be able to drop all capabilities": { + capability: &runtime.Capability{ + DropCapabilities: []string{"ALL"}, + }, + excludes: allCaps, + }, + "should be able to drop capabilities with add all": { + capability: &runtime.Capability{ + AddCapabilities: []string{"ALL"}, + DropCapabilities: []string{"CHOWN"}, + }, + includes: util.SubtractStringSlice(allCaps, "CAP_CHOWN"), + excludes: []string{"CAP_CHOWN"}, + }, + "should be able to add capabilities with drop all": { + capability: &runtime.Capability{ + AddCapabilities: []string{"SYS_ADMIN"}, + DropCapabilities: []string{"ALL"}, + }, + includes: []string{"CAP_SYS_ADMIN"}, + excludes: util.SubtractStringSlice(allCaps, 
"CAP_SYS_ADMIN"), + }, + } { + t.Run(desc, func(t *testing.T) { + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + c.allCaps = allCaps + + containerConfig.Linux.SecurityContext.Capabilities = test.capability + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + + if selinux.GetEnabled() { + assert.NotEqual(t, "", spec.Process.SelinuxLabel) + assert.NotEqual(t, "", spec.Linux.MountLabel) + } + + specCheck(t, testID, testSandboxID, testPid, spec) + for _, include := range test.includes { + assert.Contains(t, spec.Process.Capabilities.Bounding, include) + assert.Contains(t, spec.Process.Capabilities.Effective, include) + assert.Contains(t, spec.Process.Capabilities.Permitted, include) + } + for _, exclude := range test.excludes { + assert.NotContains(t, spec.Process.Capabilities.Bounding, exclude) + assert.NotContains(t, spec.Process.Capabilities.Effective, exclude) + assert.NotContains(t, spec.Process.Capabilities.Permitted, exclude) + } + assert.Empty(t, spec.Process.Capabilities.Inheritable) + assert.Empty(t, spec.Process.Capabilities.Ambient) + }) + } +} + +func TestContainerSpecTty(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + for _, tty := range []bool{true, false} { + containerConfig.Tty = tty + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + specCheck(t, testID, testSandboxID, testPid, spec) + assert.Equal(t, tty, spec.Process.Terminal) + if tty { + 
assert.Contains(t, spec.Process.Env, "TERM=xterm") + } else { + assert.NotContains(t, spec.Process.Env, "TERM=xterm") + } + } +} + +func TestContainerSpecDefaultPath(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + expectedDefault := "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + for _, pathenv := range []string{"", "PATH=/usr/local/bin/games"} { + expected := expectedDefault + if pathenv != "" { + imageConfig.Env = append(imageConfig.Env, pathenv) + expected = pathenv + } + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + specCheck(t, testID, testSandboxID, testPid, spec) + assert.Contains(t, spec.Process.Env, expected) + } +} + +func TestContainerSpecReadonlyRootfs(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + for _, readonly := range []bool{true, false} { + containerConfig.Linux.SecurityContext.ReadonlyRootfs = readonly + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + specCheck(t, testID, testSandboxID, testPid, spec) + assert.Equal(t, readonly, spec.Root.Readonly) + } +} + +func TestContainerSpecWithExtraMounts(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, 
specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + mountInConfig := &runtime.Mount{ + // Test cleanpath + ContainerPath: "test-container-path/", + HostPath: "test-host-path", + Readonly: false, + } + containerConfig.Mounts = append(containerConfig.Mounts, mountInConfig) + extraMounts := []*runtime.Mount{ + { + ContainerPath: "test-container-path", + HostPath: "test-host-path-extra", + Readonly: true, + }, + { + ContainerPath: "/sys", + HostPath: "test-sys-extra", + Readonly: false, + }, + { + ContainerPath: "/dev", + HostPath: "test-dev-extra", + Readonly: false, + }, + } + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, extraMounts, ociRuntime) + require.NoError(t, err) + specCheck(t, testID, testSandboxID, testPid, spec) + var mounts, sysMounts, devMounts []runtimespec.Mount + for _, m := range spec.Mounts { + if strings.HasPrefix(m.Destination, "test-container-path") { + mounts = append(mounts, m) + } else if m.Destination == "/sys" { + sysMounts = append(sysMounts, m) + } else if strings.HasPrefix(m.Destination, "/dev") { + devMounts = append(devMounts, m) + } + } + t.Logf("CRI mount should override extra mount") + require.Len(t, mounts, 1) + assert.Equal(t, "test-host-path", mounts[0].Source) + assert.Contains(t, mounts[0].Options, "rw") + + t.Logf("Extra mount should override default mount") + require.Len(t, sysMounts, 1) + assert.Equal(t, "test-sys-extra", sysMounts[0].Source) + assert.Contains(t, sysMounts[0].Options, "rw") + + t.Logf("Dev mount should override all default dev mounts") + require.Len(t, devMounts, 1) + assert.Equal(t, "test-dev-extra", devMounts[0].Source) + assert.Contains(t, devMounts[0].Options, "rw") +} + +func TestContainerAndSandboxPrivileged(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + 
containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + for desc, test := range map[string]struct { + containerPrivileged bool + sandboxPrivileged bool + expectError bool + }{ + "privileged container in non-privileged sandbox should fail": { + containerPrivileged: true, + sandboxPrivileged: false, + expectError: true, + }, + "privileged container in privileged sandbox should be fine": { + containerPrivileged: true, + sandboxPrivileged: true, + expectError: false, + }, + "non-privileged container in privileged sandbox should be fine": { + containerPrivileged: false, + sandboxPrivileged: true, + expectError: false, + }, + "non-privileged container in non-privileged sandbox should be fine": { + containerPrivileged: false, + sandboxPrivileged: false, + expectError: false, + }, + } { + t.Run(desc, func(t *testing.T) { + containerConfig.Linux.SecurityContext.Privileged = test.containerPrivileged + sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + Privileged: test.sandboxPrivileged, + } + _, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + if test.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestContainerMounts(t *testing.T) { + const testSandboxID = "test-id" + for desc, test := range map[string]struct { + statFn func(string) (os.FileInfo, error) + criMounts []*runtime.Mount + securityContext *runtime.LinuxContainerSecurityContext + expectedMounts []*runtime.Mount + }{ + "should setup ro mount when rootfs is read-only": { + securityContext: &runtime.LinuxContainerSecurityContext{ + ReadonlyRootfs: true, + }, + expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), + Readonly: true, + SelinuxRelabel: true, + 
}, + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: true, + SelinuxRelabel: true, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), + Readonly: true, + SelinuxRelabel: true, + }, + { + ContainerPath: "/dev/shm", + HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), + Readonly: false, + SelinuxRelabel: true, + }, + }, + }, + "should setup rw mount when rootfs is read-write": { + securityContext: &runtime.LinuxContainerSecurityContext{}, + expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: "/dev/shm", + HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), + Readonly: false, + SelinuxRelabel: true, + }, + }, + }, + "should use host /dev/shm when host ipc is set": { + securityContext: &runtime.LinuxContainerSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{Ipc: runtime.NamespaceMode_NODE}, + }, + expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), + 
Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: "/dev/shm", + HostPath: "/dev/shm", + Readonly: false, + }, + }, + }, + "should skip container mounts if already mounted by CRI": { + criMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: "/test-etc-hostname", + }, + { + ContainerPath: "/etc/hosts", + HostPath: "/test-etc-host", + }, + { + ContainerPath: resolvConfPath, + HostPath: "test-resolv-conf", + }, + { + ContainerPath: "/dev/shm", + HostPath: "test-dev-shm", + }, + }, + securityContext: &runtime.LinuxContainerSecurityContext{}, + expectedMounts: nil, + }, + "should skip hostname mount if the old sandbox doesn't have hostname file": { + statFn: func(path string) (os.FileInfo, error) { + assert.Equal(t, filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), path) + return nil, errors.New("random error") + }, + securityContext: &runtime.LinuxContainerSecurityContext{}, + expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: "/dev/shm", + HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), + Readonly: false, + SelinuxRelabel: true, + }, + }, + }, + } { + t.Run(desc, func(t *testing.T) { + config := &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: "test-name", + Attempt: 1, + }, + Mounts: test.criMounts, + Linux: &runtime.LinuxContainerConfig{ + SecurityContext: test.securityContext, + }, + } + c := newTestCRIService() + c.os.(*ostesting.FakeOS).StatFn = test.statFn + mounts := c.containerMounts(testSandboxID, config) + assert.Equal(t, test.expectedMounts, mounts, desc) + }) + } +} + +func TestPrivilegedBindMount(t *testing.T) { + 
testPid := uint32(1234) + c := newTestCRIService() + testSandboxID := "sandbox-id" + testContainerName := "container-name" + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + ociRuntime := config.Runtime{} + + for desc, test := range map[string]struct { + privileged bool + expectedSysFSRO bool + expectedCgroupFSRO bool + }{ + "sysfs and cgroupfs should mount as 'ro' by default": { + expectedSysFSRO: true, + expectedCgroupFSRO: true, + }, + "sysfs and cgroupfs should not mount as 'ro' if privileged": { + privileged: true, + expectedSysFSRO: false, + expectedCgroupFSRO: false, + }, + } { + t.Run(desc, func(t *testing.T) { + containerConfig.Linux.SecurityContext.Privileged = test.privileged + sandboxConfig.Linux.SecurityContext.Privileged = test.privileged + + spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + + assert.NoError(t, err) + if test.expectedSysFSRO { + checkMount(t, spec.Mounts, "sysfs", "/sys", "sysfs", []string{"ro"}, []string{"rw"}) + } else { + checkMount(t, spec.Mounts, "sysfs", "/sys", "sysfs", []string{"rw"}, []string{"ro"}) + } + if test.expectedCgroupFSRO { + checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"ro"}, []string{"rw"}) + } else { + checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"rw"}, []string{"ro"}) + } + }) + } +} + +func TestMountPropagation(t *testing.T) { + + sharedLookupMountFn := func(string) (mount.Info, error) { + return mount.Info{ + Mountpoint: "host-path", + Optional: "shared:", + }, nil + } + + slaveLookupMountFn := func(string) (mount.Info, error) { + return mount.Info{ + Mountpoint: "host-path", + Optional: "master:", + }, nil + } + + othersLookupMountFn := func(string) (mount.Info, error) { + return mount.Info{ + Mountpoint: "host-path", + Optional: "others", + }, nil + } + + for desc, test := range map[string]struct { + 
criMount *runtime.Mount + fakeLookupMountFn func(string) (mount.Info, error) + optionsCheck []string + expectErr bool + }{ + "HostPath should mount as 'rprivate' if propagation is MountPropagation_PROPAGATION_PRIVATE": { + criMount: &runtime.Mount{ + ContainerPath: "container-path", + HostPath: "host-path", + Propagation: runtime.MountPropagation_PROPAGATION_PRIVATE, + }, + fakeLookupMountFn: nil, + optionsCheck: []string{"rbind", "rprivate"}, + expectErr: false, + }, + "HostPath should mount as 'rslave' if propagation is MountPropagation_PROPAGATION_HOST_TO_CONTAINER": { + criMount: &runtime.Mount{ + ContainerPath: "container-path", + HostPath: "host-path", + Propagation: runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER, + }, + fakeLookupMountFn: slaveLookupMountFn, + optionsCheck: []string{"rbind", "rslave"}, + expectErr: false, + }, + "HostPath should mount as 'rshared' if propagation is MountPropagation_PROPAGATION_BIDIRECTIONAL": { + criMount: &runtime.Mount{ + ContainerPath: "container-path", + HostPath: "host-path", + Propagation: runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL, + }, + fakeLookupMountFn: sharedLookupMountFn, + optionsCheck: []string{"rbind", "rshared"}, + expectErr: false, + }, + "HostPath should mount as 'rprivate' if propagation is illegal": { + criMount: &runtime.Mount{ + ContainerPath: "container-path", + HostPath: "host-path", + Propagation: runtime.MountPropagation(42), + }, + fakeLookupMountFn: nil, + optionsCheck: []string{"rbind", "rprivate"}, + expectErr: false, + }, + "Expect an error if HostPath isn't shared and mount propagation is MountPropagation_PROPAGATION_BIDIRECTIONAL": { + criMount: &runtime.Mount{ + ContainerPath: "container-path", + HostPath: "host-path", + Propagation: runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL, + }, + fakeLookupMountFn: slaveLookupMountFn, + expectErr: true, + }, + "Expect an error if HostPath isn't slave or shared and mount propagation is 
MountPropagation_PROPAGATION_HOST_TO_CONTAINER": { + criMount: &runtime.Mount{ + ContainerPath: "container-path", + HostPath: "host-path", + Propagation: runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER, + }, + fakeLookupMountFn: othersLookupMountFn, + expectErr: true, + }, + } { + t.Run(desc, func(t *testing.T) { + c := newTestCRIService() + c.os.(*ostesting.FakeOS).LookupMountFn = test.fakeLookupMountFn + config, _, _, _ := getCreateContainerTestData() + + var spec runtimespec.Spec + spec.Linux = &runtimespec.Linux{} + + err := opts.WithMounts(c.os, config, []*runtime.Mount{test.criMount}, "")(context.Background(), nil, nil, &spec) + if test.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + checkMount(t, spec.Mounts, test.criMount.HostPath, test.criMount.ContainerPath, "bind", test.optionsCheck, nil) + } + }) + } +} + +func TestPidNamespace(t *testing.T) { + testID := "test-id" + testPid := uint32(1234) + testSandboxID := "sandbox-id" + testContainerName := "container-name" + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + for desc, test := range map[string]struct { + pidNS runtime.NamespaceMode + expected runtimespec.LinuxNamespace + }{ + "node namespace mode": { + pidNS: runtime.NamespaceMode_NODE, + expected: runtimespec.LinuxNamespace{ + Type: runtimespec.PIDNamespace, + Path: opts.GetPIDNamespace(testPid), + }, + }, + "container namespace mode": { + pidNS: runtime.NamespaceMode_CONTAINER, + expected: runtimespec.LinuxNamespace{ + Type: runtimespec.PIDNamespace, + }, + }, + "pod namespace mode": { + pidNS: runtime.NamespaceMode_POD, + expected: runtimespec.LinuxNamespace{ + Type: runtimespec.PIDNamespace, + Path: opts.GetPIDNamespace(testPid), + }, + }, + } { + t.Run(desc, func(t *testing.T) { + containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{Pid: test.pidNS} + spec, err := c.containerSpec(testID, 
testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + assert.Contains(t, spec.Linux.Namespaces, test.expected) + }) + } +} + +func TestNoDefaultRunMount(t *testing.T) { + testID := "test-id" + testPid := uint32(1234) + testSandboxID := "sandbox-id" + testContainerName := "container-name" + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + assert.NoError(t, err) + for _, mount := range spec.Mounts { + assert.NotEqual(t, "/run", mount.Destination) + } +} + +func TestGenerateSeccompSecurityProfileSpecOpts(t *testing.T) { + for desc, test := range map[string]struct { + profile string + privileged bool + disable bool + specOpts oci.SpecOpts + expectErr bool + defaultProfile string + sp *runtime.SecurityProfile + }{ + "should return error if seccomp is specified when seccomp is not supported": { + profile: runtimeDefault, + disable: true, + expectErr: true, + }, + "should not return error if seccomp is not specified when seccomp is not supported": { + profile: "", + disable: true, + }, + "should not return error if seccomp is unconfined when seccomp is not supported": { + profile: unconfinedProfile, + disable: true, + }, + "should not set seccomp when privileged is true": { + profile: seccompDefaultProfile, + privileged: true, + }, + "should not set seccomp when seccomp is unconfined": { + profile: unconfinedProfile, + }, + "should not set seccomp when seccomp is not specified": { + profile: "", + }, + "should set default seccomp when seccomp is runtime/default": { + profile: runtimeDefault, + specOpts: seccomp.WithDefaultProfile(), + }, + "should set default seccomp when seccomp is docker/default": { + profile: 
dockerDefault, + specOpts: seccomp.WithDefaultProfile(), + }, + "should set specified profile when local profile is specified": { + profile: profileNamePrefix + "test-profile", + specOpts: seccomp.WithProfile("test-profile"), + }, + "should use default profile when seccomp is empty": { + defaultProfile: profileNamePrefix + "test-profile", + specOpts: seccomp.WithProfile("test-profile"), + }, + "should fallback to docker/default when seccomp is empty and default is runtime/default": { + defaultProfile: runtimeDefault, + specOpts: seccomp.WithDefaultProfile(), + }, + //----------------------------------------------- + // now buckets for the SecurityProfile variants + //----------------------------------------------- + "sp should return error if seccomp is specified when seccomp is not supported": { + disable: true, + expectErr: true, + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_RuntimeDefault, + }, + }, + "sp should not return error if seccomp is unconfined when seccomp is not supported": { + disable: true, + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_Unconfined, + }, + }, + "sp should not set seccomp when privileged is true": { + privileged: true, + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_RuntimeDefault, + }, + }, + "sp should not set seccomp when seccomp is unconfined": { + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_Unconfined, + }, + }, + "sp should not set seccomp when seccomp is not specified": {}, + "sp should set default seccomp when seccomp is runtime/default": { + specOpts: seccomp.WithDefaultProfile(), + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_RuntimeDefault, + }, + }, + "sp should set specified profile when local profile is specified": { + specOpts: seccomp.WithProfile("test-profile"), + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_Localhost, + LocalhostRef: profileNamePrefix + "test-profile", + }, 
+ }, + "sp should set specified profile when local profile is specified even without prefix": { + specOpts: seccomp.WithProfile("test-profile"), + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_Localhost, + LocalhostRef: "test-profile", + }, + }, + "sp should return error if specified profile is invalid": { + expectErr: true, + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_RuntimeDefault, + LocalhostRef: "test-profile", + }, + }, + } { + t.Run(desc, func(t *testing.T) { + cri := &criService{} + cri.config.UnsetSeccompProfile = test.defaultProfile + ssp := test.sp + csp, err := generateSeccompSecurityProfile( + test.profile, + test.defaultProfile) + if err != nil { + if test.expectErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + } else { + if ssp == nil { + ssp = csp + } + specOpts, err := cri.generateSeccompSpecOpts(ssp, test.privileged, !test.disable) + assert.Equal(t, + reflect.ValueOf(test.specOpts).Pointer(), + reflect.ValueOf(specOpts).Pointer()) + if test.expectErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + } + }) + } +} + +func TestGenerateApparmorSpecOpts(t *testing.T) { + for desc, test := range map[string]struct { + profile string + privileged bool + disable bool + specOpts oci.SpecOpts + expectErr bool + sp *runtime.SecurityProfile + }{ + "should return error if apparmor is specified when apparmor is not supported": { + profile: runtimeDefault, + disable: true, + expectErr: true, + }, + "should not return error if apparmor is not specified when apparmor is not supported": { + profile: "", + disable: true, + }, + "should set default apparmor when apparmor is not specified": { + profile: "", + specOpts: apparmor.WithDefaultProfile(appArmorDefaultProfileName), + }, + "should not apparmor when apparmor is not specified and privileged is true": { + profile: "", + privileged: true, + }, + "should not return error if apparmor is unconfined when apparmor is not supported": 
{ + profile: unconfinedProfile, + disable: true, + }, + "should not apparmor when apparmor is unconfined": { + profile: unconfinedProfile, + }, + "should not apparmor when apparmor is unconfined and privileged is true": { + profile: unconfinedProfile, + privileged: true, + }, + "should set default apparmor when apparmor is runtime/default": { + profile: runtimeDefault, + specOpts: apparmor.WithDefaultProfile(appArmorDefaultProfileName), + }, + "should not apparmor when apparmor is default and privileged is true": { + profile: runtimeDefault, + privileged: true, + }, + // TODO (mikebrow) add success with existing defined profile tests + "should return error when undefined local profile is specified": { + profile: profileNamePrefix + "test-profile", + expectErr: true, + }, + "should return error when undefined local profile is specified and privileged is true": { + profile: profileNamePrefix + "test-profile", + privileged: true, + expectErr: true, + }, + "should return error if specified profile is invalid": { + profile: "test-profile", + expectErr: true, + }, + //-------------------------------------- + // buckets for SecurityProfile struct + //-------------------------------------- + "sp should return error if apparmor is specified when apparmor is not supported": { + disable: true, + expectErr: true, + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_RuntimeDefault, + }, + }, + "sp should not return error if apparmor is unconfined when apparmor is not supported": { + disable: true, + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_Unconfined, + }, + }, + "sp should not apparmor when apparmor is unconfined": { + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_Unconfined, + }, + }, + "sp should not apparmor when apparmor is unconfined and privileged is true": { + privileged: true, + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_Unconfined, + }, + }, + "sp should set default apparmor 
when apparmor is runtime/default": { + specOpts: apparmor.WithDefaultProfile(appArmorDefaultProfileName), + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_RuntimeDefault, + }, + }, + "sp should not apparmor when apparmor is default and privileged is true": { + privileged: true, + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_RuntimeDefault, + }, + }, + "sp should return error when undefined local profile is specified": { + expectErr: true, + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_Localhost, + LocalhostRef: profileNamePrefix + "test-profile", + }, + }, + "sp should return error when undefined local profile is specified even without prefix": { + profile: profileNamePrefix + "test-profile", + expectErr: true, + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_Localhost, + LocalhostRef: "test-profile", + }, + }, + "sp should return error when undefined local profile is specified and privileged is true": { + privileged: true, + expectErr: true, + sp: &runtime.SecurityProfile{ + ProfileType: runtime.SecurityProfile_Localhost, + LocalhostRef: profileNamePrefix + "test-profile", + }, + }, + } { + t.Run(desc, func(t *testing.T) { + asp := test.sp + csp, err := generateApparmorSecurityProfile(test.profile) + if err != nil { + if test.expectErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + } else { + if asp == nil { + asp = csp + } + specOpts, err := generateApparmorSpecOpts(asp, test.privileged, !test.disable) + assert.Equal(t, + reflect.ValueOf(test.specOpts).Pointer(), + reflect.ValueOf(specOpts).Pointer()) + if test.expectErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + } + }) + } +} + +func TestMaskedAndReadonlyPaths(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, specCheck := 
getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + + defaultSpec, err := oci.GenerateSpec(ctrdutil.NamespacedContext(), nil, &containers.Container{ID: testID}) + require.NoError(t, err) + + for desc, test := range map[string]struct { + disableProcMount bool + masked []string + readonly []string + expectedMasked []string + expectedReadonly []string + privileged bool + }{ + "should apply default if not specified when disable_proc_mount = true": { + disableProcMount: true, + masked: nil, + readonly: nil, + expectedMasked: defaultSpec.Linux.MaskedPaths, + expectedReadonly: defaultSpec.Linux.ReadonlyPaths, + privileged: false, + }, + "should apply default if not specified when disable_proc_mount = false": { + disableProcMount: false, + masked: nil, + readonly: nil, + expectedMasked: []string{}, + expectedReadonly: []string{}, + privileged: false, + }, + "should be able to specify empty paths": { + masked: []string{}, + readonly: []string{}, + expectedMasked: []string{}, + expectedReadonly: []string{}, + privileged: false, + }, + "should apply CRI specified paths": { + masked: []string{"/proc"}, + readonly: []string{"/sys"}, + expectedMasked: []string{"/proc"}, + expectedReadonly: []string{"/sys"}, + privileged: false, + }, + "default should be nil for privileged": { + expectedMasked: nil, + expectedReadonly: nil, + privileged: true, + }, + "should be able to specify empty paths, esp. 
if privileged": { + masked: []string{}, + readonly: []string{}, + expectedMasked: nil, + expectedReadonly: nil, + privileged: true, + }, + "should not apply CRI specified paths if privileged": { + masked: []string{"/proc"}, + readonly: []string{"/sys"}, + expectedMasked: nil, + expectedReadonly: nil, + privileged: true, + }, + } { + t.Run(desc, func(t *testing.T) { + c.config.DisableProcMount = test.disableProcMount + containerConfig.Linux.SecurityContext.MaskedPaths = test.masked + containerConfig.Linux.SecurityContext.ReadonlyPaths = test.readonly + containerConfig.Linux.SecurityContext.Privileged = test.privileged + sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + Privileged: test.privileged, + } + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + if !test.privileged { // specCheck presumes an unprivileged container + specCheck(t, testID, testSandboxID, testPid, spec) + } + assert.Equal(t, test.expectedMasked, spec.Linux.MaskedPaths) + assert.Equal(t, test.expectedReadonly, spec.Linux.ReadonlyPaths) + }) + } +} + +func TestHostname(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + c.os.(*ostesting.FakeOS).HostnameFn = func() (string, error) { + return "real-hostname", nil + } + for desc, test := range map[string]struct { + hostname string + networkNs runtime.NamespaceMode + expectedEnv string + }{ + "should add HOSTNAME=sandbox.Hostname for pod network namespace": { + hostname: "test-hostname", + networkNs: runtime.NamespaceMode_POD, + expectedEnv: "HOSTNAME=test-hostname", + }, + "should add HOSTNAME=sandbox.Hostname for host network namespace": { + hostname: 
"test-hostname", + networkNs: runtime.NamespaceMode_NODE, + expectedEnv: "HOSTNAME=test-hostname", + }, + "should add HOSTNAME=os.Hostname for host network namespace if sandbox.Hostname is not set": { + hostname: "", + networkNs: runtime.NamespaceMode_NODE, + expectedEnv: "HOSTNAME=real-hostname", + }, + } { + t.Run(desc, func(t *testing.T) { + sandboxConfig.Hostname = test.hostname + sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{Network: test.networkNs}, + } + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + specCheck(t, testID, testSandboxID, testPid, spec) + assert.Contains(t, spec.Process.Env, test.expectedEnv) + }) + } +} + +func TestDisableCgroup(t *testing.T) { + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + c.config.DisableCgroup = true + spec, err := c.containerSpec("test-id", "sandbox-id", 1234, "", "container-name", testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + + t.Log("resource limit should not be set") + assert.Nil(t, spec.Linux.Resources.Memory) + assert.Nil(t, spec.Linux.Resources.CPU) + + t.Log("cgroup path should be empty") + assert.Empty(t, spec.Linux.CgroupsPath) +} + +func TestGenerateUserString(t *testing.T) { + type testcase struct { + // the name of the test case + name string + + u string + uid, gid *runtime.Int64Value + + result string + expectedError bool + } + testcases := []testcase{ + { + name: "Empty", + result: "", + }, + { + name: "Username Only", + u: "testuser", + result: "testuser", + }, + { + name: "Username, UID", + u: "testuser", + uid: &runtime.Int64Value{Value: 1}, + result: "testuser", + }, + { + name: "Username, UID, GID", + u: "testuser", + uid: 
&runtime.Int64Value{Value: 1}, + gid: &runtime.Int64Value{Value: 10}, + result: "testuser:10", + }, + { + name: "Username, GID", + u: "testuser", + gid: &runtime.Int64Value{Value: 10}, + result: "testuser:10", + }, + { + name: "UID only", + uid: &runtime.Int64Value{Value: 1}, + result: "1", + }, + { + name: "UID, GID", + uid: &runtime.Int64Value{Value: 1}, + gid: &runtime.Int64Value{Value: 10}, + result: "1:10", + }, + { + name: "GID only", + gid: &runtime.Int64Value{Value: 10}, + result: "", + expectedError: true, + }, + } + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + r, err := generateUserString(tc.u, tc.uid, tc.gid) + if tc.expectedError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + assert.Equal(t, tc.result, r) + }) + } +} + +func TestNonRootUserAndDevices(t *testing.T) { + testPid := uint32(1234) + c := newTestCRIService() + testSandboxID := "sandbox-id" + testContainerName := "container-name" + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + + hostDevicesRaw, err := oci.HostDevices() + assert.NoError(t, err) + + testDevice := hostDevicesRaw[0] + + for desc, test := range map[string]struct { + uid, gid *runtime.Int64Value + deviceOwnershipFromSecurityContext bool + expectedDeviceUID uint32 + expectedDeviceGID uint32 + }{ + "expect non-root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled": { + uid: &runtime.Int64Value{Value: 1}, + gid: &runtime.Int64Value{Value: 10}, + expectedDeviceUID: *testDevice.UID, + expectedDeviceGID: *testDevice.GID, + }, + "expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled": { + uid: &runtime.Int64Value{Value: 0}, + gid: &runtime.Int64Value{Value: 0}, + expectedDeviceUID: *testDevice.UID, + expectedDeviceGID: *testDevice.GID, + }, + "expect non-root container's Devices Uid/Gid to be the 
same as RunAsUser/RunAsGroup when deviceOwnershipFromSecurityContext is enabled": { + uid: &runtime.Int64Value{Value: 1}, + gid: &runtime.Int64Value{Value: 10}, + deviceOwnershipFromSecurityContext: true, + expectedDeviceUID: 1, + expectedDeviceGID: 10, + }, + "expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is enabled": { + uid: &runtime.Int64Value{Value: 0}, + gid: &runtime.Int64Value{Value: 0}, + deviceOwnershipFromSecurityContext: true, + expectedDeviceUID: *testDevice.UID, + expectedDeviceGID: *testDevice.GID, + }, + } { + t.Run(desc, func(t *testing.T) { + c.config.DeviceOwnershipFromSecurityContext = test.deviceOwnershipFromSecurityContext + containerConfig.Linux.SecurityContext.RunAsUser = test.uid + containerConfig.Linux.SecurityContext.RunAsGroup = test.gid + containerConfig.Devices = []*runtime.Device{ + { + ContainerPath: testDevice.Path, + HostPath: testDevice.Path, + Permissions: "r", + }, + } + + spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) + assert.NoError(t, err) + + assert.Equal(t, test.expectedDeviceUID, *spec.Linux.Devices[0].UID) + assert.Equal(t, test.expectedDeviceGID, *spec.Linux.Devices[0].GID) + }) + } +} + +func TestPrivilegedDevices(t *testing.T) { + testPid := uint32(1234) + c := newTestCRIService() + testSandboxID := "sandbox-id" + testContainerName := "container-name" + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + + for desc, test := range map[string]struct { + privileged bool + privilegedWithoutHostDevices bool + privilegedWithoutHostDevicesAllDevicesAllowed bool + expectHostDevices bool + expectAllDevicesAllowed bool + }{ + "expect no host devices when privileged is false": { + privileged: false, + privilegedWithoutHostDevices: false, + privilegedWithoutHostDevicesAllDevicesAllowed: false, + 
expectHostDevices: false, + expectAllDevicesAllowed: false, + }, + "expect no host devices when privileged is false and privilegedWithoutHostDevices is true": { + privileged: false, + privilegedWithoutHostDevices: true, + privilegedWithoutHostDevicesAllDevicesAllowed: false, + expectHostDevices: false, + expectAllDevicesAllowed: false, + }, + "expect host devices and all device allowlist when privileged is true": { + privileged: true, + privilegedWithoutHostDevices: false, + privilegedWithoutHostDevicesAllDevicesAllowed: false, + expectHostDevices: true, + expectAllDevicesAllowed: true, + }, + "expect no host devices when privileged is true and privilegedWithoutHostDevices is true": { + privileged: true, + privilegedWithoutHostDevices: true, + privilegedWithoutHostDevicesAllDevicesAllowed: false, + expectHostDevices: false, + expectAllDevicesAllowed: false, + }, + "expect host devices and all devices allowlist when privileged is true and privilegedWithoutHostDevices is true and privilegedWithoutHostDevicesAllDevicesAllowed is true": { + privileged: true, + privilegedWithoutHostDevices: true, + privilegedWithoutHostDevicesAllDevicesAllowed: true, + expectHostDevices: false, + expectAllDevicesAllowed: true, + }, + } { + t.Run(desc, func(t *testing.T) { + containerConfig.Linux.SecurityContext.Privileged = test.privileged + sandboxConfig.Linux.SecurityContext.Privileged = test.privileged + + ociRuntime := config.Runtime{ + PrivilegedWithoutHostDevices: test.privilegedWithoutHostDevices, + PrivilegedWithoutHostDevicesAllDevicesAllowed: test.privilegedWithoutHostDevicesAllDevicesAllowed, + } + spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + assert.NoError(t, err) + + hostDevicesRaw, err := oci.HostDevices() + assert.NoError(t, err) + var hostDevices = make([]string, 0) + for _, dev := range hostDevicesRaw { + // 
https://github.com/containerd/cri/pull/1521#issuecomment-652807951 + if dev.Major != 0 { + hostDevices = append(hostDevices, dev.Path) + } + } + + if test.expectHostDevices { + assert.Len(t, spec.Linux.Devices, len(hostDevices)) + } else { + assert.Empty(t, spec.Linux.Devices) + } + + assert.Len(t, spec.Linux.Resources.Devices, 1) + assert.Equal(t, spec.Linux.Resources.Devices[0].Allow, test.expectAllDevicesAllowed) + assert.Equal(t, spec.Linux.Resources.Devices[0].Access, "rwm") + }) + } +} + +func TestBaseOCISpec(t *testing.T) { + c := newTestCRIService() + baseLimit := int64(100) + c.baseOCISpecs = map[string]*oci.Spec{ + "/etc/containerd/cri-base.json": { + Process: &runtimespec.Process{ + User: runtimespec.User{AdditionalGids: []uint32{9999}}, + Capabilities: &runtimespec.LinuxCapabilities{ + Permitted: []string{"CAP_SETUID"}, + }, + }, + Linux: &runtimespec.Linux{ + Resources: &runtimespec.LinuxResources{ + Memory: &runtimespec.LinuxMemory{Limit: &baseLimit}, // Will be overwritten by `getCreateContainerTestData` + }, + }, + }, + } + + ociRuntime := config.Runtime{} + ociRuntime.BaseRuntimeSpec = "/etc/containerd/cri-base.json" + + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + assert.NoError(t, err) + + specCheck(t, testID, testSandboxID, testPid, spec) + + assert.Contains(t, spec.Process.User.AdditionalGids, uint32(9999)) + assert.Len(t, spec.Process.User.AdditionalGids, 3) + + assert.Contains(t, spec.Process.Capabilities.Permitted, "CAP_SETUID") + assert.Len(t, spec.Process.Capabilities.Permitted, 1) + + assert.Equal(t, *spec.Linux.Resources.Memory.Limit, containerConfig.Linux.Resources.MemoryLimitInBytes) +} + +func 
// writeFilesToTempDir creates a temporary directory named after
// tmpDirPattern and writes each element of content to its own file in it,
// named "spec-<index>.yaml".
//
// It returns the directory path. If content is empty no directory is created
// and an empty path is returned with a nil error. On failure the returned
// path is empty and any directory created so far is removed, so the caller
// has nothing to clean up. On success the caller owns the directory and is
// responsible for removing it.
func writeFilesToTempDir(tmpDirPattern string, content []string) (string, error) {
	if len(content) == 0 {
		return "", nil
	}

	dir, err := ioutil.TempDir("", tmpDirPattern)
	if err != nil {
		return "", err
	}

	for idx, data := range content {
		file := filepath.Join(dir, fmt.Sprintf("spec-%d.yaml", idx))
		if err := ioutil.WriteFile(file, []byte(data), 0644); err != nil {
			// The caller only learns the directory path on success, so a
			// partially populated directory must be removed here. The removal
			// is best-effort: the write error is the one worth reporting.
			_ = os.RemoveAll(dir)
			return "", err
		}
	}

	return dir, nil
}
env: + - BAR=injected +containerEdits: + env: + - "VENDOR2=present" +`, + }, + annotations: map[string]string{ + cdi.AnnotationPrefix + "vendor1_devices": "vendor1.com/device=foo", + cdi.AnnotationPrefix + "vendor2_devices": "vendor2.com/device=bar", + }, + expectDevices: []runtimespec.LinuxDevice{ + { + Path: "/dev/loop8", + Type: "b", + Major: 7, + Minor: 8, + }, + { + Path: "/dev/loop9", + Type: "b", + Major: 7, + Minor: 9, + }, + }, + expectEnv: []string{ + "FOO=injected", + "VENDOR1=present", + "BAR=injected", + "VENDOR2=present", + }, + }, + } { + t.Run(test.description, func(t *testing.T) { + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + require.NoError(t, err) + + specCheck(t, testID, testSandboxID, testPid, spec) + + cdiDir, err := writeFilesToTempDir("containerd-test-CDI-injections-", test.cdiSpecFiles) + if cdiDir != "" { + defer os.RemoveAll(cdiDir) + } + require.NoError(t, err) + + injectFun := oci.WithCDI(test.annotations, []string{cdiDir}) + err = injectFun(nil, nil, nil, spec) + assert.Equal(t, test.expectError, err != nil) + + if err != nil { + if test.expectEnv != nil { + for _, expectedEnv := range test.expectEnv { + assert.Contains(t, spec.Process.Env, expectedEnv) + } + } + if test.expectDevices != nil { + for _, expectedDev := range test.expectDevices { + assert.Contains(t, spec.Linux.Devices, expectedDev) + } + } + } + }) + } +} diff --git a/pkg/cri/sbserver/container_create_other.go b/pkg/cri/sbserver/container_create_other.go new file mode 100644 index 000000000..8119ccb2e --- /dev/null +++ b/pkg/cri/sbserver/container_create_other.go @@ -0,0 +1,61 @@ +//go:build !windows && !linux +// +build !windows,!linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "github.com/containerd/containerd/oci" + "github.com/containerd/containerd/snapshots" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/pkg/cri/config" +) + +// containerMounts sets up necessary container system file mounts +// including /dev/shm, /etc/hosts and /etc/resolv.conf. +func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount { + return []*runtime.Mount{} +} + +func (c *criService) containerSpec( + id string, + sandboxID string, + sandboxPid uint32, + netNSPath string, + containerName string, + imageName string, + config *runtime.ContainerConfig, + sandboxConfig *runtime.PodSandboxConfig, + imageConfig *imagespec.ImageConfig, + extraMounts []*runtime.Mount, + ociRuntime config.Runtime, +) (_ *runtimespec.Spec, retErr error) { + return c.runtimeSpec(id, ociRuntime.BaseRuntimeSpec) +} + +func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { + return []oci.SpecOpts{}, nil +} + +// snapshotterOpts returns snapshotter options for the rootfs snapshot +func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt { + return []snapshots.Opt{} +} diff --git a/pkg/cri/sbserver/container_create_other_test.go b/pkg/cri/sbserver/container_create_other_test.go new file mode 100644 index 000000000..a12431a85 --- 
/dev/null +++ b/pkg/cri/sbserver/container_create_other_test.go @@ -0,0 +1,41 @@ +//go:build !windows && !linux +// +build !windows,!linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "testing" + + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// checkMount is defined by all tests but not used here +var _ = checkMount + +func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig, + *imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) { + config := &runtime.ContainerConfig{} + sandboxConfig := &runtime.PodSandboxConfig{} + imageConfig := &imagespec.ImageConfig{} + specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) { + } + return config, sandboxConfig, imageConfig, specCheck +} diff --git a/pkg/cri/sbserver/container_create_test.go b/pkg/cri/sbserver/container_create_test.go new file mode 100644 index 000000000..65e42b34d --- /dev/null +++ b/pkg/cri/sbserver/container_create_test.go @@ -0,0 +1,458 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "path/filepath" + goruntime "runtime" + "testing" + + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/oci" + "github.com/containerd/containerd/pkg/cri/config" + "github.com/containerd/containerd/pkg/cri/constants" + "github.com/containerd/containerd/pkg/cri/opts" +) + +func checkMount(t *testing.T, mounts []runtimespec.Mount, src, dest, typ string, + contains, notcontains []string) { + found := false + for _, m := range mounts { + if m.Source == src && m.Destination == dest { + assert.Equal(t, m.Type, typ) + for _, c := range contains { + assert.Contains(t, m.Options, c) + } + for _, n := range notcontains { + assert.NotContains(t, m.Options, n) + } + found = true + break + } + } + assert.True(t, found, "mount from %q to %q not found", src, dest) +} + +const testImageName = "container-image-name" + +func TestGeneralContainerSpec(t *testing.T) { + testID := "test-id" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + testSandboxID := "sandbox-id" + testContainerName := "container-name" + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + 
require.NoError(t, err) + specCheck(t, testID, testSandboxID, testPid, spec) +} + +func TestPodAnnotationPassthroughContainerSpec(t *testing.T) { + switch goruntime.GOOS { + case "darwin": + t.Skip("not implemented on Darwin") + case "freebsd": + t.Skip("not implemented on FreeBSD") + } + + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + + for desc, test := range map[string]struct { + podAnnotations []string + configChange func(*runtime.PodSandboxConfig) + specCheck func(*testing.T, *runtimespec.Spec) + }{ + "a passthrough annotation should be passed as an OCI annotation": { + podAnnotations: []string{"c"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + assert.Equal(t, spec.Annotations["c"], "d") + }, + }, + "a non-passthrough annotation should not be passed as an OCI annotation": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Annotations["d"] = "e" + }, + podAnnotations: []string{"c"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + assert.Equal(t, spec.Annotations["c"], "d") + _, ok := spec.Annotations["d"] + assert.False(t, ok) + }, + }, + "passthrough annotations should support wildcard match": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Annotations["t.f"] = "j" + c.Annotations["z.g"] = "o" + c.Annotations["z"] = "o" + c.Annotations["y.ca"] = "b" + c.Annotations["y"] = "b" + }, + podAnnotations: []string{"t*", "z.*", "y.c*"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + t.Logf("%+v", spec.Annotations) + assert.Equal(t, spec.Annotations["t.f"], "j") + assert.Equal(t, spec.Annotations["z.g"], "o") + assert.Equal(t, spec.Annotations["y.ca"], "b") + _, ok := spec.Annotations["y"] + assert.False(t, ok) + _, ok = spec.Annotations["z"] + assert.False(t, ok) + }, + }, + } { + t.Run(desc, func(t *testing.T) { + c := newTestCRIService() + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + if 
test.configChange != nil { + test.configChange(sandboxConfig) + } + + ociRuntime := config.Runtime{ + PodAnnotations: test.podAnnotations, + } + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, + containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + assert.NoError(t, err) + assert.NotNil(t, spec) + specCheck(t, testID, testSandboxID, testPid, spec) + if test.specCheck != nil { + test.specCheck(t, spec) + } + }) + } +} + +func TestContainerSpecCommand(t *testing.T) { + for desc, test := range map[string]struct { + criEntrypoint []string + criArgs []string + imageEntrypoint []string + imageArgs []string + expected []string + expectErr bool + }{ + "should use cri entrypoint if it's specified": { + criEntrypoint: []string{"a", "b"}, + imageEntrypoint: []string{"c", "d"}, + imageArgs: []string{"e", "f"}, + expected: []string{"a", "b"}, + }, + "should use cri entrypoint if it's specified even if it's empty": { + criEntrypoint: []string{}, + criArgs: []string{"a", "b"}, + imageEntrypoint: []string{"c", "d"}, + imageArgs: []string{"e", "f"}, + expected: []string{"a", "b"}, + }, + "should use cri entrypoint and args if they are specified": { + criEntrypoint: []string{"a", "b"}, + criArgs: []string{"c", "d"}, + imageEntrypoint: []string{"e", "f"}, + imageArgs: []string{"g", "h"}, + expected: []string{"a", "b", "c", "d"}, + }, + "should use image entrypoint if cri entrypoint is not specified": { + criArgs: []string{"a", "b"}, + imageEntrypoint: []string{"c", "d"}, + imageArgs: []string{"e", "f"}, + expected: []string{"c", "d", "a", "b"}, + }, + "should use image args if both cri entrypoint and args are not specified": { + imageEntrypoint: []string{"c", "d"}, + imageArgs: []string{"e", "f"}, + expected: []string{"c", "d", "e", "f"}, + }, + "should return error if both entrypoint and args are empty": { + expectErr: true, + }, + } { + t.Run(desc, func(t *testing.T) { + config, _, imageConfig, _ := 
getCreateContainerTestData() + config.Command = test.criEntrypoint + config.Args = test.criArgs + imageConfig.Entrypoint = test.imageEntrypoint + imageConfig.Cmd = test.imageArgs + + var spec runtimespec.Spec + err := opts.WithProcessArgs(config, imageConfig)(context.Background(), nil, nil, &spec) + if test.expectErr { + assert.Error(t, err) + return + } + assert.NoError(t, err) + assert.Equal(t, test.expected, spec.Process.Args, desc) + }) + } +} + +func TestVolumeMounts(t *testing.T) { + testContainerRootDir := "test-container-root" + for desc, test := range map[string]struct { + criMounts []*runtime.Mount + imageVolumes map[string]struct{} + expectedMountDest []string + }{ + "should setup rw mount for image volumes": { + imageVolumes: map[string]struct{}{ + "/test-volume-1": {}, + "/test-volume-2": {}, + }, + expectedMountDest: []string{ + "/test-volume-1", + "/test-volume-2", + }, + }, + "should skip image volumes if already mounted by CRI": { + criMounts: []*runtime.Mount{ + { + ContainerPath: "/test-volume-1", + HostPath: "/test-hostpath-1", + }, + }, + imageVolumes: map[string]struct{}{ + "/test-volume-1": {}, + "/test-volume-2": {}, + }, + expectedMountDest: []string{ + "/test-volume-2", + }, + }, + "should compare and return cleanpath": { + criMounts: []*runtime.Mount{ + { + ContainerPath: "/test-volume-1", + HostPath: "/test-hostpath-1", + }, + }, + imageVolumes: map[string]struct{}{ + "/test-volume-1/": {}, + "/test-volume-2/": {}, + }, + expectedMountDest: []string{ + "/test-volume-2/", + }, + }, + } { + t.Run(desc, func(t *testing.T) { + config := &imagespec.ImageConfig{ + Volumes: test.imageVolumes, + } + c := newTestCRIService() + got := c.volumeMounts(testContainerRootDir, test.criMounts, config) + assert.Len(t, got, len(test.expectedMountDest)) + for _, dest := range test.expectedMountDest { + found := false + for _, m := range got { + if m.ContainerPath == dest { + found = true + assert.Equal(t, + filepath.Dir(m.HostPath), + 
filepath.Join(testContainerRootDir, "volumes")) + break + } + } + assert.True(t, found) + } + }) + } +} + +func TestContainerAnnotationPassthroughContainerSpec(t *testing.T) { + switch goruntime.GOOS { + case "darwin": + t.Skip("not implemented on Darwin") + case "freebsd": + t.Skip("not implemented on FreeBSD") + } + + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + + for desc, test := range map[string]struct { + podAnnotations []string + containerAnnotations []string + podConfigChange func(*runtime.PodSandboxConfig) + configChange func(*runtime.ContainerConfig) + specCheck func(*testing.T, *runtimespec.Spec) + }{ + "passthrough annotations from pod and container should be passed as an OCI annotation": { + podConfigChange: func(p *runtime.PodSandboxConfig) { + p.Annotations["pod.annotation.1"] = "1" + p.Annotations["pod.annotation.2"] = "2" + p.Annotations["pod.annotation.3"] = "3" + }, + configChange: func(c *runtime.ContainerConfig) { + c.Annotations["container.annotation.1"] = "1" + c.Annotations["container.annotation.2"] = "2" + c.Annotations["container.annotation.3"] = "3" + }, + podAnnotations: []string{"pod.annotation.1"}, + containerAnnotations: []string{"container.annotation.1"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + assert.Equal(t, "1", spec.Annotations["container.annotation.1"]) + _, ok := spec.Annotations["container.annotation.2"] + assert.False(t, ok) + _, ok = spec.Annotations["container.annotation.3"] + assert.False(t, ok) + assert.Equal(t, "1", spec.Annotations["pod.annotation.1"]) + _, ok = spec.Annotations["pod.annotation.2"] + assert.False(t, ok) + _, ok = spec.Annotations["pod.annotation.3"] + assert.False(t, ok) + }, + }, + "passthrough annotations from pod and container should support wildcard": { + podConfigChange: func(p *runtime.PodSandboxConfig) { + p.Annotations["pod.annotation.1"] = "1" + p.Annotations["pod.annotation.2"] = "2" + 
p.Annotations["pod.annotation.3"] = "3" + }, + configChange: func(c *runtime.ContainerConfig) { + c.Annotations["container.annotation.1"] = "1" + c.Annotations["container.annotation.2"] = "2" + c.Annotations["container.annotation.3"] = "3" + }, + podAnnotations: []string{"pod.annotation.*"}, + containerAnnotations: []string{"container.annotation.*"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + assert.Equal(t, "1", spec.Annotations["container.annotation.1"]) + assert.Equal(t, "2", spec.Annotations["container.annotation.2"]) + assert.Equal(t, "3", spec.Annotations["container.annotation.3"]) + assert.Equal(t, "1", spec.Annotations["pod.annotation.1"]) + assert.Equal(t, "2", spec.Annotations["pod.annotation.2"]) + assert.Equal(t, "3", spec.Annotations["pod.annotation.3"]) + }, + }, + "annotations should not pass through if no passthrough annotations are configured": { + podConfigChange: func(p *runtime.PodSandboxConfig) { + p.Annotations["pod.annotation.1"] = "1" + p.Annotations["pod.annotation.2"] = "2" + p.Annotations["pod.annotation.3"] = "3" + }, + configChange: func(c *runtime.ContainerConfig) { + c.Annotations["container.annotation.1"] = "1" + c.Annotations["container.annotation.2"] = "2" + c.Annotations["container.annotation.3"] = "3" + }, + podAnnotations: []string{}, + containerAnnotations: []string{}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + _, ok := spec.Annotations["container.annotation.1"] + assert.False(t, ok) + _, ok = spec.Annotations["container.annotation.2"] + assert.False(t, ok) + _, ok = spec.Annotations["container.annotation.3"] + assert.False(t, ok) + _, ok = spec.Annotations["pod.annotation.1"] + assert.False(t, ok) + _, ok = spec.Annotations["pod.annotation.2"] + assert.False(t, ok) + _, ok = spec.Annotations["pod.annotation.3"] + assert.False(t, ok) + }, + }, + } { + t.Run(desc, func(t *testing.T) { + c := newTestCRIService() + containerConfig, sandboxConfig, imageConfig, specCheck := 
getCreateContainerTestData() + if test.configChange != nil { + test.configChange(containerConfig) + } + if test.podConfigChange != nil { + test.podConfigChange(sandboxConfig) + } + ociRuntime := config.Runtime{ + PodAnnotations: test.podAnnotations, + ContainerAnnotations: test.containerAnnotations, + } + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, + containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + assert.NoError(t, err) + assert.NotNil(t, spec) + specCheck(t, testID, testSandboxID, testPid, spec) + if test.specCheck != nil { + test.specCheck(t, spec) + } + }) + } +} + +func TestBaseRuntimeSpec(t *testing.T) { + c := newTestCRIService() + c.baseOCISpecs = map[string]*oci.Spec{ + "/etc/containerd/cri-base.json": { + Version: "1.0.2", + Hostname: "old", + }, + } + + out, err := c.runtimeSpec("id1", "/etc/containerd/cri-base.json", oci.WithHostname("new")) + assert.NoError(t, err) + + assert.Equal(t, "1.0.2", out.Version) + assert.Equal(t, "new", out.Hostname) + + // Make sure original base spec not changed + assert.NotEqual(t, out, c.baseOCISpecs["/etc/containerd/cri-base.json"]) + assert.Equal(t, c.baseOCISpecs["/etc/containerd/cri-base.json"].Hostname, "old") + + assert.Equal(t, filepath.Join("/", constants.K8sContainerdNamespace, "id1"), out.Linux.CgroupsPath) +} + +func TestRuntimeSnapshotter(t *testing.T) { + defaultRuntime := config.Runtime{ + Snapshotter: "", + } + + fooRuntime := config.Runtime{ + Snapshotter: "devmapper", + } + + for desc, test := range map[string]struct { + runtime config.Runtime + expectSnapshotter string + }{ + "should return default snapshotter when runtime.Snapshotter is not set": { + runtime: defaultRuntime, + expectSnapshotter: config.DefaultConfig().Snapshotter, + }, + "should return overridden snapshotter when runtime.Snapshotter is set": { + runtime: fooRuntime, + expectSnapshotter: "devmapper", + }, + } { + t.Run(desc, func(t *testing.T) { + cri := 
newTestCRIService() + cri.config = config.Config{ + PluginConfig: config.DefaultConfig(), + } + assert.Equal(t, test.expectSnapshotter, cri.runtimeSnapshotter(context.Background(), test.runtime)) + }) + } +} diff --git a/pkg/cri/sbserver/container_create_windows.go b/pkg/cri/sbserver/container_create_windows.go new file mode 100644 index 000000000..6cc98cff4 --- /dev/null +++ b/pkg/cri/sbserver/container_create_windows.go @@ -0,0 +1,163 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "errors" + "fmt" + "strconv" + + "github.com/containerd/containerd/oci" + "github.com/containerd/containerd/snapshots" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/pkg/cri/annotations" + "github.com/containerd/containerd/pkg/cri/config" + customopts "github.com/containerd/containerd/pkg/cri/opts" +) + +// No container mounts for windows. 
// containerMounts returns the extra system mounts for a container. There are
// no container mounts for Windows, so this always returns nil.
func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount {
	return nil
}

// containerSpec assembles the OCI runtime spec for a Windows container.
//
// It collects spec options from the CRI container config, the sandbox config
// and the image config (process args, working directory, TTY, environment,
// network namespace, hostname, mounts, devices, resources, user, credential
// spec and annotations) and passes them, in that order, to c.runtimeSpec
// together with ociRuntime.BaseRuntimeSpec. sandboxPid is not used by this
// implementation.
//
// An error is returned when the container's HostProcess flag differs from the
// sandbox's, or when c.runtimeSpec fails.
func (c *criService) containerSpec(
	id string,
	sandboxID string,
	sandboxPid uint32,
	netNSPath string,
	containerName string,
	imageName string,
	config *runtime.ContainerConfig,
	sandboxConfig *runtime.PodSandboxConfig,
	imageConfig *imagespec.ImageConfig,
	extraMounts []*runtime.Mount,
	ociRuntime config.Runtime,
) (*runtimespec.Spec, error) {
	specOpts := []oci.SpecOpts{
		customopts.WithProcessArgs(config, imageConfig),
	}

	// All containers in a pod need to have HostProcess set if it was set on the pod,
	// and vice versa no containers in the pod can be HostProcess if the pods spec
	// didn't have the field set. The only case that is valid is if these are the same value.
	cntrHpc := config.GetWindows().GetSecurityContext().GetHostProcess()
	sandboxHpc := sandboxConfig.GetWindows().GetSecurityContext().GetHostProcess()
	if cntrHpc != sandboxHpc {
		return nil, errors.New("pod spec and all containers inside must have the HostProcess field set to be valid")
	}

	// The working directory from the container config takes precedence over
	// the one from the image config.
	if config.GetWorkingDir() != "" {
		specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir()))
	} else if imageConfig.WorkingDir != "" {
		specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
	}

	if config.GetTty() {
		specOpts = append(specOpts, oci.WithTTY)
	}

	// Apply envs from image config first, so that envs from container config
	// can override them.
	env := append([]string{}, imageConfig.Env...)
	for _, e := range config.GetEnvs() {
		env = append(env, e.GetKey()+"="+e.GetValue())
	}
	specOpts = append(specOpts, oci.WithEnv(env))

	specOpts = append(specOpts,
		// Clear the root location since hcsshim expects it.
		// NOTE: readonly rootfs doesn't work on windows.
		customopts.WithoutRoot,
		customopts.WithWindowsNetworkNamespace(netNSPath),
		oci.WithHostname(sandboxConfig.GetHostname()),
	)

	specOpts = append(specOpts, customopts.WithWindowsMounts(c.os, config, extraMounts), customopts.WithDevices(config))

	// Start with the image config user and override below if RunAsUsername is not "".
	username := imageConfig.User

	windowsConfig := config.GetWindows()
	if windowsConfig != nil {
		specOpts = append(specOpts, customopts.WithWindowsResources(windowsConfig.GetResources()))
		securityCtx := windowsConfig.GetSecurityContext()
		if securityCtx != nil {
			runAsUser := securityCtx.GetRunAsUsername()
			if runAsUser != "" {
				username = runAsUser
			}
			cs := securityCtx.GetCredentialSpec()
			if cs != "" {
				specOpts = append(specOpts, customopts.WithWindowsCredentialSpec(cs))
			}
		}
	}

	// There really isn't a good Windows way to verify that the username is available in the
	// image as early as here like there is for Linux. Later on in the stack hcsshim
	// will handle the behavior of erroring out if the user isn't available in the image
	// when trying to run the init process.
	specOpts = append(specOpts, oci.WithUser(username))

	// Sandbox annotations selected by the runtime's PodAnnotations list.
	for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations,
		ociRuntime.PodAnnotations) {
		specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
	}

	// Container annotations selected by the runtime's ContainerAnnotations list.
	for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
		ociRuntime.ContainerAnnotations) {
		specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
	}

	// Annotations describing the container, its sandbox and its image are
	// appended after the passthrough annotations.
	specOpts = append(specOpts,
		customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
		customopts.WithAnnotation(annotations.SandboxID, sandboxID),
		customopts.WithAnnotation(annotations.SandboxNamespace, sandboxConfig.GetMetadata().GetNamespace()),
		customopts.WithAnnotation(annotations.SandboxName, sandboxConfig.GetMetadata().GetName()),
		customopts.WithAnnotation(annotations.ContainerName, containerName),
		customopts.WithAnnotation(annotations.ImageName, imageName),
		customopts.WithAnnotation(annotations.WindowsHostProcess, strconv.FormatBool(sandboxHpc)),
	)
	return c.runtimeSpec(id, ociRuntime.BaseRuntimeSpec, specOpts...)
}

// No extra spec options needed for windows.
+func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { + return nil, nil +} + +// snapshotterOpts returns any Windows specific snapshotter options for the r/w layer +func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt { + var opts []snapshots.Opt + + switch snapshotterName { + case "windows": + rootfsSize := config.GetWindows().GetResources().GetRootfsSizeInBytes() + if rootfsSize != 0 { + sizeStr := fmt.Sprintf("%d", rootfsSize) + labels := map[string]string{ + "containerd.io/snapshot/windows/rootfs.sizebytes": sizeStr, + } + opts = append(opts, snapshots.WithLabels(labels)) + } + } + + return opts +} diff --git a/pkg/cri/sbserver/container_create_windows_test.go b/pkg/cri/sbserver/container_create_windows_test.go new file mode 100644 index 000000000..2a1e7d954 --- /dev/null +++ b/pkg/cri/sbserver/container_create_windows_test.go @@ -0,0 +1,247 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "testing" + + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/assert" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/pkg/cri/annotations" + "github.com/containerd/containerd/pkg/cri/config" +) + +func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig, + *imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) { + config := &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: "test-name", + Attempt: 1, + }, + Image: &runtime.ImageSpec{ + Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799", + }, + Command: []string{"test", "command"}, + Args: []string{"test", "args"}, + WorkingDir: "test-cwd", + Envs: []*runtime.KeyValue{ + {Key: "k1", Value: "v1"}, + {Key: "k2", Value: "v2"}, + {Key: "k3", Value: "v3=v3bis"}, + {Key: "k4", Value: "v4=v4bis=foop"}, + }, + Mounts: []*runtime.Mount{ + // everything default + { + ContainerPath: "container-path-1", + HostPath: "host-path-1", + }, + // readOnly + { + ContainerPath: "container-path-2", + HostPath: "host-path-2", + Readonly: true, + }, + }, + Labels: map[string]string{"a": "b"}, + Annotations: map[string]string{"c": "d"}, + Windows: &runtime.WindowsContainerConfig{ + Resources: &runtime.WindowsContainerResources{ + CpuShares: 100, + CpuCount: 200, + CpuMaximum: 300, + MemoryLimitInBytes: 400, + }, + SecurityContext: &runtime.WindowsContainerSecurityContext{ + RunAsUsername: "test-user", + CredentialSpec: "{\"test\": \"spec\"}", + HostProcess: false, + }, + }, + } + sandboxConfig := &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: "test-sandbox-name", + Uid: "test-sandbox-uid", + Namespace: "test-sandbox-ns", + Attempt: 2, + }, + Windows: &runtime.WindowsPodSandboxConfig{}, + Hostname: "test-hostname", + 
Annotations: map[string]string{"c": "d"}, + } + imageConfig := &imagespec.ImageConfig{ + Env: []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"}, + Entrypoint: []string{"/entrypoint"}, + Cmd: []string{"cmd"}, + WorkingDir: "/workspace", + User: "ContainerUser", + } + specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) { + assert.Nil(t, spec.Root) + assert.Equal(t, "test-hostname", spec.Hostname) + assert.Equal(t, []string{"test", "command", "test", "args"}, spec.Process.Args) + assert.Equal(t, "test-cwd", spec.Process.Cwd) + assert.Contains(t, spec.Process.Env, "k1=v1", "k2=v2", "k3=v3=v3bis", "ik4=iv4=iv4bis=boop") + assert.Contains(t, spec.Process.Env, "ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "k4=v4=v4bis=foop") + + t.Logf("Check bind mount") + checkMount(t, spec.Mounts, "host-path-1", "container-path-1", "", []string{"rw"}, nil) + checkMount(t, spec.Mounts, "host-path-2", "container-path-2", "", []string{"ro"}, nil) + + t.Logf("Check resource limits") + assert.EqualValues(t, *spec.Windows.Resources.CPU.Shares, 100) + assert.EqualValues(t, *spec.Windows.Resources.CPU.Count, 200) + assert.EqualValues(t, *spec.Windows.Resources.CPU.Maximum, 300) + assert.EqualValues(t, *spec.Windows.Resources.CPU.Maximum, 300) + assert.EqualValues(t, *spec.Windows.Resources.Memory.Limit, 400) + + // Also checks if override of the image configs user is behaving. 
+ t.Logf("Check username") + assert.Contains(t, spec.Process.User.Username, "test-user") + + t.Logf("Check credential spec") + assert.Contains(t, spec.Windows.CredentialSpec, "{\"test\": \"spec\"}") + + t.Logf("Check PodSandbox annotations") + assert.Contains(t, spec.Annotations, annotations.SandboxID) + assert.EqualValues(t, spec.Annotations[annotations.SandboxID], sandboxID) + + assert.Contains(t, spec.Annotations, annotations.ContainerType) + assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeContainer) + + assert.Contains(t, spec.Annotations, annotations.SandboxNamespace) + assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-sandbox-ns") + + assert.Contains(t, spec.Annotations, annotations.SandboxName) + assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-sandbox-name") + + assert.Contains(t, spec.Annotations, annotations.WindowsHostProcess) + assert.EqualValues(t, spec.Annotations[annotations.WindowsHostProcess], "false") + } + return config, sandboxConfig, imageConfig, specCheck +} + +func TestContainerWindowsNetworkNamespace(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + nsPath := "test-cni" + c := newTestCRIService() + + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + spec, err := c.containerSpec(testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) + assert.NoError(t, err) + assert.NotNil(t, spec) + specCheck(t, testID, testSandboxID, testPid, spec) + assert.NotNil(t, spec.Windows) + assert.NotNil(t, spec.Windows.Network) + assert.Equal(t, nsPath, spec.Windows.Network.NetworkNamespace) +} + +func TestMountCleanPath(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + nsPath 
:= "test-cni" + c := newTestCRIService() + + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + containerConfig.Mounts = append(containerConfig.Mounts, &runtime.Mount{ + ContainerPath: "c:/test/container-path", + HostPath: "c:/test/host-path", + }) + spec, err := c.containerSpec(testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) + assert.NoError(t, err) + assert.NotNil(t, spec) + specCheck(t, testID, testSandboxID, testPid, spec) + checkMount(t, spec.Mounts, "c:\\test\\host-path", "c:\\test\\container-path", "", []string{"rw"}, nil) +} + +func TestMountNamedPipe(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + nsPath := "test-cni" + c := newTestCRIService() + + containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() + containerConfig.Mounts = append(containerConfig.Mounts, &runtime.Mount{ + ContainerPath: `\\.\pipe\foo`, + HostPath: `\\.\pipe\foo`, + }) + spec, err := c.containerSpec(testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) + assert.NoError(t, err) + assert.NotNil(t, spec) + specCheck(t, testID, testSandboxID, testPid, spec) + checkMount(t, spec.Mounts, `\\.\pipe\foo`, `\\.\pipe\foo`, "", []string{"rw"}, nil) +} + +func TestHostProcessRequirements(t *testing.T) { + testID := "test-id" + testSandboxID := "sandbox-id" + testContainerName := "container-name" + testPid := uint32(1234) + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + for desc, test := range map[string]struct { + containerHostProcess bool + sandboxHostProcess bool + expectError bool + }{ + "hostprocess container in non-hostprocess sandbox should fail": { + 
containerHostProcess: true, + sandboxHostProcess: false, + expectError: true, + }, + "hostprocess container in hostprocess sandbox should be fine": { + containerHostProcess: true, + sandboxHostProcess: true, + expectError: false, + }, + "non-hostprocess container in hostprocess sandbox should fail": { + containerHostProcess: false, + sandboxHostProcess: true, + expectError: true, + }, + "non-hostprocess container in non-hostprocess sandbox should be fine": { + containerHostProcess: false, + sandboxHostProcess: false, + expectError: false, + }, + } { + t.Run(desc, func(t *testing.T) { + containerConfig.Windows.SecurityContext.HostProcess = test.containerHostProcess + sandboxConfig.Windows.SecurityContext = &runtime.WindowsSandboxSecurityContext{ + HostProcess: test.sandboxHostProcess, + } + _, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + if test.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} diff --git a/pkg/cri/sbserver/container_exec.go b/pkg/cri/sbserver/container_exec.go new file mode 100644 index 000000000..10d716b27 --- /dev/null +++ b/pkg/cri/sbserver/container_exec.go @@ -0,0 +1,37 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "fmt" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// Exec prepares a streaming endpoint to execute a command in the container, and returns the address. +func (c *criService) Exec(ctx context.Context, r *runtime.ExecRequest) (*runtime.ExecResponse, error) { + cntr, err := c.containerStore.Get(r.GetContainerId()) + if err != nil { + return nil, fmt.Errorf("failed to find container %q in store: %w", r.GetContainerId(), err) + } + state := cntr.Status.Get().State() + if state != runtime.ContainerState_CONTAINER_RUNNING { + return nil, fmt.Errorf("container is in %s state", criContainerStateToString(state)) + } + return c.streamServer.GetExec(r) +} diff --git a/pkg/cri/sbserver/container_execsync.go b/pkg/cri/sbserver/container_execsync.go new file mode 100644 index 000000000..4a3706fd4 --- /dev/null +++ b/pkg/cri/sbserver/container_execsync.go @@ -0,0 +1,252 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
// cappedWriter forwards at most a fixed number of bytes to the wrapped
// writer and silently drops everything beyond that budget.
type cappedWriter struct {
	// w receives the bytes that still fit into the budget.
	w io.WriteCloser
	// remain is the number of bytes that may still be forwarded to w.
	remain int
}

// Write forwards as much of p as the remaining budget allows.
//
// When the underlying write succeeds, len(p) is reported even if part (or
// all) of p was dropped, so the caller sees no indication of truncation.
// When the underlying write fails, the number of bytes actually written and
// the underlying error are returned.
func (cw *cappedWriter) Write(p []byte) (int, error) {
	total := len(p)
	if cw.isFull() {
		// Budget exhausted: pretend the data was consumed.
		return total, nil
	}

	chunk := p
	if len(chunk) > cw.remain {
		chunk = chunk[:cw.remain]
	}

	n, err := cw.w.Write(chunk)
	cw.remain -= n
	if err == nil {
		return total, nil
	}
	return n, err
}

// Close closes the wrapped writer.
func (cw *cappedWriter) Close() error {
	return cw.w.Close()
}

// isFull reports whether the byte budget has been used up.
func (cw *cappedWriter) isFull() bool {
	return cw.remain <= 0
}
// execInternal runs opts.cmd as a new exec process inside the task of the
// given containerd container and blocks until that process exits or the
// exec context is done.
//
// id is the container ID; it is used for log messages and to locate the
// volatile container root directory handed to the exec IO. opts carries the
// command, the optional stdio streams, the tty flag, the resize channel and
// an optional timeout (a timeout <= 0 means no timeout).
//
// On a normal exit a pointer to the exit code is returned. An error is
// returned when any setup step fails, when waiting for the process fails, or
// when the exec context is done before the process exits (in which case the
// process is killed with SIGKILL first).
func (c *criService) execInternal(ctx context.Context, container containerd.Container, id string, opts execOptions) (*uint32, error) {
	// Cancel the context before returning to ensure goroutines are stopped.
	// This is important, because if `Start` returns error, `Wait` will hang
	// forever unless we cancel the context.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	spec, err := container.Spec(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get container spec: %w", err)
	}
	task, err := container.Task(ctx, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to load task: %w", err)
	}
	// Start from the container's own process spec and override the terminal
	// flag and the arguments for this exec.
	pspec := spec.Process

	pspec.Terminal = opts.tty
	if opts.tty {
		if err := oci.WithEnv([]string{"TERM=xterm"})(ctx, nil, nil, spec); err != nil {
			return nil, fmt.Errorf("add TERM env var to spec: %w", err)
		}
	}

	pspec.Args = opts.cmd

	// Fall back to discard loggers for output streams the caller did not supply.
	if opts.stdout == nil {
		opts.stdout = cio.NewDiscardLogger()
	}
	if opts.stderr == nil {
		opts.stderr = cio.NewDiscardLogger()
	}
	execID := util.GenerateID()
	log.G(ctx).Debugf("Generated exec id %q for container %q", execID, id)
	volatileRootDir := c.getVolatileContainerRootDir(id)
	// execIO is assigned by the IO creation callback passed to task.Exec so
	// that it can be attached to the caller's streams further down.
	var execIO *cio.ExecIO
	process, err := task.Exec(ctx, execID, pspec,
		func(id string) (containerdio.IO, error) {
			var err error
			execIO, err = cio.NewExecIO(id, volatileRootDir, opts.tty, opts.stdin != nil)
			return execIO, err
		},
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create exec %q: %w", execID, err)
	}
	// Always delete the exec process on the way out. A separate context from
	// ctrdutil.DeferContext is used rather than ctx — presumably because ctx
	// may already be cancelled at this point; confirm against DeferContext.
	defer func() {
		deferCtx, deferCancel := ctrdutil.DeferContext()
		defer deferCancel()
		if _, err := process.Delete(deferCtx, containerd.WithProcessKill); err != nil {
			log.G(ctx).WithError(err).Errorf("Failed to delete exec process %q for container %q", execID, id)
		}
	}()

	// Register the wait before starting the process, so the exit channel
	// exists by the time the process runs.
	exitCh, err := process.Wait(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to wait for process %q: %w", execID, err)
	}
	if err := process.Start(ctx); err != nil {
		return nil, fmt.Errorf("failed to start exec %q: %w", execID, err)
	}

	// Resize failures are only logged; they do not abort the exec.
	handleResizing(ctx, opts.resize, func(size remotecommand.TerminalSize) {
		if err := process.Resize(ctx, uint32(size.Width), uint32(size.Height)); err != nil {
			log.G(ctx).WithError(err).Errorf("Failed to resize process %q console for container %q", execID, id)
		}
	})

	// attachDone is received from below once the stream pipes are done.
	attachDone := execIO.Attach(cio.AttachOptions{
		Stdin:     opts.stdin,
		Stdout:    opts.stdout,
		Stderr:    opts.stderr,
		Tty:       opts.tty,
		StdinOnce: true,
		CloseStdin: func() error {
			return process.CloseIO(ctx, containerd.WithStdinCloser)
		},
	})

	// execCtx is ctx itself when no timeout is requested, so the Done branch
	// below fires on caller cancellation as well as on timeout.
	execCtx := ctx
	if opts.timeout > 0 {
		var execCtxCancel context.CancelFunc
		execCtx, execCtxCancel = context.WithTimeout(ctx, opts.timeout)
		defer execCtxCancel()
	}

	select {
	case <-execCtx.Done():
		// NOTE(review): when this branch fires because ctx itself was
		// cancelled, Kill is issued with an already-cancelled ctx and the
		// returned error still reads "timeout ... exceeded" — confirm this
		// is intended.
		// Ignore the not found error because the process may exit itself before killing.
		if err := process.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) {
			return nil, fmt.Errorf("failed to kill exec %q: %w", execID, err)
		}
		// Wait for the process to be killed.
		exitRes := <-exitCh
		log.G(ctx).Debugf("Timeout received while waiting for exec process kill %q code %d and error %v",
			execID, exitRes.ExitCode(), exitRes.Error())
		<-attachDone
		log.G(ctx).Debugf("Stream pipe for exec process %q done", execID)
		return nil, fmt.Errorf("timeout %v exceeded: %w", opts.timeout, execCtx.Err())
	case exitRes := <-exitCh:
		code, _, err := exitRes.Result()
		log.G(ctx).Debugf("Exec process %q exits with exit code %d and error %v", execID, code, err)
		if err != nil {
			// This path returns without waiting for attachDone.
			return nil, fmt.Errorf("failed while waiting for exec %q: %w", execID, err)
		}
		// Wait for the stream pipes to finish before reporting the exit code.
		<-attachDone
		log.G(ctx).Debugf("Stream pipe for exec process %q done", execID)
		return &code, nil
	}
}
+// https://github.com/kubernetes/kubernetes/blob/v1.15.0/pkg/kubelet/dockershim/exec.go#L127 +// For example, if the `kubectl exec -it` process is killed, IO will be closed. In +// this case, the CRI plugin will still have a goroutine waiting for the exec process +// to exit and log the exit code, but dockershim won't. +func (c *criService) execInContainer(ctx context.Context, id string, opts execOptions) (*uint32, error) { + // Get container from our container store. + cntr, err := c.containerStore.Get(id) + + if err != nil { + return nil, fmt.Errorf("failed to find container %q in store: %w", id, err) + } + id = cntr.ID + + state := cntr.Status.Get().State() + if state != runtime.ContainerState_CONTAINER_RUNNING { + return nil, fmt.Errorf("container is in %s state", criContainerStateToString(state)) + } + + return c.execInternal(ctx, cntr.Container, id, opts) +} diff --git a/pkg/cri/sbserver/container_execsync_test.go b/pkg/cri/sbserver/container_execsync_test.go new file mode 100644 index 000000000..3b23aa677 --- /dev/null +++ b/pkg/cri/sbserver/container_execsync_test.go @@ -0,0 +1,52 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "bytes" + "testing" + + cioutil "github.com/containerd/containerd/pkg/ioutil" + "github.com/stretchr/testify/assert" +) + +func TestCWWrite(t *testing.T) { + var buf bytes.Buffer + cw := &cappedWriter{w: cioutil.NewNopWriteCloser(&buf), remain: 10} + + n, err := cw.Write([]byte("hello")) + assert.NoError(t, err) + assert.Equal(t, 5, n) + + n, err = cw.Write([]byte("helloworld")) + assert.NoError(t, err, "no errors even it hits the cap") + assert.Equal(t, 10, n, "no indication of partial write") + assert.True(t, cw.isFull()) + assert.Equal(t, []byte("hellohello"), buf.Bytes(), "the underlying writer is capped") + + _, err = cw.Write([]byte("world")) + assert.NoError(t, err) + assert.True(t, cw.isFull()) + assert.Equal(t, []byte("hellohello"), buf.Bytes(), "the underlying writer is capped") +} + +func TestCWClose(t *testing.T) { + var buf bytes.Buffer + cw := &cappedWriter{w: cioutil.NewNopWriteCloser(&buf), remain: 5} + err := cw.Close() + assert.NoError(t, err) +} diff --git a/pkg/cri/sbserver/container_list.go b/pkg/cri/sbserver/container_list.go new file mode 100644 index 000000000..51cb10268 --- /dev/null +++ b/pkg/cri/sbserver/container_list.go @@ -0,0 +1,116 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "time" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + containerstore "github.com/containerd/containerd/pkg/cri/store/container" +) + +// ListContainers lists all containers matching the filter. +func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (*runtime.ListContainersResponse, error) { + start := time.Now() + // List all containers from store. + containersInStore := c.containerStore.List() + + var containers []*runtime.Container + for _, container := range containersInStore { + containers = append(containers, toCRIContainer(container)) + } + + containers = c.filterCRIContainers(containers, r.GetFilter()) + + containerListTimer.UpdateSince(start) + return &runtime.ListContainersResponse{Containers: containers}, nil +} + +// toCRIContainer converts internal container object into CRI container. +func toCRIContainer(container containerstore.Container) *runtime.Container { + status := container.Status.Get() + return &runtime.Container{ + Id: container.ID, + PodSandboxId: container.SandboxID, + Metadata: container.Config.GetMetadata(), + Image: container.Config.GetImage(), + ImageRef: container.ImageRef, + State: status.State(), + CreatedAt: status.CreatedAt, + Labels: container.Config.GetLabels(), + Annotations: container.Config.GetAnnotations(), + } +} + +func (c *criService) normalizeContainerFilter(filter *runtime.ContainerFilter) { + if cntr, err := c.containerStore.Get(filter.GetId()); err == nil { + filter.Id = cntr.ID + } + if sb, err := c.sandboxStore.Get(filter.GetPodSandboxId()); err == nil { + filter.PodSandboxId = sb.ID + } +} + +// filterCRIContainers filters CRIContainers. +func (c *criService) filterCRIContainers(containers []*runtime.Container, filter *runtime.ContainerFilter) []*runtime.Container { + if filter == nil { + return containers + } + + // The containerd cri plugin supports short ids so long as there is only one + // match. 
So we do a lookup against the store here if a pod id has been + // included in the filter. + sb := filter.GetPodSandboxId() + if sb != "" { + sandbox, err := c.sandboxStore.Get(sb) + if err == nil { + sb = sandbox.ID + } + } + + c.normalizeContainerFilter(filter) + filtered := []*runtime.Container{} + for _, cntr := range containers { + if filter.GetId() != "" && filter.GetId() != cntr.Id { + continue + } + if sb != "" && sb != cntr.PodSandboxId { + continue + } + if filter.GetState() != nil && filter.GetState().GetState() != cntr.State { + continue + } + if filter.GetLabelSelector() != nil { + match := true + for k, v := range filter.GetLabelSelector() { + got, ok := cntr.Labels[k] + if !ok || got != v { + match = false + break + } + } + if !match { + continue + } + } + filtered = append(filtered, cntr) + } + + return filtered +} diff --git a/pkg/cri/sbserver/container_list_test.go b/pkg/cri/sbserver/container_list_test.go new file mode 100644 index 000000000..58b26fb2d --- /dev/null +++ b/pkg/cri/sbserver/container_list_test.go @@ -0,0 +1,348 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" +) + +func TestToCRIContainer(t *testing.T) { + config := &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: "test-name", + Attempt: 1, + }, + Image: &runtime.ImageSpec{Image: "test-image"}, + Labels: map[string]string{"a": "b"}, + Annotations: map[string]string{"c": "d"}, + } + createdAt := time.Now().UnixNano() + container, err := containerstore.NewContainer( + containerstore.Metadata{ + ID: "test-id", + Name: "test-name", + SandboxID: "test-sandbox-id", + Config: config, + ImageRef: "test-image-ref", + }, + containerstore.WithFakeStatus( + containerstore.Status{ + Pid: 1234, + CreatedAt: createdAt, + StartedAt: time.Now().UnixNano(), + FinishedAt: time.Now().UnixNano(), + ExitCode: 1, + Reason: "test-reason", + Message: "test-message", + }, + ), + ) + assert.NoError(t, err) + expect := &runtime.Container{ + Id: "test-id", + PodSandboxId: "test-sandbox-id", + Metadata: config.GetMetadata(), + Image: config.GetImage(), + ImageRef: "test-image-ref", + State: runtime.ContainerState_CONTAINER_EXITED, + CreatedAt: createdAt, + Labels: config.GetLabels(), + Annotations: config.GetAnnotations(), + } + c := toCRIContainer(container) + assert.Equal(t, expect, c) +} + +func TestFilterContainers(t *testing.T) { + c := newTestCRIService() + + testContainers := []*runtime.Container{ + { + Id: "1", + PodSandboxId: "s-1", + Metadata: &runtime.ContainerMetadata{Name: "name-1", Attempt: 1}, + State: runtime.ContainerState_CONTAINER_RUNNING, + }, + { + Id: "2", + PodSandboxId: "s-2", + Metadata: &runtime.ContainerMetadata{Name: "name-2", Attempt: 2}, + State: runtime.ContainerState_CONTAINER_EXITED, + Labels: 
map[string]string{"a": "b"}, + }, + { + Id: "3", + PodSandboxId: "s-2", + Metadata: &runtime.ContainerMetadata{Name: "name-2", Attempt: 3}, + State: runtime.ContainerState_CONTAINER_CREATED, + Labels: map[string]string{"c": "d"}, + }, + } + for desc, test := range map[string]struct { + filter *runtime.ContainerFilter + expect []*runtime.Container + }{ + "no filter": { + expect: testContainers, + }, + "id filter": { + filter: &runtime.ContainerFilter{Id: "2"}, + expect: []*runtime.Container{testContainers[1]}, + }, + "state filter": { + filter: &runtime.ContainerFilter{ + State: &runtime.ContainerStateValue{ + State: runtime.ContainerState_CONTAINER_EXITED, + }, + }, + expect: []*runtime.Container{testContainers[1]}, + }, + "label filter": { + filter: &runtime.ContainerFilter{ + LabelSelector: map[string]string{"a": "b"}, + }, + expect: []*runtime.Container{testContainers[1]}, + }, + "sandbox id filter": { + filter: &runtime.ContainerFilter{PodSandboxId: "s-2"}, + expect: []*runtime.Container{testContainers[1], testContainers[2]}, + }, + "mixed filter not matched": { + filter: &runtime.ContainerFilter{ + Id: "1", + PodSandboxId: "s-2", + LabelSelector: map[string]string{"a": "b"}, + }, + expect: []*runtime.Container{}, + }, + "mixed filter matched": { + filter: &runtime.ContainerFilter{ + PodSandboxId: "s-2", + State: &runtime.ContainerStateValue{ + State: runtime.ContainerState_CONTAINER_CREATED, + }, + LabelSelector: map[string]string{"c": "d"}, + }, + expect: []*runtime.Container{testContainers[2]}, + }, + } { + t.Run(desc, func(t *testing.T) { + filtered := c.filterCRIContainers(testContainers, test.filter) + assert.Equal(t, test.expect, filtered, desc) + }) + } +} + +// containerForTest is a helper type for test. 
+type containerForTest struct { + metadata containerstore.Metadata + status containerstore.Status +} + +func (c containerForTest) toContainer() (containerstore.Container, error) { + return containerstore.NewContainer( + c.metadata, + containerstore.WithFakeStatus(c.status), + ) +} + +func TestListContainers(t *testing.T) { + c := newTestCRIService() + sandboxesInStore := []sandboxstore.Sandbox{ + sandboxstore.NewSandbox( + sandboxstore.Metadata{ + ID: "s-1abcdef1234", + Name: "sandboxname-1", + Config: &runtime.PodSandboxConfig{Metadata: &runtime.PodSandboxMetadata{Name: "podname-1"}}, + }, + sandboxstore.Status{ + State: sandboxstore.StateReady, + }, + ), + sandboxstore.NewSandbox( + sandboxstore.Metadata{ + ID: "s-2abcdef1234", + Name: "sandboxname-2", + Config: &runtime.PodSandboxConfig{Metadata: &runtime.PodSandboxMetadata{Name: "podname-2"}}, + }, + sandboxstore.Status{ + State: sandboxstore.StateNotReady, + }, + ), + } + createdAt := time.Now().UnixNano() + startedAt := time.Now().UnixNano() + finishedAt := time.Now().UnixNano() + containersInStore := []containerForTest{ + { + metadata: containerstore.Metadata{ + ID: "c-1container", + Name: "name-1", + SandboxID: "s-1abcdef1234", + Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-1"}}, + }, + status: containerstore.Status{CreatedAt: createdAt}, + }, + { + metadata: containerstore.Metadata{ + ID: "c-2container", + Name: "name-2", + SandboxID: "s-1abcdef1234", + Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-2"}}, + }, + status: containerstore.Status{ + CreatedAt: createdAt, + StartedAt: startedAt, + }, + }, + { + metadata: containerstore.Metadata{ + ID: "c-3container", + Name: "name-3", + SandboxID: "s-1abcdef1234", + Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-3"}}, + }, + status: containerstore.Status{ + CreatedAt: createdAt, + StartedAt: startedAt, + FinishedAt: finishedAt, + }, + }, + { + metadata: 
containerstore.Metadata{ + ID: "c-4container", + Name: "name-4", + SandboxID: "s-2abcdef1234", + Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-4"}}, + }, + status: containerstore.Status{ + CreatedAt: createdAt, + }, + }, + } + + expectedContainers := []*runtime.Container{ + { + Id: "c-1container", + PodSandboxId: "s-1abcdef1234", + Metadata: &runtime.ContainerMetadata{Name: "name-1"}, + State: runtime.ContainerState_CONTAINER_CREATED, + CreatedAt: createdAt, + }, + { + Id: "c-2container", + PodSandboxId: "s-1abcdef1234", + Metadata: &runtime.ContainerMetadata{Name: "name-2"}, + State: runtime.ContainerState_CONTAINER_RUNNING, + CreatedAt: createdAt, + }, + { + Id: "c-3container", + PodSandboxId: "s-1abcdef1234", + Metadata: &runtime.ContainerMetadata{Name: "name-3"}, + State: runtime.ContainerState_CONTAINER_EXITED, + CreatedAt: createdAt, + }, + { + Id: "c-4container", + PodSandboxId: "s-2abcdef1234", + Metadata: &runtime.ContainerMetadata{Name: "name-4"}, + State: runtime.ContainerState_CONTAINER_CREATED, + CreatedAt: createdAt, + }, + } + + // Inject test sandbox metadata + for _, sb := range sandboxesInStore { + assert.NoError(t, c.sandboxStore.Add(sb)) + } + + // Inject test container metadata + for _, cntr := range containersInStore { + container, err := cntr.toContainer() + assert.NoError(t, err) + assert.NoError(t, c.containerStore.Add(container)) + } + + for testdesc, testdata := range map[string]struct { + filter *runtime.ContainerFilter + expect []*runtime.Container + }{ + "test without filter": { + filter: &runtime.ContainerFilter{}, + expect: expectedContainers, + }, + "test filter by sandboxid": { + filter: &runtime.ContainerFilter{ + PodSandboxId: "s-1abcdef1234", + }, + expect: expectedContainers[:3], + }, + "test filter by truncated sandboxid": { + filter: &runtime.ContainerFilter{ + PodSandboxId: "s-1", + }, + expect: expectedContainers[:3], + }, + "test filter by containerid": { + filter: 
&runtime.ContainerFilter{ + Id: "c-1container", + }, + expect: expectedContainers[:1], + }, + "test filter by truncated containerid": { + filter: &runtime.ContainerFilter{ + Id: "c-1", + }, + expect: expectedContainers[:1], + }, + "test filter by containerid and sandboxid": { + filter: &runtime.ContainerFilter{ + Id: "c-1container", + PodSandboxId: "s-1abcdef1234", + }, + expect: expectedContainers[:1], + }, + "test filter by truncated containerid and truncated sandboxid": { + filter: &runtime.ContainerFilter{ + Id: "c-1", + PodSandboxId: "s-1", + }, + expect: expectedContainers[:1], + }, + } { + t.Run(testdesc, func(t *testing.T) { + resp, err := c.ListContainers(context.Background(), &runtime.ListContainersRequest{Filter: testdata.filter}) + assert.NoError(t, err) + require.NotNil(t, resp) + containers := resp.GetContainers() + assert.Len(t, containers, len(testdata.expect)) + for _, cntr := range testdata.expect { + assert.Contains(t, containers, cntr) + } + }) + } +} diff --git a/pkg/cri/sbserver/container_log_reopen.go b/pkg/cri/sbserver/container_log_reopen.go new file mode 100644 index 000000000..96d726c28 --- /dev/null +++ b/pkg/cri/sbserver/container_log_reopen.go @@ -0,0 +1,52 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "errors" + "fmt" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// ReopenContainerLog asks the cri plugin to reopen the stdout/stderr log file for the container. +// This is often called after the log file has been rotated. +func (c *criService) ReopenContainerLog(ctx context.Context, r *runtime.ReopenContainerLogRequest) (*runtime.ReopenContainerLogResponse, error) { + container, err := c.containerStore.Get(r.GetContainerId()) + if err != nil { + return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err) + } + + if container.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING { + return nil, errors.New("container is not running") + } + + // Create new container logger and replace the existing ones. + stdoutWC, stderrWC, err := c.createContainerLoggers(container.LogPath, container.Config.GetTty()) + if err != nil { + return nil, err + } + oldStdoutWC, oldStderrWC := container.IO.AddOutput("log", stdoutWC, stderrWC) + if oldStdoutWC != nil { + oldStdoutWC.Close() + } + if oldStderrWC != nil { + oldStderrWC.Close() + } + return &runtime.ReopenContainerLogResponse{}, nil +} diff --git a/pkg/cri/sbserver/container_remove.go b/pkg/cri/sbserver/container_remove.go new file mode 100644 index 000000000..563d7cc73 --- /dev/null +++ b/pkg/cri/sbserver/container_remove.go @@ -0,0 +1,143 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "errors" + "fmt" + "time" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + "github.com/sirupsen/logrus" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// RemoveContainer removes the container. +func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) { + start := time.Now() + container, err := c.containerStore.Get(r.GetContainerId()) + if err != nil { + if !errdefs.IsNotFound(err) { + return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err) + } + // Do not return error if container metadata doesn't exist. + log.G(ctx).Tracef("RemoveContainer called for container %q that does not exist", r.GetContainerId()) + return &runtime.RemoveContainerResponse{}, nil + } + id := container.ID + i, err := container.Container.Info(ctx) + if err != nil { + return nil, fmt.Errorf("get container info: %w", err) + } + + // Forcibly stop the containers if they are in running or unknown state + state := container.Status.Get().State() + if state == runtime.ContainerState_CONTAINER_RUNNING || + state == runtime.ContainerState_CONTAINER_UNKNOWN { + logrus.Infof("Forcibly stopping container %q", id) + if err := c.stopContainer(ctx, container, 0); err != nil { + return nil, fmt.Errorf("failed to forcibly stop container %q: %w", id, err) + } + + } + + // Set removing state to prevent other start/remove operations against this container + // while it's being removed. + if err := setContainerRemoving(container); err != nil { + return nil, fmt.Errorf("failed to set removing state for container %q: %w", id, err) + } + defer func() { + if retErr != nil { + // Reset removing if remove failed. 
+ if err := resetContainerRemoving(container); err != nil { + log.G(ctx).WithError(err).Errorf("failed to reset removing state for container %q", id) + } + } + }() + + // NOTE(random-liu): Docker set container to "Dead" state when start removing the + // container so as to avoid start/restart the container again. However, for current + // kubelet implementation, we'll never start a container once we decide to remove it, + // so we don't need the "Dead" state for now. + + // Delete containerd container. + if err := container.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil { + if !errdefs.IsNotFound(err) { + return nil, fmt.Errorf("failed to delete containerd container %q: %w", id, err) + } + log.G(ctx).Tracef("Remove called for containerd container %q that does not exist", id) + } + + // Delete container checkpoint. + if err := container.Delete(); err != nil { + return nil, fmt.Errorf("failed to delete container checkpoint for %q: %w", id, err) + } + + containerRootDir := c.getContainerRootDir(id) + if err := ensureRemoveAll(ctx, containerRootDir); err != nil { + return nil, fmt.Errorf("failed to remove container root directory %q: %w", + containerRootDir, err) + } + volatileContainerRootDir := c.getVolatileContainerRootDir(id) + if err := ensureRemoveAll(ctx, volatileContainerRootDir); err != nil { + return nil, fmt.Errorf("failed to remove volatile container root directory %q: %w", + volatileContainerRootDir, err) + } + + c.containerStore.Delete(id) + + c.containerNameIndex.ReleaseByKey(id) + + containerRemoveTimer.WithValues(i.Runtime.Name).UpdateSince(start) + + return &runtime.RemoveContainerResponse{}, nil +} + +// setContainerRemoving sets the container into removing state. In removing state, the +// container will not be started or removed again. 
+func setContainerRemoving(container containerstore.Container) error { + return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) { + // Do not remove container if it's still running or unknown. + if status.State() == runtime.ContainerState_CONTAINER_RUNNING { + return status, errors.New("container is still running, to stop first") + } + if status.State() == runtime.ContainerState_CONTAINER_UNKNOWN { + return status, errors.New("container state is unknown, to stop first") + } + if status.Starting { + return status, errors.New("container is in starting state, can't be removed") + } + if status.Removing { + return status, errors.New("container is already in removing state") + } + status.Removing = true + return status, nil + }) +} + +// resetContainerRemoving resets the container removing state on remove failure. So +// that we could remove the container again. +func resetContainerRemoving(container containerstore.Container) error { + return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) { + status.Removing = false + return status, nil + }) +} diff --git a/pkg/cri/sbserver/container_remove_test.go b/pkg/cri/sbserver/container_remove_test.go new file mode 100644 index 000000000..c3b049299 --- /dev/null +++ b/pkg/cri/sbserver/container_remove_test.go @@ -0,0 +1,86 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + + containerstore "github.com/containerd/containerd/pkg/cri/store/container" +) + +// TestSetContainerRemoving tests setContainerRemoving sets removing +// state correctly. +func TestSetContainerRemoving(t *testing.T) { + testID := "test-id" + for desc, test := range map[string]struct { + status containerstore.Status + expectErr bool + }{ + "should return error when container is in running state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + StartedAt: time.Now().UnixNano(), + }, + expectErr: true, + }, + "should return error when container is in starting state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + Starting: true, + }, + expectErr: true, + }, + "should return error when container is in removing state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + StartedAt: time.Now().UnixNano(), + FinishedAt: time.Now().UnixNano(), + Removing: true, + }, + expectErr: true, + }, + "should not return error when container is not running and removing": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + StartedAt: time.Now().UnixNano(), + FinishedAt: time.Now().UnixNano(), + }, + expectErr: false, + }, + } { + t.Run(desc, func(t *testing.T) { + container, err := containerstore.NewContainer( + containerstore.Metadata{ID: testID}, + containerstore.WithFakeStatus(test.status), + ) + assert.NoError(t, err) + err = setContainerRemoving(container) + if test.expectErr { + assert.Error(t, err) + assert.Equal(t, test.status, container.Status.Get(), "metadata should not be updated") + } else { + assert.NoError(t, err) + assert.True(t, container.Status.Get().Removing, "removing should be set") + assert.NoError(t, resetContainerRemoving(container)) + assert.False(t, container.Status.Get().Removing, "removing should be reset") + } + }) + } +} diff --git 
a/pkg/cri/sbserver/container_start.go b/pkg/cri/sbserver/container_start.go new file mode 100644 index 000000000..6321524c8 --- /dev/null +++ b/pkg/cri/sbserver/container_start.go @@ -0,0 +1,249 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "errors" + "fmt" + "io" + "time" + + "github.com/containerd/containerd" + containerdio "github.com/containerd/containerd/cio" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + "github.com/containerd/nri" + v1 "github.com/containerd/nri/types/v1" + "github.com/sirupsen/logrus" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + cio "github.com/containerd/containerd/pkg/cri/io" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + ctrdutil "github.com/containerd/containerd/pkg/cri/util" + cioutil "github.com/containerd/containerd/pkg/ioutil" +) + +// StartContainer starts the container. 
+func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) { + start := time.Now() + cntr, err := c.containerStore.Get(r.GetContainerId()) + if err != nil { + return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err) + } + + info, err := cntr.Container.Info(ctx) + if err != nil { + return nil, fmt.Errorf("get container info: %w", err) + } + + id := cntr.ID + meta := cntr.Metadata + container := cntr.Container + config := meta.Config + + // Set starting state to prevent other start/remove operations against this container + // while it's being started. + if err := setContainerStarting(cntr); err != nil { + return nil, fmt.Errorf("failed to set starting state for container %q: %w", id, err) + } + defer func() { + if retErr != nil { + // Set container to exited if fail to start. + if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { + status.Pid = 0 + status.FinishedAt = time.Now().UnixNano() + status.ExitCode = errorStartExitCode + status.Reason = errorStartReason + status.Message = retErr.Error() + return status, nil + }); err != nil { + log.G(ctx).WithError(err).Errorf("failed to set start failure state for container %q", id) + } + } + if err := resetContainerStarting(cntr); err != nil { + log.G(ctx).WithError(err).Errorf("failed to reset starting state for container %q", id) + } + }() + + // Get sandbox config from sandbox store. + sandbox, err := c.sandboxStore.Get(meta.SandboxID) + if err != nil { + return nil, fmt.Errorf("sandbox %q not found: %w", meta.SandboxID, err) + } + sandboxID := meta.SandboxID + if sandbox.Status.Get().State != sandboxstore.StateReady { + return nil, fmt.Errorf("sandbox container %q is not running", sandboxID) + } + + // Recheck target container validity in Linux namespace options. 
+ if linux := config.GetLinux(); linux != nil { + nsOpts := linux.GetSecurityContext().GetNamespaceOptions() + if nsOpts.GetPid() == runtime.NamespaceMode_TARGET { + _, err := c.validateTargetContainer(sandboxID, nsOpts.TargetId) + if err != nil { + return nil, fmt.Errorf("invalid target container: %w", err) + } + } + } + + ioCreation := func(id string) (_ containerdio.IO, err error) { + stdoutWC, stderrWC, err := c.createContainerLoggers(meta.LogPath, config.GetTty()) + if err != nil { + return nil, fmt.Errorf("failed to create container loggers: %w", err) + } + cntr.IO.AddOutput("log", stdoutWC, stderrWC) + cntr.IO.Pipe() + return cntr.IO, nil + } + + ctrInfo, err := container.Info(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get container info: %w", err) + } + + ociRuntime, err := c.getSandboxRuntime(sandbox.Config, sandbox.Metadata.RuntimeHandler) + if err != nil { + return nil, fmt.Errorf("failed to get sandbox runtime: %w", err) + } + + taskOpts := c.taskOpts(ctrInfo.Runtime.Name) + if ociRuntime.Path != "" { + taskOpts = append(taskOpts, containerd.WithRuntimePath(ociRuntime.Path)) + } + task, err := container.NewTask(ctx, ioCreation, taskOpts...) + if err != nil { + return nil, fmt.Errorf("failed to create containerd task: %w", err) + } + defer func() { + if retErr != nil { + deferCtx, deferCancel := ctrdutil.DeferContext() + defer deferCancel() + // It's possible that task is deleted by event monitor. + if _, err := task.Delete(deferCtx, WithNRISandboxDelete(sandboxID), containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { + log.G(ctx).WithError(err).Errorf("Failed to delete containerd task %q", id) + } + } + }() + + // wait is a long running background request, no timeout needed. 
+ exitCh, err := task.Wait(ctrdutil.NamespacedContext()) + if err != nil { + return nil, fmt.Errorf("failed to wait for containerd task: %w", err) + } + nric, err := nri.New() + if err != nil { + log.G(ctx).WithError(err).Error("unable to create nri client") + } + if nric != nil { + nriSB := &nri.Sandbox{ + ID: sandboxID, + Labels: sandbox.Config.Labels, + } + if _, err := nric.InvokeWithSandbox(ctx, task, v1.Create, nriSB); err != nil { + return nil, fmt.Errorf("nri invoke: %w", err) + } + } + + // Start containerd task. + if err := task.Start(ctx); err != nil { + return nil, fmt.Errorf("failed to start containerd task %q: %w", id, err) + } + + // Update container start timestamp. + if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { + status.Pid = task.Pid() + status.StartedAt = time.Now().UnixNano() + return status, nil + }); err != nil { + return nil, fmt.Errorf("failed to update container %q state: %w", id, err) + } + + // It handles the TaskExit event and update container state after this. + c.eventMonitor.startContainerExitMonitor(context.Background(), id, task.Pid(), exitCh) + + containerStartTimer.WithValues(info.Runtime.Name).UpdateSince(start) + + return &runtime.StartContainerResponse{}, nil +} + +// setContainerStarting sets the container into starting state. In starting state, the +// container will not be removed or started again. +func setContainerStarting(container containerstore.Container) error { + return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) { + // Return error if container is not in created state. + if status.State() != runtime.ContainerState_CONTAINER_CREATED { + return status, fmt.Errorf("container is in %s state", criContainerStateToString(status.State())) + } + // Do not start the container when there is a removal in progress. 
+ if status.Removing { + return status, errors.New("container is in removing state, can't be started") + } + if status.Starting { + return status, errors.New("container is already in starting state") + } + status.Starting = true + return status, nil + }) +} + +// resetContainerStarting resets the container starting state on start failure. So +// that we could remove the container later. +func resetContainerStarting(container containerstore.Container) error { + return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) { + status.Starting = false + return status, nil + }) +} + +// createContainerLoggers creates container loggers and return write closer for stdout and stderr. +func (c *criService) createContainerLoggers(logPath string, tty bool) (stdout io.WriteCloser, stderr io.WriteCloser, err error) { + if logPath != "" { + // Only generate container log when log path is specified. + f, err := openLogFile(logPath) + if err != nil { + return nil, nil, fmt.Errorf("failed to create and open log file: %w", err) + } + defer func() { + if err != nil { + f.Close() + } + }() + var stdoutCh, stderrCh <-chan struct{} + wc := cioutil.NewSerialWriteCloser(f) + stdout, stdoutCh = cio.NewCRILogger(logPath, wc, cio.Stdout, c.config.MaxContainerLogLineSize) + // Only redirect stderr when there is no tty. 
+ if !tty { + stderr, stderrCh = cio.NewCRILogger(logPath, wc, cio.Stderr, c.config.MaxContainerLogLineSize) + } + go func() { + if stdoutCh != nil { + <-stdoutCh + } + if stderrCh != nil { + <-stderrCh + } + logrus.Debugf("Finish redirecting log file %q, closing it", logPath) + f.Close() + }() + } else { + stdout = cio.NewDiscardLogger() + stderr = cio.NewDiscardLogger() + } + return +} diff --git a/pkg/cri/sbserver/container_start_test.go b/pkg/cri/sbserver/container_start_test.go new file mode 100644 index 000000000..91184cd74 --- /dev/null +++ b/pkg/cri/sbserver/container_start_test.go @@ -0,0 +1,99 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + + containerstore "github.com/containerd/containerd/pkg/cri/store/container" +) + +// TestSetContainerStarting tests setContainerStarting sets removing +// state correctly. 
+func TestSetContainerStarting(t *testing.T) { + testID := "test-id" + for desc, test := range map[string]struct { + status containerstore.Status + expectErr bool + }{ + + "should not return error when container is in created state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + }, + expectErr: false, + }, + "should return error when container is in running state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + StartedAt: time.Now().UnixNano(), + }, + expectErr: true, + }, + "should return error when container is in exited state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + StartedAt: time.Now().UnixNano(), + FinishedAt: time.Now().UnixNano(), + }, + expectErr: true, + }, + "should return error when container is in unknown state": { + status: containerstore.Status{ + CreatedAt: 0, + StartedAt: 0, + FinishedAt: 0, + }, + expectErr: true, + }, + "should return error when container is in starting state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + Starting: true, + }, + expectErr: true, + }, + "should return error when container is in removing state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + Removing: true, + }, + expectErr: true, + }, + } { + t.Run(desc, func(t *testing.T) { + container, err := containerstore.NewContainer( + containerstore.Metadata{ID: testID}, + containerstore.WithFakeStatus(test.status), + ) + assert.NoError(t, err) + err = setContainerStarting(container) + if test.expectErr { + assert.Error(t, err) + assert.Equal(t, test.status, container.Status.Get(), "metadata should not be updated") + } else { + assert.NoError(t, err) + assert.True(t, container.Status.Get().Starting, "starting should be set") + assert.NoError(t, resetContainerStarting(container)) + assert.False(t, container.Status.Get().Starting, "starting should be reset") + } + }) + } +} diff --git a/pkg/cri/sbserver/container_stats.go 
b/pkg/cri/sbserver/container_stats.go new file mode 100644 index 000000000..a41e4da4f --- /dev/null +++ b/pkg/cri/sbserver/container_stats.go @@ -0,0 +1,48 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + + "github.com/containerd/containerd/api/services/tasks/v1" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// ContainerStats returns stats of the container. If the container does not +// exist, the call returns an error. 
+func (c *criService) ContainerStats(ctx context.Context, in *runtime.ContainerStatsRequest) (*runtime.ContainerStatsResponse, error) { + cntr, err := c.containerStore.Get(in.GetContainerId()) + if err != nil { + return nil, fmt.Errorf("failed to find container: %w", err) + } + request := &tasks.MetricsRequest{Filters: []string{"id==" + cntr.ID}} + resp, err := c.client.TaskService().Metrics(ctx, request) + if err != nil { + return nil, fmt.Errorf("failed to fetch metrics for task: %w", err) + } + if len(resp.Metrics) != 1 { + return nil, fmt.Errorf("unexpected metrics response: %+v", resp.Metrics) + } + + cs, err := c.containerMetrics(cntr.Metadata, resp.Metrics[0]) + if err != nil { //nolint:staticcheck // Ignore SA4023 as some platforms always return nil (stats unimplemented) + return nil, fmt.Errorf("failed to decode container metrics: %w", err) + } + return &runtime.ContainerStatsResponse{Stats: cs}, nil +} diff --git a/pkg/cri/sbserver/container_stats_list.go b/pkg/cri/sbserver/container_stats_list.go new file mode 100644 index 000000000..aae5f849a --- /dev/null +++ b/pkg/cri/sbserver/container_stats_list.go @@ -0,0 +1,117 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "fmt" + + "github.com/containerd/containerd/api/services/tasks/v1" + "github.com/containerd/containerd/api/types" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + containerstore "github.com/containerd/containerd/pkg/cri/store/container" +) + +// ListContainerStats returns stats of all running containers. +func (c *criService) ListContainerStats( + ctx context.Context, + in *runtime.ListContainerStatsRequest, +) (*runtime.ListContainerStatsResponse, error) { + request, containers, err := c.buildTaskMetricsRequest(in) + if err != nil { + return nil, fmt.Errorf("failed to build metrics request: %w", err) + } + resp, err := c.client.TaskService().Metrics(ctx, request) + if err != nil { + return nil, fmt.Errorf("failed to fetch metrics for tasks: %w", err) + } + criStats, err := c.toCRIContainerStats(resp.Metrics, containers) + if err != nil { + return nil, fmt.Errorf("failed to convert to cri containerd stats format: %w", err) + } + return criStats, nil +} + +func (c *criService) toCRIContainerStats( + stats []*types.Metric, + containers []containerstore.Container, +) (*runtime.ListContainerStatsResponse, error) { + statsMap := make(map[string]*types.Metric) + for _, stat := range stats { + statsMap[stat.ID] = stat + } + containerStats := new(runtime.ListContainerStatsResponse) + for _, cntr := range containers { + cs, err := c.containerMetrics(cntr.Metadata, statsMap[cntr.ID]) + if err != nil { //nolint:staticcheck // Ignore SA4023 as some platforms always return nil (metrics unimplemented) + return nil, fmt.Errorf("failed to decode container metrics for %q: %w", cntr.ID, err) + } + containerStats.Stats = append(containerStats.Stats, cs) + } + return containerStats, nil +} + +func (c *criService) normalizeContainerStatsFilter(filter *runtime.ContainerStatsFilter) { + if cntr, err := c.containerStore.Get(filter.GetId()); err == nil { + filter.Id = cntr.ID + } + if sb, err := 
c.sandboxStore.Get(filter.GetPodSandboxId()); err == nil { + filter.PodSandboxId = sb.ID + } +} + +// buildTaskMetricsRequest constructs a tasks.MetricsRequest based on +// the information in the stats request and the containerStore +func (c *criService) buildTaskMetricsRequest( + r *runtime.ListContainerStatsRequest, +) (*tasks.MetricsRequest, []containerstore.Container, error) { + req := &tasks.MetricsRequest{} + if r.GetFilter() == nil { + return req, c.containerStore.List(), nil + } + c.normalizeContainerStatsFilter(r.GetFilter()) + var containers []containerstore.Container + for _, cntr := range c.containerStore.List() { + if r.GetFilter().GetId() != "" && cntr.ID != r.GetFilter().GetId() { + continue + } + if r.GetFilter().GetPodSandboxId() != "" && cntr.SandboxID != r.GetFilter().GetPodSandboxId() { + continue + } + if r.GetFilter().GetLabelSelector() != nil && + !matchLabelSelector(r.GetFilter().GetLabelSelector(), cntr.Config.GetLabels()) { + continue + } + containers = append(containers, cntr) + req.Filters = append(req.Filters, "id=="+cntr.ID) + } + return req, containers, nil +} + +func matchLabelSelector(selector, labels map[string]string) bool { + for k, v := range selector { + if val, ok := labels[k]; ok { + if v != val { + return false + } + } else { + return false + } + } + return true +} diff --git a/pkg/cri/sbserver/container_stats_list_linux.go b/pkg/cri/sbserver/container_stats_list_linux.go new file mode 100644 index 000000000..bdf4d1d54 --- /dev/null +++ b/pkg/cri/sbserver/container_stats_list_linux.go @@ -0,0 +1,279 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "fmt" + "time" + + "github.com/containerd/containerd/api/types" + v1 "github.com/containerd/containerd/metrics/types/v1" + v2 "github.com/containerd/containerd/metrics/types/v2" + "github.com/containerd/containerd/protobuf" + "github.com/containerd/typeurl" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + "github.com/containerd/containerd/pkg/cri/store/stats" +) + +func (c *criService) containerMetrics( + meta containerstore.Metadata, + stats *types.Metric, +) (*runtime.ContainerStats, error) { + var cs runtime.ContainerStats + var usedBytes, inodesUsed uint64 + sn, err := c.snapshotStore.Get(meta.ID) + // If snapshotstore doesn't have cached snapshot information + // set WritableLayer usage to zero + if err == nil { + usedBytes = sn.Size + inodesUsed = sn.Inodes + } + cs.WritableLayer = &runtime.FilesystemUsage{ + Timestamp: sn.Timestamp, + FsId: &runtime.FilesystemIdentifier{ + Mountpoint: c.imageFSPath, + }, + UsedBytes: &runtime.UInt64Value{Value: usedBytes}, + InodesUsed: &runtime.UInt64Value{Value: inodesUsed}, + } + cs.Attributes = &runtime.ContainerAttributes{ + Id: meta.ID, + Metadata: meta.Config.GetMetadata(), + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + } + + if stats != nil { + s, err := typeurl.UnmarshalAny(stats.Data) + if err != nil { + return nil, fmt.Errorf("failed to extract container metrics: %w", err) + } + + cpuStats, err := c.cpuContainerStats(meta.ID, false /* isSandbox */, s, 
protobuf.FromTimestamp(stats.Timestamp)) + if err != nil { + return nil, fmt.Errorf("failed to obtain cpu stats: %w", err) + } + cs.Cpu = cpuStats + + memoryStats, err := c.memoryContainerStats(meta.ID, s, protobuf.FromTimestamp(stats.Timestamp)) + if err != nil { + return nil, fmt.Errorf("failed to obtain memory stats: %w", err) + } + cs.Memory = memoryStats + } + + return &cs, nil +} + +func (c *criService) getUsageNanoCores(containerID string, isSandbox bool, currentUsageCoreNanoSeconds uint64, currentTimestamp time.Time) (uint64, error) { + var oldStats *stats.ContainerStats + + if isSandbox { + sandbox, err := c.sandboxStore.Get(containerID) + if err != nil { + return 0, fmt.Errorf("failed to get sandbox container: %s: %w", containerID, err) + } + oldStats = sandbox.Stats + } else { + container, err := c.containerStore.Get(containerID) + if err != nil { + return 0, fmt.Errorf("failed to get container ID: %s: %w", containerID, err) + } + oldStats = container.Stats + } + + if oldStats == nil { + newStats := &stats.ContainerStats{ + UsageCoreNanoSeconds: currentUsageCoreNanoSeconds, + Timestamp: currentTimestamp, + } + if isSandbox { + err := c.sandboxStore.UpdateContainerStats(containerID, newStats) + if err != nil { + return 0, fmt.Errorf("failed to update sandbox stats container ID: %s: %w", containerID, err) + } + } else { + err := c.containerStore.UpdateContainerStats(containerID, newStats) + if err != nil { + return 0, fmt.Errorf("failed to update container stats ID: %s: %w", containerID, err) + } + } + return 0, nil + } + + nanoSeconds := currentTimestamp.UnixNano() - oldStats.Timestamp.UnixNano() + + // zero or negative interval + if nanoSeconds <= 0 { + return 0, nil + } + + newUsageNanoCores := uint64(float64(currentUsageCoreNanoSeconds-oldStats.UsageCoreNanoSeconds) / + float64(nanoSeconds) * float64(time.Second/time.Nanosecond)) + + newStats := &stats.ContainerStats{ + UsageCoreNanoSeconds: currentUsageCoreNanoSeconds, + Timestamp: currentTimestamp, + 
} + if isSandbox { + err := c.sandboxStore.UpdateContainerStats(containerID, newStats) + if err != nil { + return 0, fmt.Errorf("failed to update sandbox container stats: %s: %w", containerID, err) + } + + } else { + err := c.containerStore.UpdateContainerStats(containerID, newStats) + if err != nil { + return 0, fmt.Errorf("failed to update container stats ID: %s: %w", containerID, err) + } + } + + return newUsageNanoCores, nil +} + +// getWorkingSet calculates workingset memory from cgroup memory stats. +// The caller should make sure memory is not nil. +// workingset = usage - total_inactive_file +func getWorkingSet(memory *v1.MemoryStat) uint64 { + if memory.Usage == nil { + return 0 + } + var workingSet uint64 + if memory.TotalInactiveFile < memory.Usage.Usage { + workingSet = memory.Usage.Usage - memory.TotalInactiveFile + } + return workingSet +} + +// getWorkingSetV2 calculates workingset memory from cgroupv2 memory stats. +// The caller should make sure memory is not nil. +// workingset = usage - inactive_file +func getWorkingSetV2(memory *v2.MemoryStat) uint64 { + var workingSet uint64 + if memory.InactiveFile < memory.Usage { + workingSet = memory.Usage - memory.InactiveFile + } + return workingSet +} + +func isMemoryUnlimited(v uint64) bool { + // Size after which we consider memory to be "unlimited". This is not + // MaxInt64 due to rounding by the kernel. 
+ // TODO: k8s or cadvisor should export this https://github.com/google/cadvisor/blob/2b6fbacac7598e0140b5bc8428e3bdd7d86cf5b9/metrics/prometheus.go#L1969-L1971 + const maxMemorySize = uint64(1 << 62) + + return v > maxMemorySize +} + +// https://github.com/kubernetes/kubernetes/blob/b47f8263e18c7b13dba33fba23187e5e0477cdbd/pkg/kubelet/stats/helper.go#L68-L71 +func getAvailableBytes(memory *v1.MemoryStat, workingSetBytes uint64) uint64 { + // memory limit - working set bytes + if !isMemoryUnlimited(memory.Usage.Limit) { + return memory.Usage.Limit - workingSetBytes + } + return 0 +} + +func getAvailableBytesV2(memory *v2.MemoryStat, workingSetBytes uint64) uint64 { + // memory limit (memory.max) for cgroupv2 - working set bytes + if !isMemoryUnlimited(memory.UsageLimit) { + return memory.UsageLimit - workingSetBytes + } + return 0 +} + +func (c *criService) cpuContainerStats(ID string, isSandbox bool, stats interface{}, timestamp time.Time) (*runtime.CpuUsage, error) { + switch metrics := stats.(type) { + case *v1.Metrics: + if metrics.CPU != nil && metrics.CPU.Usage != nil { + + usageNanoCores, err := c.getUsageNanoCores(ID, isSandbox, metrics.CPU.Usage.Total, timestamp) + if err != nil { + return nil, fmt.Errorf("failed to get usage nano cores, containerID: %s: %w", ID, err) + } + + return &runtime.CpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: metrics.CPU.Usage.Total}, + UsageNanoCores: &runtime.UInt64Value{Value: usageNanoCores}, + }, nil + } + case *v2.Metrics: + if metrics.CPU != nil { + // convert to nano seconds + usageCoreNanoSeconds := metrics.CPU.UsageUsec * 1000 + + usageNanoCores, err := c.getUsageNanoCores(ID, isSandbox, usageCoreNanoSeconds, timestamp) + if err != nil { + return nil, fmt.Errorf("failed to get usage nano cores, containerID: %s: %w", ID, err) + } + + return &runtime.CpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: usageCoreNanoSeconds}, + 
UsageNanoCores: &runtime.UInt64Value{Value: usageNanoCores}, + }, nil + } + default: + return nil, fmt.Errorf("unexpected metrics type: %v", metrics) + } + return nil, nil +} + +func (c *criService) memoryContainerStats(ID string, stats interface{}, timestamp time.Time) (*runtime.MemoryUsage, error) { + switch metrics := stats.(type) { + case *v1.Metrics: + if metrics.Memory != nil && metrics.Memory.Usage != nil { + workingSetBytes := getWorkingSet(metrics.Memory) + + return &runtime.MemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{ + Value: workingSetBytes, + }, + AvailableBytes: &runtime.UInt64Value{Value: getAvailableBytes(metrics.Memory, workingSetBytes)}, + UsageBytes: &runtime.UInt64Value{Value: metrics.Memory.Usage.Usage}, + RssBytes: &runtime.UInt64Value{Value: metrics.Memory.TotalRSS}, + PageFaults: &runtime.UInt64Value{Value: metrics.Memory.TotalPgFault}, + MajorPageFaults: &runtime.UInt64Value{Value: metrics.Memory.TotalPgMajFault}, + }, nil + } + case *v2.Metrics: + if metrics.Memory != nil { + workingSetBytes := getWorkingSetV2(metrics.Memory) + + return &runtime.MemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{ + Value: workingSetBytes, + }, + AvailableBytes: &runtime.UInt64Value{Value: getAvailableBytesV2(metrics.Memory, workingSetBytes)}, + UsageBytes: &runtime.UInt64Value{Value: metrics.Memory.Usage}, + // Use Anon memory for RSS as cAdvisor on cgroupv2 + // see https://github.com/google/cadvisor/blob/a9858972e75642c2b1914c8d5428e33e6392c08a/container/libcontainer/handler.go#L799 + RssBytes: &runtime.UInt64Value{Value: metrics.Memory.Anon}, + PageFaults: &runtime.UInt64Value{Value: metrics.Memory.Pgfault}, + MajorPageFaults: &runtime.UInt64Value{Value: metrics.Memory.Pgmajfault}, + }, nil + } + default: + return nil, fmt.Errorf("unexpected metrics type: %v", metrics) + } + return nil, nil +} diff --git a/pkg/cri/sbserver/container_stats_list_linux_test.go 
b/pkg/cri/sbserver/container_stats_list_linux_test.go new file mode 100644 index 000000000..2d614d52a --- /dev/null +++ b/pkg/cri/sbserver/container_stats_list_linux_test.go @@ -0,0 +1,329 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "math" + "testing" + "time" + + v1 "github.com/containerd/cgroups/stats/v1" + v2 "github.com/containerd/cgroups/v2/stats" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + "github.com/stretchr/testify/assert" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +func TestGetWorkingSet(t *testing.T) { + for desc, test := range map[string]struct { + memory *v1.MemoryStat + expected uint64 + }{ + "nil memory usage": { + memory: &v1.MemoryStat{}, + expected: 0, + }, + "memory usage higher than inactive_total_file": { + memory: &v1.MemoryStat{ + TotalInactiveFile: 1000, + Usage: &v1.MemoryEntry{Usage: 2000}, + }, + expected: 1000, + }, + "memory usage lower than inactive_total_file": { + memory: &v1.MemoryStat{ + TotalInactiveFile: 2000, + Usage: &v1.MemoryEntry{Usage: 1000}, + }, + expected: 0, + }, + } { + t.Run(desc, func(t *testing.T) { + got := getWorkingSet(test.memory) + assert.Equal(t, test.expected, got) + }) + } +} + +func TestGetWorkingSetV2(t *testing.T) { + for desc, test := range map[string]struct { + memory *v2.MemoryStat + expected uint64 + }{ + "nil memory usage": { + memory: &v2.MemoryStat{}, + expected: 0, + }, + "memory usage 
higher than inactive_total_file": { + memory: &v2.MemoryStat{ + InactiveFile: 1000, + Usage: 2000, + }, + expected: 1000, + }, + "memory usage lower than inactive_total_file": { + memory: &v2.MemoryStat{ + InactiveFile: 2000, + Usage: 1000, + }, + expected: 0, + }, + } { + t.Run(desc, func(t *testing.T) { + got := getWorkingSetV2(test.memory) + assert.Equal(t, test.expected, got) + }) + } +} + +func TestGetAvailableBytes(t *testing.T) { + for desc, test := range map[string]struct { + memory *v1.MemoryStat + workingSetBytes uint64 + expected uint64 + }{ + + "no limit": { + memory: &v1.MemoryStat{ + Usage: &v1.MemoryEntry{ + Limit: math.MaxUint64, // no limit + Usage: 1000, + }, + }, + workingSetBytes: 500, + expected: 0, + }, + "with limit": { + memory: &v1.MemoryStat{ + Usage: &v1.MemoryEntry{ + Limit: 5000, + Usage: 1000, + }, + }, + workingSetBytes: 500, + expected: 5000 - 500, + }, + } { + t.Run(desc, func(t *testing.T) { + got := getAvailableBytes(test.memory, test.workingSetBytes) + assert.Equal(t, test.expected, got) + }) + } +} + +func TestGetAvailableBytesV2(t *testing.T) { + for desc, test := range map[string]struct { + memory *v2.MemoryStat + workingSetBytes uint64 + expected uint64 + }{ + + "no limit": { + memory: &v2.MemoryStat{ + UsageLimit: math.MaxUint64, // no limit + Usage: 1000, + }, + workingSetBytes: 500, + expected: 0, + }, + "with limit": { + memory: &v2.MemoryStat{ + UsageLimit: 5000, + Usage: 1000, + }, + workingSetBytes: 500, + expected: 5000 - 500, + }, + } { + t.Run(desc, func(t *testing.T) { + got := getAvailableBytesV2(test.memory, test.workingSetBytes) + assert.Equal(t, test.expected, got) + }) + } +} + +func TestContainerMetricsCPU(t *testing.T) { + c := newTestCRIService() + timestamp := time.Now() + secondAfterTimeStamp := timestamp.Add(time.Second) + ID := "ID" + + for desc, test := range map[string]struct { + firstMetrics interface{} + secondMetrics interface{} + expectedFirst *runtime.CpuUsage + expectedSecond *runtime.CpuUsage + 
}{ + "v1 metrics": { + firstMetrics: &v1.Metrics{ + CPU: &v1.CPUStat{ + Usage: &v1.CPUUsage{ + Total: 50, + }, + }, + }, + secondMetrics: &v1.Metrics{ + CPU: &v1.CPUStat{ + Usage: &v1.CPUUsage{ + Total: 500, + }, + }, + }, + expectedFirst: &runtime.CpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 50}, + UsageNanoCores: &runtime.UInt64Value{Value: 0}, + }, + expectedSecond: &runtime.CpuUsage{ + Timestamp: secondAfterTimeStamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 500}, + UsageNanoCores: &runtime.UInt64Value{Value: 450}, + }, + }, + } { + t.Run(desc, func(t *testing.T) { + container, err := containerstore.NewContainer( + containerstore.Metadata{ID: ID}, + ) + assert.NoError(t, err) + assert.Nil(t, container.Stats) + err = c.containerStore.Add(container) + assert.NoError(t, err) + + cpuUsage, err := c.cpuContainerStats(ID, false, test.firstMetrics, timestamp) + assert.NoError(t, err) + + container, err = c.containerStore.Get(ID) + assert.NoError(t, err) + assert.NotNil(t, container.Stats) + + assert.Equal(t, test.expectedFirst, cpuUsage) + + cpuUsage, err = c.cpuContainerStats(ID, false, test.secondMetrics, secondAfterTimeStamp) + assert.NoError(t, err) + assert.Equal(t, test.expectedSecond, cpuUsage) + + container, err = c.containerStore.Get(ID) + assert.NoError(t, err) + assert.NotNil(t, container.Stats) + }) + } + +} + +func TestContainerMetricsMemory(t *testing.T) { + c := newTestCRIService() + timestamp := time.Now() + + for desc, test := range map[string]struct { + metrics interface{} + expected *runtime.MemoryUsage + }{ + "v1 metrics - no memory limit": { + metrics: &v1.Metrics{ + Memory: &v1.MemoryStat{ + Usage: &v1.MemoryEntry{ + Limit: math.MaxUint64, // no limit + Usage: 1000, + }, + TotalRSS: 10, + TotalPgFault: 11, + TotalPgMajFault: 12, + TotalInactiveFile: 500, + }, + }, + expected: &runtime.MemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: 
&runtime.UInt64Value{Value: 500}, + AvailableBytes: &runtime.UInt64Value{Value: 0}, + UsageBytes: &runtime.UInt64Value{Value: 1000}, + RssBytes: &runtime.UInt64Value{Value: 10}, + PageFaults: &runtime.UInt64Value{Value: 11}, + MajorPageFaults: &runtime.UInt64Value{Value: 12}, + }, + }, + "v1 metrics - memory limit": { + metrics: &v1.Metrics{ + Memory: &v1.MemoryStat{ + Usage: &v1.MemoryEntry{ + Limit: 5000, + Usage: 1000, + }, + TotalRSS: 10, + TotalPgFault: 11, + TotalPgMajFault: 12, + TotalInactiveFile: 500, + }, + }, + expected: &runtime.MemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{Value: 500}, + AvailableBytes: &runtime.UInt64Value{Value: 4500}, + UsageBytes: &runtime.UInt64Value{Value: 1000}, + RssBytes: &runtime.UInt64Value{Value: 10}, + PageFaults: &runtime.UInt64Value{Value: 11}, + MajorPageFaults: &runtime.UInt64Value{Value: 12}, + }, + }, + "v2 metrics - memory limit": { + metrics: &v2.Metrics{ + Memory: &v2.MemoryStat{ + Usage: 1000, + UsageLimit: 5000, + InactiveFile: 0, + Pgfault: 11, + Pgmajfault: 12, + }, + }, + expected: &runtime.MemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{Value: 1000}, + AvailableBytes: &runtime.UInt64Value{Value: 4000}, + UsageBytes: &runtime.UInt64Value{Value: 1000}, + RssBytes: &runtime.UInt64Value{Value: 0}, + PageFaults: &runtime.UInt64Value{Value: 11}, + MajorPageFaults: &runtime.UInt64Value{Value: 12}, + }, + }, + "v2 metrics - no memory limit": { + metrics: &v2.Metrics{ + Memory: &v2.MemoryStat{ + Usage: 1000, + UsageLimit: math.MaxUint64, // no limit + InactiveFile: 0, + Pgfault: 11, + Pgmajfault: 12, + }, + }, + expected: &runtime.MemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{Value: 1000}, + AvailableBytes: &runtime.UInt64Value{Value: 0}, + UsageBytes: &runtime.UInt64Value{Value: 1000}, + RssBytes: &runtime.UInt64Value{Value: 0}, + PageFaults: &runtime.UInt64Value{Value: 11}, + MajorPageFaults: 
&runtime.UInt64Value{Value: 12}, + }, + }, + } { + t.Run(desc, func(t *testing.T) { + got, err := c.memoryContainerStats("ID", test.metrics, timestamp) + assert.NoError(t, err) + assert.Equal(t, test.expected, got) + }) + } +} diff --git a/pkg/cri/sbserver/container_stats_list_other.go b/pkg/cri/sbserver/container_stats_list_other.go new file mode 100644 index 000000000..2d6d0833a --- /dev/null +++ b/pkg/cri/sbserver/container_stats_list_other.go @@ -0,0 +1,38 @@ +//go:build !windows && !linux +// +build !windows,!linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "fmt" + + "github.com/containerd/containerd/api/types" + "github.com/containerd/containerd/errdefs" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + containerstore "github.com/containerd/containerd/pkg/cri/store/container" +) + +func (c *criService) containerMetrics( + meta containerstore.Metadata, + stats *types.Metric, +) (*runtime.ContainerStats, error) { + var cs runtime.ContainerStats + return &cs, fmt.Errorf("container metrics: %w", errdefs.ErrNotImplemented) +} diff --git a/pkg/cri/sbserver/container_stats_list_windows.go b/pkg/cri/sbserver/container_stats_list_windows.go new file mode 100644 index 000000000..376505c40 --- /dev/null +++ b/pkg/cri/sbserver/container_stats_list_windows.go @@ -0,0 +1,84 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "errors" + "fmt" + + wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" + "github.com/containerd/containerd/api/types" + "github.com/containerd/typeurl" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + containerstore "github.com/containerd/containerd/pkg/cri/store/container" +) + +func (c *criService) containerMetrics( + meta containerstore.Metadata, + stats *types.Metric, +) (*runtime.ContainerStats, error) { + var cs runtime.ContainerStats + var usedBytes, inodesUsed uint64 + sn, err := c.snapshotStore.Get(meta.ID) + // If snapshotstore doesn't have cached snapshot information + // set WritableLayer usage to zero + if err == nil { + usedBytes = sn.Size + inodesUsed = sn.Inodes + } + cs.WritableLayer = &runtime.FilesystemUsage{ + Timestamp: sn.Timestamp, + FsId: &runtime.FilesystemIdentifier{ + Mountpoint: c.imageFSPath, + }, + UsedBytes: &runtime.UInt64Value{Value: usedBytes}, + InodesUsed: &runtime.UInt64Value{Value: inodesUsed}, + } + cs.Attributes = &runtime.ContainerAttributes{ + Id: meta.ID, + Metadata: meta.Config.GetMetadata(), + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + } + + if stats != nil { + s, err := typeurl.UnmarshalAny(stats.Data) + if err != nil { + return nil, fmt.Errorf("failed to extract container metrics: %w", err) + } + wstats := s.(*wstats.Statistics).GetWindows() + if wstats == nil { + return nil, 
errors.New("windows stats is empty") + } + if wstats.Processor != nil { + cs.Cpu = &runtime.CpuUsage{ + Timestamp: wstats.Timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: wstats.Processor.TotalRuntimeNS}, + } + } + if wstats.Memory != nil { + cs.Memory = &runtime.MemoryUsage{ + Timestamp: wstats.Timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{ + Value: wstats.Memory.MemoryUsagePrivateWorkingSetBytes, + }, + } + } + } + return &cs, nil +} diff --git a/pkg/cri/sbserver/container_status.go b/pkg/cri/sbserver/container_status.go new file mode 100644 index 000000000..2890771da --- /dev/null +++ b/pkg/cri/sbserver/container_status.go @@ -0,0 +1,183 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/containerd/containerd/errdefs" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// ContainerStatus inspects the container and returns the status. 
+func (c *criService) ContainerStatus(ctx context.Context, r *runtime.ContainerStatusRequest) (*runtime.ContainerStatusResponse, error) { + container, err := c.containerStore.Get(r.GetContainerId()) + if err != nil { + return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err) + } + + // TODO(random-liu): Clean up the following logic in CRI. + // Current assumption: + // * ImageSpec in container config is image ID. + // * ImageSpec in container status is image tag. + // * ImageRef in container status is repo digest. + spec := container.Config.GetImage() + imageRef := container.ImageRef + image, err := c.imageStore.Get(imageRef) + if err != nil { + if !errdefs.IsNotFound(err) { + return nil, fmt.Errorf("failed to get image %q: %w", imageRef, err) + } + } else { + repoTags, repoDigests := parseImageReferences(image.References) + if len(repoTags) > 0 { + // Based on current behavior of dockershim, this field should be + // image tag. + spec = &runtime.ImageSpec{Image: repoTags[0]} + } + if len(repoDigests) > 0 { + // Based on the CRI definition, this field will be consumed by user. + imageRef = repoDigests[0] + } + } + status := toCRIContainerStatus(container, spec, imageRef) + if status.GetCreatedAt() == 0 { + // CRI doesn't allow CreatedAt == 0. + info, err := container.Container.Info(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get CreatedAt in %q state: %w", status.State, err) + } + status.CreatedAt = info.CreatedAt.UnixNano() + } + + info, err := toCRIContainerInfo(ctx, container, r.GetVerbose()) + if err != nil { + return nil, fmt.Errorf("failed to get verbose container info: %w", err) + } + + return &runtime.ContainerStatusResponse{ + Status: status, + Info: info, + }, nil +} + +// toCRIContainerStatus converts internal container object to CRI container status. 
+func toCRIContainerStatus(container containerstore.Container, spec *runtime.ImageSpec, imageRef string) *runtime.ContainerStatus { + meta := container.Metadata + status := container.Status.Get() + reason := status.Reason + if status.State() == runtime.ContainerState_CONTAINER_EXITED && reason == "" { + if status.ExitCode == 0 { + reason = completeExitReason + } else { + reason = errorExitReason + } + } + + // If container is in the created state, not set started and finished unix timestamps + var st, ft int64 + switch status.State() { + case runtime.ContainerState_CONTAINER_RUNNING: + // If container is in the running state, set started unix timestamps + st = status.StartedAt + case runtime.ContainerState_CONTAINER_EXITED, runtime.ContainerState_CONTAINER_UNKNOWN: + st, ft = status.StartedAt, status.FinishedAt + } + + return &runtime.ContainerStatus{ + Id: meta.ID, + Metadata: meta.Config.GetMetadata(), + State: status.State(), + CreatedAt: status.CreatedAt, + StartedAt: st, + FinishedAt: ft, + ExitCode: status.ExitCode, + Image: spec, + ImageRef: imageRef, + Reason: reason, + Message: status.Message, + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + Mounts: meta.Config.GetMounts(), + LogPath: meta.LogPath, + } +} + +// ContainerInfo is extra information for a container. +type ContainerInfo struct { + // TODO(random-liu): Add sandboxID in CRI container status. + SandboxID string `json:"sandboxID"` + Pid uint32 `json:"pid"` + Removing bool `json:"removing"` + SnapshotKey string `json:"snapshotKey"` + Snapshotter string `json:"snapshotter"` + RuntimeType string `json:"runtimeType"` + RuntimeOptions interface{} `json:"runtimeOptions"` + Config *runtime.ContainerConfig `json:"config"` + RuntimeSpec *runtimespec.Spec `json:"runtimeSpec"` +} + +// toCRIContainerInfo converts internal container object information to CRI container status response info map. 
+func toCRIContainerInfo(ctx context.Context, container containerstore.Container, verbose bool) (map[string]string, error) { + if !verbose { + return nil, nil + } + + meta := container.Metadata + status := container.Status.Get() + + // TODO(random-liu): Change CRI status info to use array instead of map. + ci := &ContainerInfo{ + SandboxID: container.SandboxID, + Pid: status.Pid, + Removing: status.Removing, + Config: meta.Config, + } + + var err error + ci.RuntimeSpec, err = container.Container.Spec(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get container runtime spec: %w", err) + } + + ctrInfo, err := container.Container.Info(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get container info: %w", err) + } + ci.SnapshotKey = ctrInfo.SnapshotKey + ci.Snapshotter = ctrInfo.Snapshotter + + runtimeOptions, err := getRuntimeOptions(ctrInfo) + if err != nil { + return nil, fmt.Errorf("failed to get runtime options: %w", err) + } + ci.RuntimeType = ctrInfo.Runtime.Name + ci.RuntimeOptions = runtimeOptions + + infoBytes, err := json.Marshal(ci) + if err != nil { + return nil, fmt.Errorf("failed to marshal info %v: %w", ci, err) + } + return map[string]string{ + "info": string(infoBytes), + }, nil +} diff --git a/pkg/cri/sbserver/container_status_test.go b/pkg/cri/sbserver/container_status_test.go new file mode 100644 index 000000000..e43d21d81 --- /dev/null +++ b/pkg/cri/sbserver/container_status_test.go @@ -0,0 +1,258 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + imagestore "github.com/containerd/containerd/pkg/cri/store/image" +) + +func getContainerStatusTestData() (*containerstore.Metadata, *containerstore.Status, + *imagestore.Image, *runtime.ContainerStatus) { + imageID := "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" + testID := "test-id" + config := &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: "test-name", + Attempt: 1, + }, + Image: &runtime.ImageSpec{Image: "test-image"}, + Mounts: []*runtime.Mount{{ + ContainerPath: "test-container-path", + HostPath: "test-host-path", + }}, + Labels: map[string]string{"a": "b"}, + Annotations: map[string]string{"c": "d"}, + } + + createdAt := time.Now().UnixNano() + + metadata := &containerstore.Metadata{ + ID: testID, + Name: "test-long-name", + SandboxID: "test-sandbox-id", + Config: config, + ImageRef: imageID, + LogPath: "test-log-path", + } + status := &containerstore.Status{ + Pid: 1234, + CreatedAt: createdAt, + } + image := &imagestore.Image{ + ID: imageID, + References: []string{ + "gcr.io/library/busybox:latest", + "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", + }, + } + expected := &runtime.ContainerStatus{ + Id: testID, + Metadata: config.GetMetadata(), + State: runtime.ContainerState_CONTAINER_CREATED, + CreatedAt: createdAt, + Image: &runtime.ImageSpec{Image: "gcr.io/library/busybox:latest"}, + ImageRef: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", + Reason: completeExitReason, + Labels: config.GetLabels(), + Annotations: config.GetAnnotations(), + Mounts: 
config.GetMounts(), + LogPath: "test-log-path", + } + + return metadata, status, image, expected +} + +func TestToCRIContainerStatus(t *testing.T) { + for desc, test := range map[string]struct { + startedAt int64 + finishedAt int64 + exitCode int32 + reason string + message string + expectedState runtime.ContainerState + expectedReason string + }{ + "container created": { + expectedState: runtime.ContainerState_CONTAINER_CREATED, + }, + "container running": { + startedAt: time.Now().UnixNano(), + expectedState: runtime.ContainerState_CONTAINER_RUNNING, + }, + "container exited with reason": { + startedAt: time.Now().UnixNano(), + finishedAt: time.Now().UnixNano(), + exitCode: 1, + reason: "test-reason", + message: "test-message", + expectedState: runtime.ContainerState_CONTAINER_EXITED, + expectedReason: "test-reason", + }, + "container exited with exit code 0 without reason": { + startedAt: time.Now().UnixNano(), + finishedAt: time.Now().UnixNano(), + exitCode: 0, + message: "test-message", + expectedState: runtime.ContainerState_CONTAINER_EXITED, + expectedReason: completeExitReason, + }, + "container exited with non-zero exit code without reason": { + startedAt: time.Now().UnixNano(), + finishedAt: time.Now().UnixNano(), + exitCode: 1, + message: "test-message", + expectedState: runtime.ContainerState_CONTAINER_EXITED, + expectedReason: errorExitReason, + }, + } { + t.Run(desc, func(t *testing.T) { + + metadata, status, _, expected := getContainerStatusTestData() + // Update status with test case. + status.StartedAt = test.startedAt + status.FinishedAt = test.finishedAt + status.ExitCode = test.exitCode + status.Reason = test.reason + status.Message = test.message + container, err := containerstore.NewContainer( + *metadata, + containerstore.WithFakeStatus(*status), + ) + assert.NoError(t, err) + // Set expectation based on test case. 
+ expected.Reason = test.expectedReason + expected.StartedAt = test.startedAt + expected.FinishedAt = test.finishedAt + expected.ExitCode = test.exitCode + expected.Message = test.message + patchExceptedWithState(expected, test.expectedState) + containerStatus := toCRIContainerStatus(container, + expected.Image, + expected.ImageRef) + assert.Equal(t, expected, containerStatus, desc) + }) + } +} + +// TODO(mikebrow): add a fake containerd container.Container.Spec client api so we can test verbose is true option +func TestToCRIContainerInfo(t *testing.T) { + metadata, status, _, _ := getContainerStatusTestData() + container, err := containerstore.NewContainer( + *metadata, + containerstore.WithFakeStatus(*status), + ) + assert.NoError(t, err) + + info, err := toCRIContainerInfo(context.Background(), + container, + false) + assert.NoError(t, err) + assert.Nil(t, info) +} + +func TestContainerStatus(t *testing.T) { + for desc, test := range map[string]struct { + exist bool + imageExist bool + startedAt int64 + finishedAt int64 + reason string + expectedState runtime.ContainerState + expectErr bool + }{ + "container created": { + exist: true, + imageExist: true, + expectedState: runtime.ContainerState_CONTAINER_CREATED, + }, + "container running": { + exist: true, + imageExist: true, + startedAt: time.Now().UnixNano(), + expectedState: runtime.ContainerState_CONTAINER_RUNNING, + }, + "container exited": { + exist: true, + imageExist: true, + startedAt: time.Now().UnixNano(), + finishedAt: time.Now().UnixNano(), + reason: "test-reason", + expectedState: runtime.ContainerState_CONTAINER_EXITED, + }, + "container not exist": { + exist: false, + imageExist: true, + expectErr: true, + }, + "image not exist": { + exist: false, + imageExist: false, + expectErr: true, + }, + } { + t.Run(desc, func(t *testing.T) { + c := newTestCRIService() + metadata, status, image, expected := getContainerStatusTestData() + // Update status with test case. 
+ status.StartedAt = test.startedAt + status.FinishedAt = test.finishedAt + status.Reason = test.reason + container, err := containerstore.NewContainer( + *metadata, + containerstore.WithFakeStatus(*status), + ) + assert.NoError(t, err) + if test.exist { + assert.NoError(t, c.containerStore.Add(container)) + } + if test.imageExist { + c.imageStore, err = imagestore.NewFakeStore([]imagestore.Image{*image}) + assert.NoError(t, err) + } + resp, err := c.ContainerStatus(context.Background(), &runtime.ContainerStatusRequest{ContainerId: container.ID}) + if test.expectErr { + assert.Error(t, err) + assert.Nil(t, resp) + return + } + // Set expectation based on test case. + expected.StartedAt = test.startedAt + expected.FinishedAt = test.finishedAt + expected.Reason = test.reason + patchExceptedWithState(expected, test.expectedState) + assert.Equal(t, expected, resp.GetStatus()) + }) + } +} + +func patchExceptedWithState(expected *runtime.ContainerStatus, state runtime.ContainerState) { + expected.State = state + switch state { + case runtime.ContainerState_CONTAINER_CREATED: + expected.StartedAt, expected.FinishedAt = 0, 0 + case runtime.ContainerState_CONTAINER_RUNNING: + expected.FinishedAt = 0 + } +} diff --git a/pkg/cri/sbserver/container_stop.go b/pkg/cri/sbserver/container_stop.go new file mode 100644 index 000000000..bffbfb51a --- /dev/null +++ b/pkg/cri/sbserver/container_stop.go @@ -0,0 +1,208 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + "sync/atomic" + "syscall" + "time" + + eventtypes "github.com/containerd/containerd/api/events" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + ctrdutil "github.com/containerd/containerd/pkg/cri/util" + "github.com/containerd/containerd/protobuf" + + "github.com/moby/sys/signal" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// StopContainer stops a running container with a grace period (i.e., timeout). +func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) { + start := time.Now() + // Get container config from container store. + container, err := c.containerStore.Get(r.GetContainerId()) + if err != nil { + return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err) + } + + if err := c.stopContainer(ctx, container, time.Duration(r.GetTimeout())*time.Second); err != nil { + return nil, err + } + + i, err := container.Container.Info(ctx) + if err != nil { + return nil, fmt.Errorf("get container info: %w", err) + } + + containerStopTimer.WithValues(i.Runtime.Name).UpdateSince(start) + + return &runtime.StopContainerResponse{}, nil +} + +// stopContainer stops a container based on the container metadata. +func (c *criService) stopContainer(ctx context.Context, container containerstore.Container, timeout time.Duration) error { + id := container.ID + + // Return without error if container is not running. This makes sure that + // stop only takes real action after the container is started. 
+ state := container.Status.Get().State() + if state != runtime.ContainerState_CONTAINER_RUNNING && + state != runtime.ContainerState_CONTAINER_UNKNOWN { + log.G(ctx).Infof("Container to stop %q must be in running or unknown state, current state %q", + id, criContainerStateToString(state)) + return nil + } + + task, err := container.Container.Task(ctx, nil) + if err != nil { + if !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to get task for container %q: %w", id, err) + } + // Don't return for unknown state, some cleanup needs to be done. + if state == runtime.ContainerState_CONTAINER_UNKNOWN { + return cleanupUnknownContainer(ctx, id, container) + } + return nil + } + + // Handle unknown state. + if state == runtime.ContainerState_CONTAINER_UNKNOWN { + // Start an exit handler for containers in unknown state. + waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext()) + defer waitCancel() + exitCh, err := task.Wait(waitCtx) + if err != nil { + if !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to wait for task for %q: %w", id, err) + } + return cleanupUnknownContainer(ctx, id, container) + } + + exitCtx, exitCancel := context.WithCancel(context.Background()) + stopCh := c.eventMonitor.startContainerExitMonitor(exitCtx, id, task.Pid(), exitCh) + defer func() { + exitCancel() + // This ensures that exit monitor is stopped before + // `Wait` is cancelled, so no exit event is generated + // because of the `Wait` cancellation. + <-stopCh + }() + } + + // We only need to kill the task. The event handler will Delete the + // task from containerd after it handles the Exited event. + if timeout > 0 { + stopSignal := "SIGTERM" + if container.StopSignal != "" { + stopSignal = container.StopSignal + } else { + // The image may have been deleted, and the `StopSignal` field is + // just introduced to handle that. 
+ // However, for containers created before the `StopSignal` field is + // introduced, still try to get the stop signal from the image config. + // If the image has been deleted, logging an error and using the + // default SIGTERM is still better than returning error and leaving + // the container unstoppable. (See issue #990) + // TODO(random-liu): Remove this logic when containerd 1.2 is deprecated. + image, err := c.imageStore.Get(container.ImageRef) + if err != nil { + if !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to get image %q: %w", container.ImageRef, err) + } + log.G(ctx).Warningf("Image %q not found, stop container with signal %q", container.ImageRef, stopSignal) + } else { + if image.ImageSpec.Config.StopSignal != "" { + stopSignal = image.ImageSpec.Config.StopSignal + } + } + } + sig, err := signal.ParseSignal(stopSignal) + if err != nil { + return fmt.Errorf("failed to parse stop signal %q: %w", stopSignal, err) + } + + var sswt bool + if container.IsStopSignaledWithTimeout == nil { + log.G(ctx).Infof("unable to ensure stop signal %v was not sent twice to container %v", sig, id) + sswt = true + } else { + sswt = atomic.CompareAndSwapUint32(container.IsStopSignaledWithTimeout, 0, 1) + } + + if sswt { + log.G(ctx).Infof("Stop container %q with signal %v", id, sig) + if err = task.Kill(ctx, sig); err != nil && !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to stop container %q: %w", id, err) + } + } else { + log.G(ctx).Infof("Skipping the sending of signal %v to container %q because a prior stop with timeout>0 request already sent the signal", sig, id) + } + + sigTermCtx, sigTermCtxCancel := context.WithTimeout(ctx, timeout) + defer sigTermCtxCancel() + err = c.waitContainerStop(sigTermCtx, container) + if err == nil { + // Container stopped on first signal no need for SIGKILL + return nil + } + // If the parent context was cancelled or exceeded return immediately + if ctx.Err() != nil { + return ctx.Err() + } + // sigTermCtx was 
exceeded. Send SIGKILL + log.G(ctx).Debugf("Stop container %q with signal %v timed out", id, sig) + } + + log.G(ctx).Infof("Kill container %q", id) + if err = task.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to kill container %q: %w", id, err) + } + + // Wait for a fixed timeout until container stop is observed by event monitor. + err = c.waitContainerStop(ctx, container) + if err != nil { + return fmt.Errorf("an error occurs during waiting for container %q to be killed: %w", id, err) + } + return nil +} + +// waitContainerStop waits for container to be stopped until context is +// cancelled or the context deadline is exceeded. +func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container) error { + select { + case <-ctx.Done(): + return fmt.Errorf("wait container %q: %w", container.ID, ctx.Err()) + case <-container.Stopped(): + return nil + } +} + +// cleanupUnknownContainer cleanup stopped container in unknown state. +func cleanupUnknownContainer(ctx context.Context, id string, cntr containerstore.Container) error { + // Reuse handleContainerExit to do the cleanup. + return handleContainerExit(ctx, &eventtypes.TaskExit{ + ContainerID: id, + ID: id, + Pid: 0, + ExitStatus: unknownExitCode, + ExitedAt: protobuf.ToTimestamp(time.Now()), + }, cntr) +} diff --git a/pkg/cri/sbserver/container_stop_test.go b/pkg/cri/sbserver/container_stop_test.go new file mode 100644 index 000000000..70b1460ac --- /dev/null +++ b/pkg/cri/sbserver/container_stop_test.go @@ -0,0 +1,87 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + + containerstore "github.com/containerd/containerd/pkg/cri/store/container" +) + +func TestWaitContainerStop(t *testing.T) { + id := "test-id" + for desc, test := range map[string]struct { + status *containerstore.Status + cancel bool + timeout time.Duration + expectErr bool + }{ + "should return error if timeout exceeds": { + status: &containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + StartedAt: time.Now().UnixNano(), + }, + timeout: 200 * time.Millisecond, + expectErr: true, + }, + "should return error if context is cancelled": { + status: &containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + StartedAt: time.Now().UnixNano(), + }, + timeout: time.Hour, + cancel: true, + expectErr: true, + }, + "should not return error if container is stopped before timeout": { + status: &containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + StartedAt: time.Now().UnixNano(), + FinishedAt: time.Now().UnixNano(), + }, + timeout: time.Hour, + expectErr: false, + }, + } { + t.Run(desc, func(t *testing.T) { + c := newTestCRIService() + container, err := containerstore.NewContainer( + containerstore.Metadata{ID: id}, + containerstore.WithFakeStatus(*test.status), + ) + assert.NoError(t, err) + assert.NoError(t, c.containerStore.Add(container)) + ctx := context.Background() + if test.cancel { + cancelledCtx, cancel := context.WithCancel(ctx) + cancel() + ctx = cancelledCtx + } + if test.timeout > 0 { + timeoutCtx, cancel := context.WithTimeout(ctx, 
test.timeout) + defer cancel() + ctx = timeoutCtx + } + err = c.waitContainerStop(ctx, container) + assert.Equal(t, test.expectErr, err != nil, desc) + }) + } +} diff --git a/pkg/cri/sbserver/container_update_resources.go b/pkg/cri/sbserver/container_update_resources.go new file mode 100644 index 000000000..ee10ff95d --- /dev/null +++ b/pkg/cri/sbserver/container_update_resources.go @@ -0,0 +1,131 @@ +//go:build !darwin && !freebsd +// +build !darwin,!freebsd + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + gocontext "context" + "fmt" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/containers" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + "github.com/containerd/typeurl" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + ctrdutil "github.com/containerd/containerd/pkg/cri/util" +) + +// UpdateContainerResources updates ContainerConfig of the container. 
+func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (retRes *runtime.UpdateContainerResourcesResponse, retErr error) { + container, err := c.containerStore.Get(r.GetContainerId()) + if err != nil { + return nil, fmt.Errorf("failed to find container: %w", err) + } + // Update resources in status update transaction, so that: + // 1) There won't be race condition with container start. + // 2) There won't be concurrent resource update to the same container. + if err := container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) { + return status, c.updateContainerResources(ctx, container, r, status) + }); err != nil { + return nil, fmt.Errorf("failed to update resources: %w", err) + } + return &runtime.UpdateContainerResourcesResponse{}, nil +} + +func (c *criService) updateContainerResources(ctx context.Context, + cntr containerstore.Container, + r *runtime.UpdateContainerResourcesRequest, + status containerstore.Status) (retErr error) { + id := cntr.ID + // Do not update the container when there is a removal in progress. + if status.Removing { + return fmt.Errorf("container %q is in removing state", id) + } + + // Update container spec. If the container is not started yet, updating + // spec makes sure that the resource limits are correct when start; + // if the container is already started, updating spec is still required, + // the spec will become our source of truth for resource limits. 
+ oldSpec, err := cntr.Container.Spec(ctx) + if err != nil { + return fmt.Errorf("failed to get container spec: %w", err) + } + newSpec, err := updateOCIResource(ctx, oldSpec, r, c.config) + if err != nil { + return fmt.Errorf("failed to update resource in spec: %w", err) + } + + if err := updateContainerSpec(ctx, cntr.Container, newSpec); err != nil { + return err + } + defer func() { + if retErr != nil { + deferCtx, deferCancel := ctrdutil.DeferContext() + defer deferCancel() + // Reset spec on error. + if err := updateContainerSpec(deferCtx, cntr.Container, oldSpec); err != nil { + log.G(ctx).WithError(err).Errorf("Failed to update spec %+v for container %q", oldSpec, id) + } + } + }() + + // If container is not running, only update spec is enough, new resource + // limit will be applied when container start. + if status.State() != runtime.ContainerState_CONTAINER_RUNNING { + return nil + } + + task, err := cntr.Container.Task(ctx, nil) + if err != nil { + if errdefs.IsNotFound(err) { + // Task exited already. + return nil + } + return fmt.Errorf("failed to get task: %w", err) + } + // newSpec.Linux / newSpec.Windows won't be nil + if err := task.Update(ctx, containerd.WithResources(getResources(newSpec))); err != nil { + if errdefs.IsNotFound(err) { + // Task exited already. + return nil + } + return fmt.Errorf("failed to update resources: %w", err) + } + return nil +} + +// updateContainerSpec updates container spec. 
+func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error { + any, err := typeurl.MarshalAny(spec) + if err != nil { + return fmt.Errorf("failed to marshal spec %+v: %w", spec, err) + } + if err := cntr.Update(ctx, func(ctx gocontext.Context, client *containerd.Client, c *containers.Container) error { + c.Spec = any + return nil + }); err != nil { + return fmt.Errorf("failed to update container spec: %w", err) + } + return nil +} diff --git a/pkg/cri/sbserver/container_update_resources_linux.go b/pkg/cri/sbserver/container_update_resources_linux.go new file mode 100644 index 000000000..04186ac11 --- /dev/null +++ b/pkg/cri/sbserver/container_update_resources_linux.go @@ -0,0 +1,51 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + criconfig "github.com/containerd/containerd/pkg/cri/config" + "github.com/containerd/containerd/pkg/cri/opts" + "github.com/containerd/containerd/pkg/cri/util" +) + +// updateOCIResource updates container resource limit. +func updateOCIResource(ctx context.Context, spec *runtimespec.Spec, r *runtime.UpdateContainerResourcesRequest, + config criconfig.Config) (*runtimespec.Spec, error) { + + // Copy to make sure old spec is not changed. 
+ var cloned runtimespec.Spec + if err := util.DeepCopy(&cloned, spec); err != nil { + return nil, fmt.Errorf("failed to deep copy: %w", err) + } + if cloned.Linux == nil { + cloned.Linux = &runtimespec.Linux{} + } + if err := opts.WithResources(r.GetLinux(), config.TolerateMissingHugetlbController, config.DisableHugetlbController)(ctx, nil, nil, &cloned); err != nil { + return nil, fmt.Errorf("unable to set linux container resources: %w", err) + } + return &cloned, nil +} + +func getResources(spec *runtimespec.Spec) interface{} { + return spec.Linux.Resources +} diff --git a/pkg/cri/sbserver/container_update_resources_linux_test.go b/pkg/cri/sbserver/container_update_resources_linux_test.go new file mode 100644 index 000000000..7014a7f07 --- /dev/null +++ b/pkg/cri/sbserver/container_update_resources_linux_test.go @@ -0,0 +1,230 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "testing" + + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/assert" + "google.golang.org/protobuf/proto" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + criconfig "github.com/containerd/containerd/pkg/cri/config" +) + +func TestUpdateOCILinuxResource(t *testing.T) { + oomscoreadj := new(int) + *oomscoreadj = -500 + for desc, test := range map[string]struct { + spec *runtimespec.Spec + request *runtime.UpdateContainerResourcesRequest + expected *runtimespec.Spec + expectErr bool + }{ + "should be able to update each resource": { + spec: &runtimespec.Spec{ + Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, + Linux: &runtimespec.Linux{ + Resources: &runtimespec.LinuxResources{ + Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)}, + CPU: &runtimespec.LinuxCPU{ + Shares: proto.Uint64(1111), + Quota: proto.Int64(2222), + Period: proto.Uint64(3333), + Cpus: "0-1", + Mems: "2-3", + }, + Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}, + }, + }, + }, + request: &runtime.UpdateContainerResourcesRequest{ + Linux: &runtime.LinuxContainerResources{ + CpuPeriod: 6666, + CpuQuota: 5555, + CpuShares: 4444, + MemoryLimitInBytes: 54321, + OomScoreAdj: 500, + CpusetCpus: "4-5", + CpusetMems: "6-7", + Unified: map[string]string{"memory.min": "1507328", "memory.swap.max": "0"}, + }, + }, + expected: &runtimespec.Spec{ + Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, + Linux: &runtimespec.Linux{ + Resources: &runtimespec.LinuxResources{ + Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(54321)}, + CPU: &runtimespec.LinuxCPU{ + Shares: proto.Uint64(4444), + Quota: proto.Int64(5555), + Period: proto.Uint64(6666), + Cpus: "4-5", + Mems: "6-7", + }, + Unified: map[string]string{"memory.min": "1507328", "memory.swap.max": "0"}, + }, + }, + }, + }, + "should skip empty fields": { + spec: &runtimespec.Spec{ + Process: 
&runtimespec.Process{OOMScoreAdj: oomscoreadj}, + Linux: &runtimespec.Linux{ + Resources: &runtimespec.LinuxResources{ + Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)}, + CPU: &runtimespec.LinuxCPU{ + Shares: proto.Uint64(1111), + Quota: proto.Int64(2222), + Period: proto.Uint64(3333), + Cpus: "0-1", + Mems: "2-3", + }, + Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}, + }, + }, + }, + request: &runtime.UpdateContainerResourcesRequest{ + Linux: &runtime.LinuxContainerResources{ + CpuQuota: 5555, + CpuShares: 4444, + MemoryLimitInBytes: 54321, + OomScoreAdj: 500, + CpusetMems: "6-7", + }, + }, + expected: &runtimespec.Spec{ + Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, + Linux: &runtimespec.Linux{ + Resources: &runtimespec.LinuxResources{ + Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(54321)}, + CPU: &runtimespec.LinuxCPU{ + Shares: proto.Uint64(4444), + Quota: proto.Int64(5555), + Period: proto.Uint64(3333), + Cpus: "0-1", + Mems: "6-7", + }, + Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}, + }, + }, + }, + }, + "should be able to fill empty fields": { + spec: &runtimespec.Spec{ + Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, + Linux: &runtimespec.Linux{ + Resources: &runtimespec.LinuxResources{ + Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)}, + }, + }, + }, + request: &runtime.UpdateContainerResourcesRequest{ + Linux: &runtime.LinuxContainerResources{ + CpuPeriod: 6666, + CpuQuota: 5555, + CpuShares: 4444, + MemoryLimitInBytes: 54321, + OomScoreAdj: 500, + CpusetCpus: "4-5", + CpusetMems: "6-7", + Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}, + }, + }, + expected: &runtimespec.Spec{ + Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, + Linux: &runtimespec.Linux{ + Resources: &runtimespec.LinuxResources{ + Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(54321)}, + CPU: &runtimespec.LinuxCPU{ + Shares: 
proto.Uint64(4444), + Quota: proto.Int64(5555), + Period: proto.Uint64(6666), + Cpus: "4-5", + Mems: "6-7", + }, + Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}, + }, + }, + }, + }, + "should be able to patch the unified map": { + spec: &runtimespec.Spec{ + Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, + Linux: &runtimespec.Linux{ + Resources: &runtimespec.LinuxResources{ + Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)}, + CPU: &runtimespec.LinuxCPU{ + Shares: proto.Uint64(1111), + Quota: proto.Int64(2222), + Period: proto.Uint64(3333), + Cpus: "0-1", + Mems: "2-3", + }, + Unified: map[string]string{"memory.min": "65536", "memory.max": "1507328"}, + }, + }, + }, + request: &runtime.UpdateContainerResourcesRequest{ + Linux: &runtime.LinuxContainerResources{ + CpuPeriod: 6666, + CpuQuota: 5555, + CpuShares: 4444, + MemoryLimitInBytes: 54321, + OomScoreAdj: 500, + CpusetCpus: "4-5", + CpusetMems: "6-7", + Unified: map[string]string{"memory.min": "1507328", "memory.swap.max": "1024"}, + }, + }, + expected: &runtimespec.Spec{ + Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, + Linux: &runtimespec.Linux{ + Resources: &runtimespec.LinuxResources{ + Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(54321)}, + CPU: &runtimespec.LinuxCPU{ + Shares: proto.Uint64(4444), + Quota: proto.Int64(5555), + Period: proto.Uint64(6666), + Cpus: "4-5", + Mems: "6-7", + }, + Unified: map[string]string{"memory.min": "1507328", "memory.max": "1507328", "memory.swap.max": "1024"}, + }, + }, + }, + }, + } { + t.Run(desc, func(t *testing.T) { + config := criconfig.Config{ + PluginConfig: criconfig.PluginConfig{ + TolerateMissingHugetlbController: true, + DisableHugetlbController: false, + }, + } + got, err := updateOCIResource(context.Background(), test.spec, test.request, config) + if test.expectErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + assert.Equal(t, test.expected, got) + }) + } +} diff --git 
a/pkg/cri/sbserver/container_update_resources_other.go b/pkg/cri/sbserver/container_update_resources_other.go new file mode 100644 index 000000000..944159beb --- /dev/null +++ b/pkg/cri/sbserver/container_update_resources_other.go @@ -0,0 +1,46 @@ +//go:build !windows && !linux +// +build !windows,!linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + containerstore "github.com/containerd/containerd/pkg/cri/store/container" +) + +// UpdateContainerResources updates ContainerConfig of the container. +func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (retRes *runtime.UpdateContainerResourcesResponse, retErr error) { + container, err := c.containerStore.Get(r.GetContainerId()) + if err != nil { + return nil, fmt.Errorf("failed to find container: %w", err) + } + // Update resources in status update transaction, so that: + // 1) There won't be race condition with container start. + // 2) There won't be concurrent resource update to the same container. 
+ if err := container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) { + return status, nil + }); err != nil { + return nil, fmt.Errorf("failed to update resources: %w", err) + } + return &runtime.UpdateContainerResourcesResponse{}, nil +} diff --git a/pkg/cri/sbserver/container_update_resources_windows.go b/pkg/cri/sbserver/container_update_resources_windows.go new file mode 100644 index 000000000..adbad53fd --- /dev/null +++ b/pkg/cri/sbserver/container_update_resources_windows.go @@ -0,0 +1,51 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + criconfig "github.com/containerd/containerd/pkg/cri/config" + "github.com/containerd/containerd/pkg/cri/opts" + "github.com/containerd/containerd/pkg/cri/util" +) + +// updateOCIResource updates container resource limit. +func updateOCIResource(ctx context.Context, spec *runtimespec.Spec, r *runtime.UpdateContainerResourcesRequest, + config criconfig.Config) (*runtimespec.Spec, error) { + + // Copy to make sure old spec is not changed. 
+ var cloned runtimespec.Spec + if err := util.DeepCopy(&cloned, spec); err != nil { + return nil, fmt.Errorf("failed to deep copy: %w", err) + } + if cloned.Windows == nil { + cloned.Windows = &runtimespec.Windows{} + } + if err := opts.WithWindowsResources(r.GetWindows())(ctx, nil, nil, &cloned); err != nil { + return nil, fmt.Errorf("unable to set windows container resources: %w", err) + } + return &cloned, nil +} + +func getResources(spec *runtimespec.Spec) interface{} { + return spec.Windows.Resources +} diff --git a/pkg/cri/sbserver/cri_fuzzer.go b/pkg/cri/sbserver/cri_fuzzer.go new file mode 100644 index 000000000..734579ce7 --- /dev/null +++ b/pkg/cri/sbserver/cri_fuzzer.go @@ -0,0 +1,578 @@ +//go:build gofuzz +// +build gofuzz + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "fmt" + golangruntime "runtime" + "strings" + "sync" + "time" + + fuzz "github.com/AdaLogics/go-fuzz-headers" + "github.com/containerd/go-cni" + + criconfig "github.com/containerd/containerd/pkg/cri/config" + servertesting "github.com/containerd/containerd/pkg/cri/server/testing" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + imagestore "github.com/containerd/containerd/pkg/cri/store/image" + "github.com/containerd/containerd/pkg/cri/store/label" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot" + ostesting "github.com/containerd/containerd/pkg/os/testing" + "github.com/containerd/containerd/pkg/registrar" + + "github.com/containerd/containerd" + _ "github.com/containerd/containerd/cmd/containerd/builtins" + "github.com/containerd/containerd/cmd/containerd/command" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +var ( + // The APIs the fuzzer can call: + ops = map[int]string{ + 0: "createContainer", + 1: "removeContainer", + 2: "addSandboxes", + 3: "listContainers", + 4: "startContainer", + 5: "containerStats", + 6: "listContainerStats", + 7: "containerStatus", + 8: "stopContainer", + 9: "updateContainerResources", + 10: "listImages", + 11: "removeImages", + 12: "imageStatus", + 13: "imageFsInfo", + 14: "listPodSandbox", + 15: "portForward", + 16: "removePodSandbox", + 17: "runPodSandbox", + 18: "podSandboxStatus", + 19: "stopPodSandbox", + 20: "status", + 21: "updateRuntimeConfig", + } +) + +const ( + defaultRoot = "/var/lib/containerd" + defaultState = "/tmp/containerd" + defaultAddress = "/tmp/containerd/containerd.sock" +) + +var ( + initDaemon sync.Once + + executionOrder []string +) + +func startDaemonForFuzzing(arguments []string) { + app := command.App() + _ = app.Run(arguments) +} + +func startDaemon() { + args := []string{"--log-level", "debug"} + go func() { + // This is 
similar to invoking the + // containerd binary. + // See contrib/fuzz/oss_fuzz_build.sh + // for more info. + startDaemonForFuzzing(args) + }() + time.Sleep(time.Second * 4) +} + +func printExecutions() { + if r := recover(); r != nil { + var err string + switch r.(type) { + case string: + err = r.(string) + case golangruntime.Error: + err = r.(golangruntime.Error).Error() + case error: + err = r.(error).Error() + default: + err = "uknown error type" + } + fmt.Println("Executions:") + for _, eo := range executionOrder { + fmt.Println(eo) + } + panic(err) + } +} + +// FuzzCRI implements a fuzzer that tests CRI APIs. +func FuzzCRI(data []byte) int { + initDaemon.Do(startDaemon) + + executionOrder = make([]string, 0) + + f := fuzz.NewConsumer(data) + + client, err := containerd.New(defaultAddress) + if err != nil { + return 0 + } + defer client.Close() + + c, err := NewCRIService(criconfig.Config{}, client) + if err != nil { + panic(err) + } + + calls, err := f.GetInt() + if err != nil { + return 0 + } + + defer printExecutions() + for i := 0; i < calls%40; i++ { + op, err := f.GetInt() + if err != nil { + return 0 + } + opType := op % len(ops) + + switch ops[opType] { + case "createContainer": + createContainerFuzz(c.(*criService), f) + case "removeContainer": + removeContainerFuzz(c.(*criService), f) + case "addSandboxes": + addSandboxesFuzz(c.(*criService), f) + case "listContainers": + listContainersFuzz(c.(*criService), f) + case "startContainer": + startContainerFuzz(c.(*criService), f) + case "containerStats": + containerStatsFuzz(c.(*criService), f) + case "listContainerStats": + listContainerStatsFuzz(c.(*criService), f) + case "containerStatus": + containerStatusFuzz(c.(*criService), f) + case "stopContainer": + stopContainerFuzz(c.(*criService), f) + case "updateContainerResources": + updateContainerResourcesFuzz(c.(*criService), f) + case "listImages": + listImagesFuzz(c.(*criService), f) + case "removeImages": + removeImagesFuzz(c.(*criService), f) + case 
"imageStatus": + imageStatusFuzz(c.(*criService), f) + case "imageFsInfo": + imageFsInfoFuzz(c.(*criService), f) + case "listPodSandbox": + listPodSandboxFuzz(c.(*criService), f) + case "portForward": + portForwardFuzz(c.(*criService), f) + case "removePodSandbox": + removePodSandboxFuzz(c.(*criService), f) + case "runPodSandbox": + runPodSandboxFuzz(c.(*criService), f) + case "podSandboxStatus": + podSandboxStatusFuzz(c.(*criService), f) + case "stopPodSandbox": + stopPodSandboxFuzz(c.(*criService), f) + case "status": + statusFuzz(c.(*criService), f) + case "updateRuntimeConfig": + updateRuntimeConfigFuzz(c.(*criService), f) + } + } + return 1 +} + +func logExecution(apiName, request string) { + var logString strings.Builder + logString.WriteString(fmt.Sprintf("Calling %s with \n %s \n\n", apiName, request)) + executionOrder = append(executionOrder, logString.String()) +} + +// createContainerFuzz creates a CreateContainerRequest and passes +// it to c.CreateContainer +func createContainerFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.CreateContainerRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.CreateContainer(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.CreateContainer", reqString) + return nil +} + +// removeContainerFuzz creates a RemoveContainerRequest and passes +// it to c.RemoveContainer +func removeContainerFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.RemoveContainerRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.RemoveContainer(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.RemoveContainer", reqString) + return nil +} + +// addSandboxesFuzz creates a sandbox and adds it to the sandboxstore +func addSandboxesFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + quantity, err := f.GetInt() + if err != nil { + return err + } + for i := 0; i < quantity%20; i++ { + newSandbox, 
err := getSandboxFuzz(f) + if err != nil { + return err + } + err = c.sandboxStore.Add(newSandbox) + if err != nil { + return err + } + } + return nil +} + +// getSandboxFuzz creates a sandbox +func getSandboxFuzz(f *fuzz.ConsumeFuzzer) (sandboxstore.Sandbox, error) { + metadata := sandboxstore.Metadata{} + status := sandboxstore.Status{} + err := f.GenerateStruct(&metadata) + if err != nil { + return sandboxstore.Sandbox{}, err + } + err = f.GenerateStruct(&status) + if err != nil { + return sandboxstore.Sandbox{}, err + } + + reqString := fmt.Sprintf("metadata: %+v\nstatus: %+v\n", metadata, status) + logExecution("sandboxstore.NewSandbox", reqString) + + return sandboxstore.NewSandbox(metadata, status), nil +} + +// listContainersFuzz creates a ListContainersRequest and passes +// it to c.ListContainers +func listContainersFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.ListContainersRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.ListContainers(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.ListContainers", reqString) + return nil +} + +// startContainerFuzz creates a StartContainerRequest and passes +// it to c.StartContainer +func startContainerFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.StartContainerRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.StartContainer(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.StartContainer", reqString) + return nil +} + +// containerStatsFuzz creates a ContainerStatsRequest and passes +// it to c.ContainerStats +func containerStatsFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.ContainerStatsRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.ContainerStats(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.ContainerStats", reqString) + return nil +} + +// 
listContainerStatsFuzz creates a ListContainerStatsRequest and +// passes it to c.ListContainerStats +func listContainerStatsFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.ListContainerStatsRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.ListContainerStats(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.ListContainerStats", reqString) + return nil +} + +// containerStatusFuzz creates a ContainerStatusRequest and passes +// it to c.ContainerStatus +func containerStatusFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.ContainerStatusRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.ContainerStatus(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.ContainerStatus", reqString) + return nil +} + +// stopContainerFuzz creates a StopContainerRequest and passes +// it to c.StopContainer +func stopContainerFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.StopContainerRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.StopContainer(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.StopContainer", reqString) + return nil +} + +// updateContainerResourcesFuzz creates a UpdateContainerResourcesRequest +// and passes it to c.UpdateContainerResources +func updateContainerResourcesFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.UpdateContainerResourcesRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.UpdateContainerResources(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.UpdateContainerResources", reqString) + return nil +} + +// listImagesFuzz creates a ListImagesRequest and passes it to +// c.ListImages +func listImagesFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.ListImagesRequest{} + err := f.GenerateStruct(r) + if err != 
nil { + return err + } + _, _ = c.ListImages(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.ListImages", reqString) + return nil +} + +// removeImagesFuzz creates a RemoveImageRequest and passes it to +// c.RemoveImage +func removeImagesFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.RemoveImageRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.RemoveImage(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.RemoveImage", reqString) + return nil +} + +// imageStatusFuzz creates an ImageStatusRequest and passes it to +// c.ImageStatus +func imageStatusFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.ImageStatusRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.ImageStatus(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.ImageStatus", reqString) + return nil +} + +// imageFsInfoFuzz creates an ImageFsInfoRequest and passes it to +// c.ImageFsInfo +func imageFsInfoFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.ImageFsInfoRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.ImageFsInfo(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.ImageFsInfo", reqString) + return nil +} + +// listPodSandboxFuzz creates a ListPodSandboxRequest and passes +// it to c.ListPodSandbox +func listPodSandboxFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.ListPodSandboxRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.ListPodSandbox(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.ListPodSandbox", reqString) + return nil +} + +// portForwardFuzz creates a PortForwardRequest and passes it to +// c.PortForward +func portForwardFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.PortForwardRequest{} + err := 
f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.PortForward(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.PortForward", reqString) + return nil +} + +// removePodSandboxFuzz creates a RemovePodSandboxRequest and +// passes it to c.RemovePodSandbox +func removePodSandboxFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.RemovePodSandboxRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.RemovePodSandbox(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.RemovePodSandbox", reqString) + return nil +} + +// runPodSandboxFuzz creates a RunPodSandboxRequest and passes +// it to c.RunPodSandbox +func runPodSandboxFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.RunPodSandboxRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.RunPodSandbox(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.RunPodSandbox", reqString) + return nil +} + +// podSandboxStatusFuzz creates a PodSandboxStatusRequest and +// passes it to +func podSandboxStatusFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.PodSandboxStatusRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.PodSandboxStatus(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.PodSandboxStatus", reqString) + return nil +} + +// stopPodSandboxFuzz creates a StopPodSandboxRequest and passes +// it to c.StopPodSandbox +func stopPodSandboxFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.StopPodSandboxRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.StopPodSandbox(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.StopPodSandbox", reqString) + return nil +} + +// statusFuzz creates a StatusRequest and passes it to c.Status +func statusFuzz(c *criService, f 
*fuzz.ConsumeFuzzer) error { + r := &runtime.StatusRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.Status(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.Status", reqString) + return nil +} + +func updateRuntimeConfigFuzz(c *criService, f *fuzz.ConsumeFuzzer) error { + r := &runtime.UpdateRuntimeConfigRequest{} + err := f.GenerateStruct(r) + if err != nil { + return err + } + _, _ = c.UpdateRuntimeConfig(context.Background(), r) + reqString := fmt.Sprintf("%+v", r) + logExecution("c.UpdateRuntimeConfig", reqString) + return nil +} + +// This creates a container directly in the store. +func getContainer(f *fuzz.ConsumeFuzzer) (containerstore.Container, error) { + metadata := containerstore.Metadata{} + status := containerstore.Status{} + + err := f.GenerateStruct(&metadata) + if err != nil { + return containerstore.Container{}, err + } + err = f.GenerateStruct(&status) + if err != nil { + return containerstore.Container{}, err + } + container, err := containerstore.NewContainer(metadata, containerstore.WithFakeStatus(status)) + return container, err +} + +func newTestCRIServiceForFuzzing(f *fuzz.ConsumeFuzzer) *criService { + labels := label.NewStore() + + return &criService{ + config: testConfig, + imageFSPath: testImageFSPath, + os: ostesting.NewFakeOS(), + sandboxStore: sandboxstore.NewStore(labels), + imageStore: imagestore.NewStore(nil), + snapshotStore: snapshotstore.NewStore(), + sandboxNameIndex: registrar.NewRegistrar(), + containerStore: containerstore.NewStore(labels), + containerNameIndex: registrar.NewRegistrar(), + netPlugin: map[string]cni.CNI{ + defaultNetworkPlugin: servertesting.NewFakeCNIPlugin(), + }, + } +} diff --git a/pkg/cri/sbserver/events.go b/pkg/cri/sbserver/events.go new file mode 100644 index 000000000..38c1794a9 --- /dev/null +++ b/pkg/cri/sbserver/events.go @@ -0,0 +1,541 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "errors" + "fmt" + "sync" + "time" + + "github.com/containerd/containerd" + eventtypes "github.com/containerd/containerd/api/events" + containerdio "github.com/containerd/containerd/cio" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/events" + "github.com/containerd/containerd/pkg/cri/constants" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + ctrdutil "github.com/containerd/containerd/pkg/cri/util" + "github.com/containerd/containerd/protobuf" + "github.com/containerd/typeurl" + "github.com/sirupsen/logrus" + "k8s.io/utils/clock" +) + +const ( + backOffInitDuration = 1 * time.Second + backOffMaxDuration = 5 * time.Minute + backOffExpireCheckDuration = 1 * time.Second + + // handleEventTimeout is the timeout for handling 1 event. Event monitor + // handles events in serial, if one event blocks the event monitor, no + // other events can be handled. + // Add a timeout for each event handling, events that timeout will be requeued and + // handled again in the future. + handleEventTimeout = 10 * time.Second +) + +// eventMonitor monitors containerd event and updates internal state correspondingly. 
+type eventMonitor struct { + c *criService + ch <-chan *events.Envelope + errCh <-chan error + ctx context.Context + cancel context.CancelFunc + backOff *backOff +} + +type backOff struct { + // queuePoolMu is mutex used to protect the queuePool map + queuePoolMu sync.Mutex + + queuePool map[string]*backOffQueue + // tickerMu is mutex used to protect the ticker. + tickerMu sync.Mutex + ticker *time.Ticker + minDuration time.Duration + maxDuration time.Duration + checkDuration time.Duration + clock clock.Clock +} + +type backOffQueue struct { + events []interface{} + expireTime time.Time + duration time.Duration + clock clock.Clock +} + +// Create new event monitor. New event monitor will start subscribing containerd event. All events +// happen after it should be monitored. +func newEventMonitor(c *criService) *eventMonitor { + ctx, cancel := context.WithCancel(context.Background()) + return &eventMonitor{ + c: c, + ctx: ctx, + cancel: cancel, + backOff: newBackOff(), + } +} + +// subscribe starts to subscribe containerd events. +func (em *eventMonitor) subscribe(subscriber events.Subscriber) { + // note: filters are any match, if you want any match but not in namespace foo + // then you have to manually filter namespace foo + filters := []string{ + `topic=="/tasks/oom"`, + `topic~="/images/"`, + } + em.ch, em.errCh = subscriber.Subscribe(em.ctx, filters...) +} + +// startSandboxExitMonitor starts an exit monitor for a given sandbox. 
+func (em *eventMonitor) startSandboxExitMonitor(ctx context.Context, id string, pid uint32, exitCh <-chan containerd.ExitStatus) <-chan struct{} { + stopCh := make(chan struct{}) + go func() { + defer close(stopCh) + select { + case exitRes := <-exitCh: + exitStatus, exitedAt, err := exitRes.Result() + if err != nil { + logrus.WithError(err).Errorf("failed to get task exit status for %q", id) + exitStatus = unknownExitCode + exitedAt = time.Now() + } + + e := &eventtypes.TaskExit{ + ContainerID: id, + ID: id, + Pid: pid, + ExitStatus: exitStatus, + ExitedAt: protobuf.ToTimestamp(exitedAt), + } + + logrus.Debugf("received exit event %+v", e) + + err = func() error { + dctx := ctrdutil.NamespacedContext() + dctx, dcancel := context.WithTimeout(dctx, handleEventTimeout) + defer dcancel() + + sb, err := em.c.sandboxStore.Get(e.ID) + if err == nil { + if err := handleSandboxExit(dctx, e, sb); err != nil { + return err + } + return nil + } else if !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to get sandbox %s: %w", e.ID, err) + } + return nil + }() + if err != nil { + logrus.WithError(err).Errorf("failed to handle sandbox TaskExit event %+v", e) + em.backOff.enBackOff(id, e) + } + return + case <-ctx.Done(): + } + }() + return stopCh +} + +// startContainerExitMonitor starts an exit monitor for a given container. 
+func (em *eventMonitor) startContainerExitMonitor(ctx context.Context, id string, pid uint32, exitCh <-chan containerd.ExitStatus) <-chan struct{} { + stopCh := make(chan struct{}) + go func() { + defer close(stopCh) + select { + case exitRes := <-exitCh: + exitStatus, exitedAt, err := exitRes.Result() + if err != nil { + logrus.WithError(err).Errorf("failed to get task exit status for %q", id) + exitStatus = unknownExitCode + exitedAt = time.Now() + } + + e := &eventtypes.TaskExit{ + ContainerID: id, + ID: id, + Pid: pid, + ExitStatus: exitStatus, + ExitedAt: protobuf.ToTimestamp(exitedAt), + } + + logrus.Debugf("received exit event %+v", e) + + err = func() error { + dctx := ctrdutil.NamespacedContext() + dctx, dcancel := context.WithTimeout(dctx, handleEventTimeout) + defer dcancel() + + cntr, err := em.c.containerStore.Get(e.ID) + if err == nil { + if err := handleContainerExit(dctx, e, cntr); err != nil { + return err + } + return nil + } else if !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to get container %s: %w", e.ID, err) + } + return nil + }() + if err != nil { + logrus.WithError(err).Errorf("failed to handle container TaskExit event %+v", e) + em.backOff.enBackOff(id, e) + } + return + case <-ctx.Done(): + } + }() + return stopCh +} + +func convertEvent(e typeurl.Any) (string, interface{}, error) { + id := "" + evt, err := typeurl.UnmarshalAny(e) + if err != nil { + return "", nil, fmt.Errorf("failed to unmarshalany: %w", err) + } + + switch e := evt.(type) { + case *eventtypes.TaskOOM: + id = e.ContainerID + case *eventtypes.ImageCreate: + id = e.Name + case *eventtypes.ImageUpdate: + id = e.Name + case *eventtypes.ImageDelete: + id = e.Name + default: + return "", nil, errors.New("unsupported event") + } + return id, evt, nil +} + +// start starts the event monitor which monitors and handles all subscribed events. +// It returns an error channel for the caller to wait for stop errors from the +// event monitor. +// +// NOTE: +// 1. 
start must be called after subscribe. +// 2. The task exit event has been handled in individual startSandboxExitMonitor +// or startContainerExitMonitor goroutine at the first. If the goroutine fails, +// it puts the event into backoff retry queue and event monitor will handle +// it later. +func (em *eventMonitor) start() <-chan error { + errCh := make(chan error) + if em.ch == nil || em.errCh == nil { + panic("event channel is nil") + } + backOffCheckCh := em.backOff.start() + go func() { + defer close(errCh) + for { + select { + case e := <-em.ch: + logrus.Debugf("Received containerd event timestamp - %v, namespace - %q, topic - %q", e.Timestamp, e.Namespace, e.Topic) + if e.Namespace != constants.K8sContainerdNamespace { + logrus.Debugf("Ignoring events in namespace - %q", e.Namespace) + break + } + id, evt, err := convertEvent(e.Event) + if err != nil { + logrus.WithError(err).Errorf("Failed to convert event %+v", e) + break + } + if em.backOff.isInBackOff(id) { + logrus.Infof("Events for %q is in backoff, enqueue event %+v", id, evt) + em.backOff.enBackOff(id, evt) + break + } + if err := em.handleEvent(evt); err != nil { + logrus.WithError(err).Errorf("Failed to handle event %+v for %s", evt, id) + em.backOff.enBackOff(id, evt) + } + case err := <-em.errCh: + // Close errCh in defer directly if there is no error. + if err != nil { + logrus.WithError(err).Error("Failed to handle event stream") + errCh <- err + } + return + case <-backOffCheckCh: + ids := em.backOff.getExpiredIDs() + for _, id := range ids { + queue := em.backOff.deBackOff(id) + for i, any := range queue.events { + if err := em.handleEvent(any); err != nil { + logrus.WithError(err).Errorf("Failed to handle backOff event %+v for %s", any, id) + em.backOff.reBackOff(id, queue.events[i:], queue.duration) + break + } + } + } + } + } + }() + return errCh +} + +// stop stops the event monitor. It will close the event channel. +// Once event monitor is stopped, it can't be started. 
+func (em *eventMonitor) stop() { + em.backOff.stop() + em.cancel() +} + +// handleEvent handles a containerd event. +func (em *eventMonitor) handleEvent(any interface{}) error { + ctx := ctrdutil.NamespacedContext() + ctx, cancel := context.WithTimeout(ctx, handleEventTimeout) + defer cancel() + + switch e := any.(type) { + case *eventtypes.TaskExit: + logrus.Infof("TaskExit event %+v", e) + // Use ID instead of ContainerID to rule out TaskExit event for exec. + cntr, err := em.c.containerStore.Get(e.ID) + if err == nil { + if err := handleContainerExit(ctx, e, cntr); err != nil { + return fmt.Errorf("failed to handle container TaskExit event: %w", err) + } + return nil + } else if !errdefs.IsNotFound(err) { + return fmt.Errorf("can't find container for TaskExit event: %w", err) + } + sb, err := em.c.sandboxStore.Get(e.ID) + if err == nil { + if err := handleSandboxExit(ctx, e, sb); err != nil { + return fmt.Errorf("failed to handle sandbox TaskExit event: %w", err) + } + return nil + } else if !errdefs.IsNotFound(err) { + return fmt.Errorf("can't find sandbox for TaskExit event: %w", err) + } + return nil + case *eventtypes.TaskOOM: + logrus.Infof("TaskOOM event %+v", e) + // For TaskOOM, we only care which container it belongs to. 
+ cntr, err := em.c.containerStore.Get(e.ContainerID) + if err != nil { + if !errdefs.IsNotFound(err) { + return fmt.Errorf("can't find container for TaskOOM event: %w", err) + } + return nil + } + err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { + status.Reason = oomExitReason + return status, nil + }) + if err != nil { + return fmt.Errorf("failed to update container status for TaskOOM event: %w", err) + } + case *eventtypes.ImageCreate: + logrus.Infof("ImageCreate event %+v", e) + return em.c.updateImage(ctx, e.Name) + case *eventtypes.ImageUpdate: + logrus.Infof("ImageUpdate event %+v", e) + return em.c.updateImage(ctx, e.Name) + case *eventtypes.ImageDelete: + logrus.Infof("ImageDelete event %+v", e) + return em.c.updateImage(ctx, e.Name) + } + + return nil +} + +// handleContainerExit handles TaskExit event for container. +func handleContainerExit(ctx context.Context, e *eventtypes.TaskExit, cntr containerstore.Container) error { + // Attach container IO so that `Delete` could cleanup the stream properly. + task, err := cntr.Container.Task(ctx, + func(*containerdio.FIFOSet) (containerdio.IO, error) { + // We can't directly return cntr.IO here, because + // even if cntr.IO is nil, the cio.IO interface + // is not. + // See https://tour.golang.org/methods/12: + // Note that an interface value that holds a nil + // concrete value is itself non-nil. + if cntr.IO != nil { + return cntr.IO, nil + } + return nil, nil + }, + ) + if err != nil { + if !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to load task for container: %w", err) + } + } else { + // TODO(random-liu): [P1] This may block the loop, we may want to spawn a worker + if _, err = task.Delete(ctx, WithNRISandboxDelete(cntr.SandboxID), containerd.WithProcessKill); err != nil { + if !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to stop container: %w", err) + } + // Move on to make sure container status is updated. 
+ } + } + err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { + if status.FinishedAt == 0 { + status.Pid = 0 + status.FinishedAt = protobuf.FromTimestamp(e.ExitedAt).UnixNano() + status.ExitCode = int32(e.ExitStatus) + } + + // Unknown state can only transit to EXITED state, so we need + // to handle unknown state here. + if status.Unknown { + logrus.Debugf("Container %q transited from UNKNOWN to EXITED", cntr.ID) + status.Unknown = false + } + return status, nil + }) + if err != nil { + return fmt.Errorf("failed to update container state: %w", err) + } + // Using channel to propagate the information of container stop + cntr.Stop() + return nil +} + +// handleSandboxExit handles TaskExit event for sandbox. +func handleSandboxExit(ctx context.Context, e *eventtypes.TaskExit, sb sandboxstore.Sandbox) error { + // No stream attached to sandbox container. + task, err := sb.Container.Task(ctx, nil) + if err != nil { + if !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to load task for sandbox: %w", err) + } + } else { + // TODO(random-liu): [P1] This may block the loop, we may want to spawn a worker + if _, err = task.Delete(ctx, WithNRISandboxDelete(sb.ID), containerd.WithProcessKill); err != nil { + if !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to stop sandbox: %w", err) + } + // Move on to make sure container status is updated. 
+ } + } + err = sb.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) { + status.State = sandboxstore.StateNotReady + status.Pid = 0 + return status, nil + }) + if err != nil { + return fmt.Errorf("failed to update sandbox state: %w", err) + } + // Using channel to propagate the information of sandbox stop + sb.Stop() + return nil +} + +func newBackOff() *backOff { + return &backOff{ + queuePool: map[string]*backOffQueue{}, + minDuration: backOffInitDuration, + maxDuration: backOffMaxDuration, + checkDuration: backOffExpireCheckDuration, + clock: clock.RealClock{}, + } +} + +func (b *backOff) getExpiredIDs() []string { + b.queuePoolMu.Lock() + defer b.queuePoolMu.Unlock() + + var ids []string + for id, q := range b.queuePool { + if q.isExpire() { + ids = append(ids, id) + } + } + return ids +} + +func (b *backOff) isInBackOff(key string) bool { + b.queuePoolMu.Lock() + defer b.queuePoolMu.Unlock() + + if _, ok := b.queuePool[key]; ok { + return true + } + return false +} + +// enBackOff start to backOff and put event to the tail of queue +func (b *backOff) enBackOff(key string, evt interface{}) { + b.queuePoolMu.Lock() + defer b.queuePoolMu.Unlock() + + if queue, ok := b.queuePool[key]; ok { + queue.events = append(queue.events, evt) + return + } + b.queuePool[key] = newBackOffQueue([]interface{}{evt}, b.minDuration, b.clock) +} + +// enBackOff get out the whole queue +func (b *backOff) deBackOff(key string) *backOffQueue { + b.queuePoolMu.Lock() + defer b.queuePoolMu.Unlock() + + queue := b.queuePool[key] + delete(b.queuePool, key) + return queue +} + +// enBackOff start to backOff again and put events to the queue +func (b *backOff) reBackOff(key string, events []interface{}, oldDuration time.Duration) { + b.queuePoolMu.Lock() + defer b.queuePoolMu.Unlock() + + duration := 2 * oldDuration + if duration > b.maxDuration { + duration = b.maxDuration + } + b.queuePool[key] = newBackOffQueue(events, duration, b.clock) +} + +func (b *backOff) 
start() <-chan time.Time { + b.tickerMu.Lock() + defer b.tickerMu.Unlock() + b.ticker = time.NewTicker(b.checkDuration) + return b.ticker.C +} + +func (b *backOff) stop() { + b.tickerMu.Lock() + defer b.tickerMu.Unlock() + if b.ticker != nil { + b.ticker.Stop() + } +} + +func newBackOffQueue(events []interface{}, init time.Duration, c clock.Clock) *backOffQueue { + return &backOffQueue{ + events: events, + duration: init, + expireTime: c.Now().Add(init), + clock: c, + } +} + +func (q *backOffQueue) isExpire() bool { + // return time.Now >= expireTime + return !q.clock.Now().Before(q.expireTime) +} diff --git a/pkg/cri/sbserver/events_test.go b/pkg/cri/sbserver/events_test.go new file mode 100644 index 000000000..613e82bae --- /dev/null +++ b/pkg/cri/sbserver/events_test.go @@ -0,0 +1,136 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "testing" + "time" + + eventtypes "github.com/containerd/containerd/api/events" + "github.com/containerd/containerd/protobuf" + "github.com/containerd/typeurl" + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/assert" + testingclock "k8s.io/utils/clock/testing" +) + +// TestBackOff tests the logic of backOff struct. 
+func TestBackOff(t *testing.T) { + testStartTime := time.Now() + testClock := testingclock.NewFakeClock(testStartTime) + inputQueues := map[string]*backOffQueue{ + "container1": { + events: []interface{}{ + &eventtypes.TaskOOM{ContainerID: "container1"}, + &eventtypes.TaskOOM{ContainerID: "container1"}, + }, + }, + "container2": { + events: []interface{}{ + &eventtypes.TaskOOM{ContainerID: "container2"}, + &eventtypes.TaskOOM{ContainerID: "container2"}, + }, + }, + } + expectedQueues := map[string]*backOffQueue{ + "container2": { + events: []interface{}{ + &eventtypes.TaskOOM{ContainerID: "container2"}, + &eventtypes.TaskOOM{ContainerID: "container2"}, + }, + expireTime: testClock.Now().Add(backOffInitDuration), + duration: backOffInitDuration, + clock: testClock, + }, + "container1": { + events: []interface{}{ + &eventtypes.TaskOOM{ContainerID: "container1"}, + &eventtypes.TaskOOM{ContainerID: "container1"}, + }, + expireTime: testClock.Now().Add(backOffInitDuration), + duration: backOffInitDuration, + clock: testClock, + }, + } + + t.Logf("Should be able to backOff a event") + actual := newBackOff() + actual.clock = testClock + for k, queue := range inputQueues { + for _, event := range queue.events { + actual.enBackOff(k, event) + } + } + assert.Equal(t, actual.queuePool, expectedQueues) + + t.Logf("Should be able to check if the container is in backOff state") + for k, queue := range inputQueues { + for _, e := range queue.events { + any, err := typeurl.MarshalAny(e) + assert.NoError(t, err) + key, _, err := convertEvent(any) + assert.NoError(t, err) + assert.Equal(t, k, key) + assert.Equal(t, actual.isInBackOff(key), true) + } + } + + t.Logf("Should be able to check that a container isn't in backOff state") + notExistKey := "containerNotExist" + assert.Equal(t, actual.isInBackOff(notExistKey), false) + + t.Logf("No containers should be expired") + assert.Empty(t, actual.getExpiredIDs()) + + t.Logf("Should be able to get all keys which are expired for 
backOff") + testClock.Sleep(backOffInitDuration) + actKeyList := actual.getExpiredIDs() + assert.Equal(t, len(inputQueues), len(actKeyList)) + for k := range inputQueues { + assert.Contains(t, actKeyList, k) + } + + t.Logf("Should be able to get out all backOff events") + doneQueues := map[string]*backOffQueue{} + for k := range inputQueues { + actQueue := actual.deBackOff(k) + doneQueues[k] = actQueue + assert.True(t, cmp.Equal(actQueue.events, expectedQueues[k].events, protobuf.Compare)) + } + + t.Logf("Should not get out the event again after having got out the backOff event") + for k := range inputQueues { + var expect *backOffQueue + actQueue := actual.deBackOff(k) + assert.Equal(t, actQueue, expect) + } + + t.Logf("Should be able to reBackOff") + for k, queue := range doneQueues { + failEventIndex := 1 + events := queue.events[failEventIndex:] + actual.reBackOff(k, events, queue.duration) + actQueue := actual.deBackOff(k) + expQueue := &backOffQueue{ + events: events, + expireTime: testClock.Now().Add(2 * queue.duration), + duration: 2 * queue.duration, + clock: testClock, + } + assert.Equal(t, actQueue, expQueue) + } +} diff --git a/pkg/cri/sbserver/helpers.go b/pkg/cri/sbserver/helpers.go new file mode 100644 index 000000000..88ea39698 --- /dev/null +++ b/pkg/cri/sbserver/helpers.go @@ -0,0 +1,431 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "fmt" + "path" + "path/filepath" + "strconv" + "strings" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/containers" + "github.com/containerd/containerd/errdefs" + clabels "github.com/containerd/containerd/labels" + criconfig "github.com/containerd/containerd/pkg/cri/config" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + imagestore "github.com/containerd/containerd/pkg/cri/store/image" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + runtimeoptions "github.com/containerd/containerd/pkg/runtimeoptions/v1" + "github.com/containerd/containerd/plugin" + "github.com/containerd/containerd/reference/docker" + "github.com/containerd/containerd/runtime/linux/runctypes" + runcoptions "github.com/containerd/containerd/runtime/v2/runc/options" + "github.com/containerd/typeurl" + "github.com/sirupsen/logrus" + + runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + imagedigest "github.com/opencontainers/go-digest" + "github.com/pelletier/go-toml" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +const ( + // errorStartReason is the exit reason when fails to start container. + errorStartReason = "StartError" + // errorStartExitCode is the exit code when fails to start container. + // 128 is the same with Docker's behavior. + // TODO(windows): Figure out what should be used for windows. + errorStartExitCode = 128 + // completeExitReason is the exit reason when container exits with code 0. + completeExitReason = "Completed" + // errorExitReason is the exit reason when container exits with code non-zero. + errorExitReason = "Error" + // oomExitReason is the exit reason when process in container is oom killed. + oomExitReason = "OOMKilled" + + // sandboxesDir contains all sandbox root. 
A sandbox root is the running + // directory of the sandbox, all files created for the sandbox will be + // placed under this directory. + sandboxesDir = "sandboxes" + // containersDir contains all container root. + containersDir = "containers" + // Delimiter used to construct container/sandbox names. + nameDelimiter = "_" + + // criContainerdPrefix is common prefix for cri-containerd + criContainerdPrefix = "io.cri-containerd" + // containerKindLabel is a label key indicating container is sandbox container or application container + containerKindLabel = criContainerdPrefix + ".kind" + // containerKindSandbox is a label value indicating container is sandbox container + containerKindSandbox = "sandbox" + // containerKindContainer is a label value indicating container is application container + containerKindContainer = "container" + // imageLabelKey is the label key indicating the image is managed by cri plugin. + imageLabelKey = criContainerdPrefix + ".image" + // imageLabelValue is the label value indicating the image is managed by cri plugin. + imageLabelValue = "managed" + // sandboxMetadataExtension is an extension name that identify metadata of sandbox in CreateContainerRequest + sandboxMetadataExtension = criContainerdPrefix + ".sandbox.metadata" + // containerMetadataExtension is an extension name that identify metadata of container in CreateContainerRequest + containerMetadataExtension = criContainerdPrefix + ".container.metadata" + + // defaultIfName is the default network interface for the pods + defaultIfName = "eth0" + + // runtimeRunhcsV1 is the runtime type for runhcs. + runtimeRunhcsV1 = "io.containerd.runhcs.v1" +) + +// makeSandboxName generates sandbox name from sandbox metadata. The name +// generated is unique as long as sandbox metadata is unique. 
+func makeSandboxName(s *runtime.PodSandboxMetadata) string { + return strings.Join([]string{ + s.Name, // 0 + s.Namespace, // 1 + s.Uid, // 2 + fmt.Sprintf("%d", s.Attempt), // 3 + }, nameDelimiter) +} + +// makeContainerName generates container name from sandbox and container metadata. +// The name generated is unique as long as the sandbox container combination is +// unique. +func makeContainerName(c *runtime.ContainerMetadata, s *runtime.PodSandboxMetadata) string { + return strings.Join([]string{ + c.Name, // 0: container name + s.Name, // 1: pod name + s.Namespace, // 2: pod namespace + s.Uid, // 3: pod uid + fmt.Sprintf("%d", c.Attempt), // 4: attempt number of creating the container + }, nameDelimiter) +} + +// getSandboxRootDir returns the root directory for managing sandbox files, +// e.g. hosts files. +func (c *criService) getSandboxRootDir(id string) string { + return filepath.Join(c.config.RootDir, sandboxesDir, id) +} + +// getVolatileSandboxRootDir returns the root directory for managing volatile sandbox files, +// e.g. named pipes. +func (c *criService) getVolatileSandboxRootDir(id string) string { + return filepath.Join(c.config.StateDir, sandboxesDir, id) +} + +// getContainerRootDir returns the root directory for managing container files, +// e.g. state checkpoint. +func (c *criService) getContainerRootDir(id string) string { + return filepath.Join(c.config.RootDir, containersDir, id) +} + +// getVolatileContainerRootDir returns the root directory for managing volatile container files, +// e.g. named pipes. +func (c *criService) getVolatileContainerRootDir(id string) string { + return filepath.Join(c.config.StateDir, containersDir, id) +} + +// criContainerStateToString formats CRI container state to string. +func criContainerStateToString(state runtime.ContainerState) string { + return runtime.ContainerState_name[int32(state)] +} + +// getRepoDigestAngTag returns image repoDigest and repoTag of the named image reference. 
+func getRepoDigestAndTag(namedRef docker.Named, digest imagedigest.Digest, schema1 bool) (string, string) { + var repoTag, repoDigest string + if _, ok := namedRef.(docker.NamedTagged); ok { + repoTag = namedRef.String() + } + if _, ok := namedRef.(docker.Canonical); ok { + repoDigest = namedRef.String() + } else if !schema1 { + // digest is not actual repo digest for schema1 image. + repoDigest = namedRef.Name() + "@" + digest.String() + } + return repoDigest, repoTag +} + +// localResolve resolves image reference locally and returns corresponding image metadata. It +// returns errdefs.ErrNotFound if the reference doesn't exist. +func (c *criService) localResolve(refOrID string) (imagestore.Image, error) { + getImageID := func(refOrId string) string { + if _, err := imagedigest.Parse(refOrID); err == nil { + return refOrID + } + return func(ref string) string { + // ref is not image id, try to resolve it locally. + // TODO(random-liu): Handle this error better for debugging. + normalized, err := docker.ParseDockerRef(ref) + if err != nil { + return "" + } + id, err := c.imageStore.Resolve(normalized.String()) + if err != nil { + return "" + } + return id + }(refOrID) + } + + imageID := getImageID(refOrID) + if imageID == "" { + // Try to treat ref as imageID + imageID = refOrID + } + return c.imageStore.Get(imageID) +} + +// toContainerdImage converts an image object in image store to containerd image handler. +func (c *criService) toContainerdImage(ctx context.Context, image imagestore.Image) (containerd.Image, error) { + // image should always have at least one reference. + if len(image.References) == 0 { + return nil, fmt.Errorf("invalid image with no reference %q", image.ID) + } + return c.client.GetImage(ctx, image.References[0]) +} + +// getUserFromImage gets uid or user name of the image user. +// If user is numeric, it will be treated as uid; or else, it is treated as user name. 
+func getUserFromImage(user string) (*int64, string) { + // return both empty if user is not specified in the image. + if user == "" { + return nil, "" + } + // split instances where the id may contain user:group + user = strings.Split(user, ":")[0] + // user could be either uid or user name. Try to interpret as numeric uid. + uid, err := strconv.ParseInt(user, 10, 64) + if err != nil { + // If user is non numeric, assume it's user name. + return nil, user + } + // If user is a numeric uid. + return &uid, "" +} + +// ensureImageExists returns corresponding metadata of the image reference, if image is not +// pulled yet, the function will pull the image. +func (c *criService) ensureImageExists(ctx context.Context, ref string, config *runtime.PodSandboxConfig) (*imagestore.Image, error) { + image, err := c.localResolve(ref) + if err != nil && !errdefs.IsNotFound(err) { + return nil, fmt.Errorf("failed to get image %q: %w", ref, err) + } + if err == nil { + return &image, nil + } + // Pull image to ensure the image exists + resp, err := c.PullImage(ctx, &runtime.PullImageRequest{Image: &runtime.ImageSpec{Image: ref}, SandboxConfig: config}) + if err != nil { + return nil, fmt.Errorf("failed to pull image %q: %w", ref, err) + } + imageID := resp.GetImageRef() + newImage, err := c.imageStore.Get(imageID) + if err != nil { + // It's still possible that someone removed the image right after it is pulled. + return nil, fmt.Errorf("failed to get image %q after pulling: %w", imageID, err) + } + return &newImage, nil +} + +// validateTargetContainer checks that a container is a valid +// target for a container using PID NamespaceMode_TARGET. +// The target container must be in the same sandbox and must be running. +// Returns the target container for convenience. 
+func (c *criService) validateTargetContainer(sandboxID, targetContainerID string) (containerstore.Container, error) { + targetContainer, err := c.containerStore.Get(targetContainerID) + if err != nil { + return containerstore.Container{}, fmt.Errorf("container %q does not exist: %w", targetContainerID, err) + } + + targetSandboxID := targetContainer.Metadata.SandboxID + if targetSandboxID != sandboxID { + return containerstore.Container{}, + fmt.Errorf("container %q (sandbox %s) does not belong to sandbox %s", targetContainerID, targetSandboxID, sandboxID) + } + + status := targetContainer.Status.Get() + if state := status.State(); state != runtime.ContainerState_CONTAINER_RUNNING { + return containerstore.Container{}, fmt.Errorf("container %q is not running - in state %s", targetContainerID, state) + } + + return targetContainer, nil +} + +// isInCRIMounts checks whether a destination is in CRI mount list. +func isInCRIMounts(dst string, mounts []*runtime.Mount) bool { + for _, m := range mounts { + if filepath.Clean(m.ContainerPath) == filepath.Clean(dst) { + return true + } + } + return false +} + +// filterLabel returns a label filter. Use `%q` here because containerd +// filter needs extra quote to work properly. +func filterLabel(k, v string) string { + return fmt.Sprintf("labels.%q==%q", k, v) +} + +// buildLabel builds the labels from config to be passed to containerd +func buildLabels(configLabels, imageConfigLabels map[string]string, containerType string) map[string]string { + labels := make(map[string]string) + + for k, v := range imageConfigLabels { + if err := clabels.Validate(k, v); err == nil { + labels[k] = v + } else { + // In case the image label is invalid, we output a warning and skip adding it to the + // container. 
+ logrus.WithError(err).Warnf("unable to add image label with key %s to the container", k) + } + } + // labels from the CRI request (config) will override labels in the image config + for k, v := range configLabels { + labels[k] = v + } + labels[containerKindLabel] = containerType + return labels +} + +// toRuntimeAuthConfig converts cri plugin auth config to runtime auth config. +func toRuntimeAuthConfig(a criconfig.AuthConfig) *runtime.AuthConfig { + return &runtime.AuthConfig{ + Username: a.Username, + Password: a.Password, + Auth: a.Auth, + IdentityToken: a.IdentityToken, + } +} + +// parseImageReferences parses a list of arbitrary image references and returns +// the repotags and repodigests +func parseImageReferences(refs []string) ([]string, []string) { + var tags, digests []string + for _, ref := range refs { + parsed, err := docker.ParseAnyReference(ref) + if err != nil { + continue + } + if _, ok := parsed.(docker.Canonical); ok { + digests = append(digests, parsed.String()) + } else if _, ok := parsed.(docker.Tagged); ok { + tags = append(tags, parsed.String()) + } + } + return tags, digests +} + +// generateRuntimeOptions generates runtime options from cri plugin config. +func generateRuntimeOptions(r criconfig.Runtime, c criconfig.Config) (interface{}, error) { + if r.Options == nil { + if r.Type != plugin.RuntimeLinuxV1 { + return nil, nil + } + // This is a legacy config, generate runctypes.RuncOptions. + return &runctypes.RuncOptions{ + Runtime: r.Engine, + RuntimeRoot: r.Root, + SystemdCgroup: c.SystemdCgroup, + }, nil + } + optionsTree, err := toml.TreeFromMap(r.Options) + if err != nil { + return nil, err + } + options := getRuntimeOptionsType(r.Type) + if err := optionsTree.Unmarshal(options); err != nil { + return nil, err + } + return options, nil +} + +// getRuntimeOptionsType gets empty runtime options by the runtime type name. 
+func getRuntimeOptionsType(t string) interface{} { + switch t { + case plugin.RuntimeRuncV1: + fallthrough + case plugin.RuntimeRuncV2: + return &runcoptions.Options{} + case plugin.RuntimeLinuxV1: + return &runctypes.RuncOptions{} + case runtimeRunhcsV1: + return &runhcsoptions.Options{} + default: + return &runtimeoptions.Options{} + } +} + +// getRuntimeOptions get runtime options from container metadata. +func getRuntimeOptions(c containers.Container) (interface{}, error) { + from := c.Runtime.Options + if from == nil || from.GetValue() == nil { + return nil, nil + } + opts, err := typeurl.UnmarshalAny(from) + if err != nil { + return nil, err + } + return opts, nil +} + +const ( + // unknownExitCode is the exit code when exit reason is unknown. + unknownExitCode = 255 + // unknownExitReason is the exit reason when exit reason is unknown. + unknownExitReason = "Unknown" +) + +// unknownContainerStatus returns the default container status when its status is unknown. +func unknownContainerStatus() containerstore.Status { + return containerstore.Status{ + CreatedAt: 0, + StartedAt: 0, + FinishedAt: 0, + ExitCode: unknownExitCode, + Reason: unknownExitReason, + Unknown: true, + } +} + +// unknownSandboxStatus returns the default sandbox status when its status is unknown. +func unknownSandboxStatus() sandboxstore.Status { + return sandboxstore.Status{ + State: sandboxstore.StateUnknown, + } +} + +// getPassthroughAnnotations filters requested pod annotations by comparing +// against permitted annotations for the given runtime. +func getPassthroughAnnotations(podAnnotations map[string]string, + runtimePodAnnotations []string) (passthroughAnnotations map[string]string) { + passthroughAnnotations = make(map[string]string) + + for podAnnotationKey, podAnnotationValue := range podAnnotations { + for _, pattern := range runtimePodAnnotations { + // Use path.Match instead of filepath.Match here. 
+ // filepath.Match treated `\\` as path separator + // on windows, which is not what we want. + if ok, _ := path.Match(pattern, podAnnotationKey); ok { + passthroughAnnotations[podAnnotationKey] = podAnnotationValue + } + } + } + return passthroughAnnotations +} diff --git a/pkg/cri/sbserver/helpers_linux.go b/pkg/cri/sbserver/helpers_linux.go new file mode 100644 index 000000000..060e7631e --- /dev/null +++ b/pkg/cri/sbserver/helpers_linux.go @@ -0,0 +1,277 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + "os" + "path" + "path/filepath" + "regexp" + "sort" + "strings" + "syscall" + "time" + + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/pkg/apparmor" + "github.com/containerd/containerd/pkg/seccomp" + "github.com/containerd/containerd/pkg/seutil" + "github.com/moby/sys/mountinfo" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux/label" + "golang.org/x/sys/unix" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +const ( + // defaultSandboxOOMAdj is default omm adj for sandbox container. (kubernetes#47938). + defaultSandboxOOMAdj = -998 + // defaultShmSize is the default size of the sandbox shm. + defaultShmSize = int64(1024 * 1024 * 64) + // relativeRootfsPath is the rootfs path relative to bundle path. 
+ relativeRootfsPath = "rootfs" + // devShm is the default path of /dev/shm. + devShm = "/dev/shm" + // etcHosts is the default path of /etc/hosts file. + etcHosts = "/etc/hosts" + // etcHostname is the default path of /etc/hostname file. + etcHostname = "/etc/hostname" + // resolvConfPath is the abs path of resolv.conf on host or container. + resolvConfPath = "/etc/resolv.conf" + // hostnameEnv is the key for HOSTNAME env. + hostnameEnv = "HOSTNAME" +) + +// getCgroupsPath generates container cgroups path. +func getCgroupsPath(cgroupsParent, id string) string { + base := path.Base(cgroupsParent) + if strings.HasSuffix(base, ".slice") { + // For a.slice/b.slice/c.slice, base is c.slice. + // runc systemd cgroup path format is "slice:prefix:name". + return strings.Join([]string{base, "cri-containerd", id}, ":") + } + return filepath.Join(cgroupsParent, id) +} + +// getSandboxHostname returns the hostname file path inside the sandbox root directory. +func (c *criService) getSandboxHostname(id string) string { + return filepath.Join(c.getSandboxRootDir(id), "hostname") +} + +// getSandboxHosts returns the hosts file path inside the sandbox root directory. +func (c *criService) getSandboxHosts(id string) string { + return filepath.Join(c.getSandboxRootDir(id), "hosts") +} + +// getResolvPath returns resolv.conf filepath for specified sandbox. +func (c *criService) getResolvPath(id string) string { + return filepath.Join(c.getSandboxRootDir(id), "resolv.conf") +} + +// getSandboxDevShm returns the shm file path inside the sandbox root directory. 
+func (c *criService) getSandboxDevShm(id string) string { + return filepath.Join(c.getVolatileSandboxRootDir(id), "shm") +} + +func toLabel(selinuxOptions *runtime.SELinuxOption) ([]string, error) { + var labels []string + + if selinuxOptions == nil { + return nil, nil + } + if err := checkSelinuxLevel(selinuxOptions.Level); err != nil { + return nil, err + } + if selinuxOptions.User != "" { + labels = append(labels, "user:"+selinuxOptions.User) + } + if selinuxOptions.Role != "" { + labels = append(labels, "role:"+selinuxOptions.Role) + } + if selinuxOptions.Type != "" { + labels = append(labels, "type:"+selinuxOptions.Type) + } + if selinuxOptions.Level != "" { + labels = append(labels, "level:"+selinuxOptions.Level) + } + + return labels, nil +} + +func initLabelsFromOpt(selinuxOpts *runtime.SELinuxOption) (string, string, error) { + labels, err := toLabel(selinuxOpts) + if err != nil { + return "", "", err + } + return label.InitLabels(labels) +} + +func checkSelinuxLevel(level string) error { + if len(level) == 0 { + return nil + } + + matched, err := regexp.MatchString(`^s\d(-s\d)??(:c\d{1,4}(\.c\d{1,4})?(,c\d{1,4}(\.c\d{1,4})?)*)?$`, level) + if err != nil { + return fmt.Errorf("the format of 'level' %q is not correct: %w", level, err) + } + if !matched { + return fmt.Errorf("the format of 'level' %q is not correct", level) + } + return nil +} + +// apparmorEnabled returns true if apparmor is enabled, supported by the host, +// if apparmor_parser is installed, and if we are not running docker-in-docker. +func (c *criService) apparmorEnabled() bool { + if c.config.DisableApparmor { + return false + } + return apparmor.HostSupports() +} + +func (c *criService) seccompEnabled() bool { + return seccomp.IsEnabled() +} + +// openLogFile opens/creates a container log file. 
+func openLogFile(path string) (*os.File, error) { + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return nil, err + } + return os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0640) +} + +// unmountRecursive unmounts the target and all mounts underneath, starting with +// the deepest mount first. +func unmountRecursive(ctx context.Context, target string) error { + toUnmount, err := mountinfo.GetMounts(mountinfo.PrefixFilter(target)) + if err != nil { + return err + } + + // Make the deepest mount be first + sort.Slice(toUnmount, func(i, j int) bool { + return len(toUnmount[i].Mountpoint) > len(toUnmount[j].Mountpoint) + }) + + for i, m := range toUnmount { + if err := mount.UnmountAll(m.Mountpoint, unix.MNT_DETACH); err != nil { + if i == len(toUnmount)-1 { // last mount + return err + } + // This is some submount, we can ignore this error for now, the final unmount will fail if this is a real problem + log.G(ctx).WithError(err).Debugf("failed to unmount submount %s", m.Mountpoint) + } + } + return nil +} + +// ensureRemoveAll wraps `os.RemoveAll` to check for specific errors that can +// often be remedied. +// Only use `ensureRemoveAll` if you really want to make every effort to remove +// a directory. +// +// Because of the way `os.Remove` (and by extension `os.RemoveAll`) works, there +// can be a race between reading directory entries and then actually attempting +// to remove everything in the directory. +// These types of errors do not need to be returned since it's ok for the dir to +// be gone we can just retry the remove operation. +// +// This should not return a `os.ErrNotExist` kind of error under any circumstances +func ensureRemoveAll(ctx context.Context, dir string) error { + notExistErr := make(map[string]bool) + + // track retries + exitOnErr := make(map[string]int) + maxRetry := 50 + + // Attempt to unmount anything beneath this dir first. 
+ if err := unmountRecursive(ctx, dir); err != nil { + log.G(ctx).WithError(err).Debugf("failed to do initial unmount of %s", dir) + } + + for { + err := os.RemoveAll(dir) + if err == nil { + return nil + } + + pe, ok := err.(*os.PathError) + if !ok { + return err + } + + if os.IsNotExist(err) { + if notExistErr[pe.Path] { + return err + } + notExistErr[pe.Path] = true + + // There is a race where some subdir can be removed but after the + // parent dir entries have been read. + // So the path could be from `os.Remove(subdir)` + // If the reported non-existent path is not the passed in `dir` we + // should just retry, but otherwise return with no error. + if pe.Path == dir { + return nil + } + continue + } + + if pe.Err != syscall.EBUSY { + return err + } + if e := mount.Unmount(pe.Path, unix.MNT_DETACH); e != nil { + return fmt.Errorf("error while removing %s: %w", dir, e) + } + + if exitOnErr[pe.Path] == maxRetry { + return err + } + exitOnErr[pe.Path]++ + time.Sleep(100 * time.Millisecond) + } +} + +var vmbasedRuntimes = []string{ + "io.containerd.kata", +} + +func isVMBasedRuntime(runtimeType string) bool { + for _, rt := range vmbasedRuntimes { + if strings.Contains(runtimeType, rt) { + return true + } + } + return false +} + +func modifyProcessLabel(runtimeType string, spec *specs.Spec) error { + if !isVMBasedRuntime(runtimeType) { + return nil + } + l, err := seutil.ChangeToKVM(spec.Process.SelinuxLabel) + if err != nil { + return fmt.Errorf("failed to get selinux kvm label: %w", err) + } + spec.Process.SelinuxLabel = l + return nil +} diff --git a/pkg/cri/sbserver/helpers_linux_test.go b/pkg/cri/sbserver/helpers_linux_test.go new file mode 100644 index 000000000..b0ded7c81 --- /dev/null +++ b/pkg/cri/sbserver/helpers_linux_test.go @@ -0,0 +1,100 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "golang.org/x/sys/unix" +) + +func TestGetCgroupsPath(t *testing.T) { + testID := "test-id" + for desc, test := range map[string]struct { + cgroupsParent string + expected string + }{ + "should support regular cgroup path": { + cgroupsParent: "/a/b", + expected: "/a/b/test-id", + }, + "should support systemd cgroup path": { + cgroupsParent: "/a.slice/b.slice", + expected: "b.slice:cri-containerd:test-id", + }, + "should support tailing slash for regular cgroup path": { + cgroupsParent: "/a/b/", + expected: "/a/b/test-id", + }, + "should support tailing slash for systemd cgroup path": { + cgroupsParent: "/a.slice/b.slice/", + expected: "b.slice:cri-containerd:test-id", + }, + "should treat root cgroup as regular cgroup path": { + cgroupsParent: "/", + expected: "/test-id", + }, + } { + t.Run(desc, func(t *testing.T) { + got := getCgroupsPath(test.cgroupsParent, testID) + assert.Equal(t, test.expected, got) + }) + } +} + +func TestEnsureRemoveAllWithMount(t *testing.T) { + if os.Getuid() != 0 { + t.Skip("skipping test that requires root") + } + + var err error + dir1 := t.TempDir() + dir2 := t.TempDir() + + bindDir := filepath.Join(dir1, "bind") + if err := os.MkdirAll(bindDir, 0755); err != nil { + t.Fatal(err) + } + + if err := unix.Mount(dir2, bindDir, "none", unix.MS_BIND, ""); err != nil { + t.Fatal(err) + } + + done := make(chan struct{}) + go func() { + err = ensureRemoveAll(context.Background(), dir1) + close(done) + }() + + 
select { + case <-done: + if err != nil { + t.Fatal(err) + } + case <-time.After(5 * time.Second): + t.Fatal("timeout waiting for EnsureRemoveAll to finish") + } + + if _, err := os.Stat(dir1); !os.IsNotExist(err) { + t.Fatalf("expected %q to not exist", dir1) + } +} diff --git a/pkg/cri/sbserver/helpers_other.go b/pkg/cri/sbserver/helpers_other.go new file mode 100644 index 000000000..7472439dd --- /dev/null +++ b/pkg/cri/sbserver/helpers_other.go @@ -0,0 +1,44 @@ +//go:build !windows && !linux +// +build !windows,!linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "os" + + "github.com/opencontainers/runtime-spec/specs-go" +) + +// openLogFile opens/creates a container log file. +func openLogFile(path string) (*os.File, error) { + return os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0640) +} + +// ensureRemoveAll wraps `os.RemoveAll` to check for specific errors that can +// often be remedied. +// Only use `ensureRemoveAll` if you really want to make every effort to remove +// a directory. 
+func ensureRemoveAll(ctx context.Context, dir string) error { + return os.RemoveAll(dir) +} + +func modifyProcessLabel(runtimeType string, spec *specs.Spec) error { + return nil +} diff --git a/pkg/cri/sbserver/helpers_selinux_linux_test.go b/pkg/cri/sbserver/helpers_selinux_linux_test.go new file mode 100644 index 000000000..d925235b1 --- /dev/null +++ b/pkg/cri/sbserver/helpers_selinux_linux_test.go @@ -0,0 +1,157 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "testing" + + "github.com/opencontainers/selinux/go-selinux" + "github.com/stretchr/testify/assert" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +func TestInitSelinuxOpts(t *testing.T) { + if !selinux.GetEnabled() { + t.Skip("selinux is not enabled") + } + + for desc, test := range map[string]struct { + selinuxOpt *runtime.SELinuxOption + processLabel string + mountLabel string + expectErr bool + }{ + "Should return empty strings for processLabel and mountLabel when selinuxOpt is nil": { + selinuxOpt: nil, + processLabel: ".*:c[0-9]{1,3},c[0-9]{1,3}", + mountLabel: ".*:c[0-9]{1,3},c[0-9]{1,3}", + }, + "Should overlay fields on processLabel when selinuxOpt has been initialized partially": { + selinuxOpt: &runtime.SELinuxOption{ + User: "", + Role: "user_r", + Type: "", + Level: "s0:c1,c2", + }, + processLabel: "system_u:user_r:(container_file_t|svirt_lxc_net_t):s0:c1,c2", + mountLabel: "system_u:object_r:(container_file_t|svirt_sandbox_file_t):s0:c1,c2", + }, + "Should be resolved correctly when selinuxOpt has been initialized completely": { + selinuxOpt: &runtime.SELinuxOption{ + User: "user_u", + Role: "user_r", + Type: "user_t", + Level: "s0:c1,c2", + }, + processLabel: "user_u:user_r:user_t:s0:c1,c2", + mountLabel: "user_u:object_r:(container_file_t|svirt_sandbox_file_t):s0:c1,c2", + }, + "Should be resolved correctly when selinuxOpt has been initialized with level=''": { + selinuxOpt: &runtime.SELinuxOption{ + User: "user_u", + Role: "user_r", + Type: "user_t", + Level: "", + }, + processLabel: "user_u:user_r:user_t:s0:c[0-9]{1,3},c[0-9]{1,3}", + mountLabel: "user_u:object_r:(container_file_t|svirt_sandbox_file_t):s0", + }, + "Should return error when the format of 'level' is not correct": { + selinuxOpt: &runtime.SELinuxOption{ + User: "user_u", + Role: "user_r", + Type: "user_t", + Level: "s0,c1,c2", + }, + expectErr: true, + }, + } { + t.Run(desc, func(t *testing.T) { + processLabel, mountLabel, err := 
initLabelsFromOpt(test.selinuxOpt) + if test.expectErr { + assert.Error(t, err) + } else { + assert.Regexp(t, test.processLabel, processLabel) + assert.Regexp(t, test.mountLabel, mountLabel) + } + }) + } +} + +func TestCheckSelinuxLevel(t *testing.T) { + for desc, test := range map[string]struct { + level string + expectNoMatch bool + }{ + "s0": { + level: "s0", + }, + "s0-s0": { + level: "s0-s0", + }, + "s0:c0": { + level: "s0:c0", + }, + "s0:c0.c3": { + level: "s0:c0.c3", + }, + "s0:c0,c3": { + level: "s0:c0,c3", + }, + "s0-s0:c0,c3": { + level: "s0-s0:c0,c3", + }, + "s0-s0:c0,c3.c6": { + level: "s0-s0:c0,c3.c6", + }, + "s0-s0:c0,c3.c6,c8.c10": { + level: "s0-s0:c0,c3.c6,c8.c10", + }, + "s0-s0:c0,c3.c6,c8,c10": { + level: "s0-s0:c0,c3.c6", + }, + "s0,c0,c3": { + level: "s0,c0,c3", + expectNoMatch: true, + }, + "s0:c0.c3.c6": { + level: "s0:c0.c3.c6", + expectNoMatch: true, + }, + "s0-s0,c0,c3": { + level: "s0-s0,c0,c3", + expectNoMatch: true, + }, + "s0-s0:c0.c3.c6": { + level: "s0-s0:c0.c3.c6", + expectNoMatch: true, + }, + "s0-s0:c0,c3.c6.c8": { + level: "s0-s0:c0,c3.c6.c8", + expectNoMatch: true, + }, + } { + t.Run(desc, func(t *testing.T) { + err := checkSelinuxLevel(test.level) + if test.expectNoMatch { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} diff --git a/pkg/cri/sbserver/helpers_test.go b/pkg/cri/sbserver/helpers_test.go new file mode 100644 index 000000000..b9a9aa8a8 --- /dev/null +++ b/pkg/cri/sbserver/helpers_test.go @@ -0,0 +1,617 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "os" + "strings" + "testing" + "time" + + "github.com/containerd/containerd/containers" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/oci" + criconfig "github.com/containerd/containerd/pkg/cri/config" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + imagestore "github.com/containerd/containerd/pkg/cri/store/image" + "github.com/containerd/containerd/plugin" + "github.com/containerd/containerd/protobuf/types" + "github.com/containerd/containerd/reference/docker" + "github.com/containerd/containerd/runtime/linux/runctypes" + runcoptions "github.com/containerd/containerd/runtime/v2/runc/options" + "github.com/containerd/typeurl" + + imagedigest "github.com/opencontainers/go-digest" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pelletier/go-toml" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestGetUserFromImage tests the logic of getting image uid or user name of image user. 
+func TestGetUserFromImage(t *testing.T) { + newI64 := func(i int64) *int64 { return &i } + for c, test := range map[string]struct { + user string + uid *int64 + name string + }{ + "no gid": { + user: "0", + uid: newI64(0), + }, + "uid/gid": { + user: "0:1", + uid: newI64(0), + }, + "empty user": { + user: "", + }, + "multiple separators": { + user: "1:2:3", + uid: newI64(1), + }, + "root username": { + user: "root:root", + name: "root", + }, + "username": { + user: "test:test", + name: "test", + }, + } { + t.Run(c, func(t *testing.T) { + actualUID, actualName := getUserFromImage(test.user) + assert.Equal(t, test.uid, actualUID) + assert.Equal(t, test.name, actualName) + }) + } +} + +func TestGetRepoDigestAndTag(t *testing.T) { + digest := imagedigest.Digest("sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582") + for desc, test := range map[string]struct { + ref string + schema1 bool + expectedRepoDigest string + expectedRepoTag string + }{ + "repo tag should be empty if original ref has no tag": { + ref: "gcr.io/library/busybox@" + digest.String(), + expectedRepoDigest: "gcr.io/library/busybox@" + digest.String(), + }, + "repo tag should not be empty if original ref has tag": { + ref: "gcr.io/library/busybox:latest", + expectedRepoDigest: "gcr.io/library/busybox@" + digest.String(), + expectedRepoTag: "gcr.io/library/busybox:latest", + }, + "repo digest should be empty if original ref is schema1 and has no digest": { + ref: "gcr.io/library/busybox:latest", + schema1: true, + expectedRepoDigest: "", + expectedRepoTag: "gcr.io/library/busybox:latest", + }, + "repo digest should not be empty if original ref is schema1 but has digest": { + ref: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59594", + schema1: true, + expectedRepoDigest: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59594", + expectedRepoTag: "", + }, + } { + t.Run(desc, func(t *testing.T) { + 
named, err := docker.ParseDockerRef(test.ref) + assert.NoError(t, err) + repoDigest, repoTag := getRepoDigestAndTag(named, digest, test.schema1) + assert.Equal(t, test.expectedRepoDigest, repoDigest) + assert.Equal(t, test.expectedRepoTag, repoTag) + }) + } +} + +func TestBuildLabels(t *testing.T) { + imageConfigLabels := map[string]string{ + "a": "z", + "d": "y", + "long-label": strings.Repeat("example", 10000), + } + configLabels := map[string]string{ + "a": "b", + "c": "d", + } + newLabels := buildLabels(configLabels, imageConfigLabels, containerKindSandbox) + assert.Len(t, newLabels, 4) + assert.Equal(t, "b", newLabels["a"]) + assert.Equal(t, "d", newLabels["c"]) + assert.Equal(t, "y", newLabels["d"]) + assert.Equal(t, containerKindSandbox, newLabels[containerKindLabel]) + assert.NotContains(t, newLabels, "long-label") + + newLabels["a"] = "e" + assert.Empty(t, configLabels[containerKindLabel], "should not add new labels into original label") + assert.Equal(t, "b", configLabels["a"], "change in new labels should not affect original label") +} + +func TestParseImageReferences(t *testing.T) { + refs := []string{ + "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", + "gcr.io/library/busybox:1.2", + "sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", + "arbitrary-ref", + } + expectedTags := []string{ + "gcr.io/library/busybox:1.2", + } + expectedDigests := []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"} + tags, digests := parseImageReferences(refs) + assert.Equal(t, expectedTags, tags) + assert.Equal(t, expectedDigests, digests) +} + +func TestLocalResolve(t *testing.T) { + image := imagestore.Image{ + ID: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799", + ChainID: "test-chain-id-1", + References: []string{ + "docker.io/library/busybox:latest", + 
"docker.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", + }, + Size: 10, + } + c := newTestCRIService() + var err error + c.imageStore, err = imagestore.NewFakeStore([]imagestore.Image{image}) + assert.NoError(t, err) + + for _, ref := range []string{ + "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799", + "busybox", + "busybox:latest", + "busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", + "library/busybox", + "library/busybox:latest", + "library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", + "docker.io/busybox", + "docker.io/busybox:latest", + "docker.io/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", + "docker.io/library/busybox", + "docker.io/library/busybox:latest", + "docker.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", + } { + img, err := c.localResolve(ref) + assert.NoError(t, err) + assert.Equal(t, image, img) + } + img, err := c.localResolve("randomid") + assert.Equal(t, errdefs.IsNotFound(err), true) + assert.Equal(t, imagestore.Image{}, img) +} + +func TestGenerateRuntimeOptions(t *testing.T) { + nilOpts := ` +systemd_cgroup = true +[containerd] + no_pivot = true + default_runtime_name = "default" +[containerd.runtimes.legacy] + runtime_type = "` + plugin.RuntimeLinuxV1 + `" +[containerd.runtimes.runc] + runtime_type = "` + plugin.RuntimeRuncV1 + `" +[containerd.runtimes.runcv2] + runtime_type = "` + plugin.RuntimeRuncV2 + `" +` + nonNilOpts := ` +systemd_cgroup = true +[containerd] + no_pivot = true + default_runtime_name = "default" +[containerd.runtimes.legacy] + runtime_type = "` + plugin.RuntimeLinuxV1 + `" +[containerd.runtimes.legacy.options] + Runtime = "legacy" + RuntimeRoot = "/legacy" +[containerd.runtimes.runc] + runtime_type = "` + plugin.RuntimeRuncV1 + `" +[containerd.runtimes.runc.options] + BinaryName = "runc" + Root = 
"/runc" + NoNewKeyring = true +[containerd.runtimes.runcv2] + runtime_type = "` + plugin.RuntimeRuncV2 + `" +[containerd.runtimes.runcv2.options] + BinaryName = "runc" + Root = "/runcv2" + NoNewKeyring = true +` + var nilOptsConfig, nonNilOptsConfig criconfig.Config + tree, err := toml.Load(nilOpts) + require.NoError(t, err) + err = tree.Unmarshal(&nilOptsConfig) + require.NoError(t, err) + require.Len(t, nilOptsConfig.Runtimes, 3) + + tree, err = toml.Load(nonNilOpts) + require.NoError(t, err) + err = tree.Unmarshal(&nonNilOptsConfig) + require.NoError(t, err) + require.Len(t, nonNilOptsConfig.Runtimes, 3) + + for desc, test := range map[string]struct { + r criconfig.Runtime + c criconfig.Config + expectedOptions interface{} + }{ + "when options is nil, should return nil option for io.containerd.runc.v1": { + r: nilOptsConfig.Runtimes["runc"], + c: nilOptsConfig, + expectedOptions: nil, + }, + "when options is nil, should return nil option for io.containerd.runc.v2": { + r: nilOptsConfig.Runtimes["runcv2"], + c: nilOptsConfig, + expectedOptions: nil, + }, + "when options is nil, should use legacy fields for legacy runtime": { + r: nilOptsConfig.Runtimes["legacy"], + c: nilOptsConfig, + expectedOptions: &runctypes.RuncOptions{ + SystemdCgroup: true, + }, + }, + "when options is not nil, should be able to decode for io.containerd.runc.v1": { + r: nonNilOptsConfig.Runtimes["runc"], + c: nonNilOptsConfig, + expectedOptions: &runcoptions.Options{ + BinaryName: "runc", + Root: "/runc", + NoNewKeyring: true, + }, + }, + "when options is not nil, should be able to decode for io.containerd.runc.v2": { + r: nonNilOptsConfig.Runtimes["runcv2"], + c: nonNilOptsConfig, + expectedOptions: &runcoptions.Options{ + BinaryName: "runc", + Root: "/runcv2", + NoNewKeyring: true, + }, + }, + "when options is not nil, should be able to decode for legacy runtime": { + r: nonNilOptsConfig.Runtimes["legacy"], + c: nonNilOptsConfig, + expectedOptions: &runctypes.RuncOptions{ + Runtime: 
"legacy", + RuntimeRoot: "/legacy", + }, + }, + } { + t.Run(desc, func(t *testing.T) { + opts, err := generateRuntimeOptions(test.r, test.c) + assert.NoError(t, err) + assert.Equal(t, test.expectedOptions, opts) + }) + } +} + +func TestEnvDeduplication(t *testing.T) { + for desc, test := range map[string]struct { + existing []string + kv [][2]string + expected []string + }{ + "single env": { + kv: [][2]string{ + {"a", "b"}, + }, + expected: []string{"a=b"}, + }, + "multiple envs": { + kv: [][2]string{ + {"a", "b"}, + {"c", "d"}, + {"e", "f"}, + }, + expected: []string{ + "a=b", + "c=d", + "e=f", + }, + }, + "env override": { + kv: [][2]string{ + {"k1", "v1"}, + {"k2", "v2"}, + {"k3", "v3"}, + {"k3", "v4"}, + {"k1", "v5"}, + {"k4", "v6"}, + }, + expected: []string{ + "k1=v5", + "k2=v2", + "k3=v4", + "k4=v6", + }, + }, + "existing env": { + existing: []string{ + "k1=v1", + "k2=v2", + "k3=v3", + }, + kv: [][2]string{ + {"k3", "v4"}, + {"k2", "v5"}, + {"k4", "v6"}, + }, + expected: []string{ + "k1=v1", + "k2=v5", + "k3=v4", + "k4=v6", + }, + }, + } { + t.Run(desc, func(t *testing.T) { + var spec runtimespec.Spec + if len(test.existing) > 0 { + spec.Process = &runtimespec.Process{ + Env: test.existing, + } + } + for _, kv := range test.kv { + oci.WithEnv([]string{kv[0] + "=" + kv[1]})(context.Background(), nil, nil, &spec) + } + assert.Equal(t, test.expected, spec.Process.Env) + }) + } +} + +func TestPassThroughAnnotationsFilter(t *testing.T) { + for desc, test := range map[string]struct { + podAnnotations map[string]string + runtimePodAnnotations []string + passthroughAnnotations map[string]string + }{ + "should support direct match": { + podAnnotations: map[string]string{"c": "d", "d": "e"}, + runtimePodAnnotations: []string{"c"}, + passthroughAnnotations: map[string]string{"c": "d"}, + }, + "should support wildcard match": { + podAnnotations: map[string]string{ + "t.f": "j", + "z.g": "o", + "z": "o", + "y.ca": "b", + "y": "b", + }, + runtimePodAnnotations: 
[]string{"*.f", "z*g", "y.c*"}, + passthroughAnnotations: map[string]string{ + "t.f": "j", + "z.g": "o", + "y.ca": "b", + }, + }, + "should support wildcard match all": { + podAnnotations: map[string]string{ + "t.f": "j", + "z.g": "o", + "z": "o", + "y.ca": "b", + "y": "b", + }, + runtimePodAnnotations: []string{"*"}, + passthroughAnnotations: map[string]string{ + "t.f": "j", + "z.g": "o", + "z": "o", + "y.ca": "b", + "y": "b", + }, + }, + "should support match including path separator": { + podAnnotations: map[string]string{ + "matchend.com/end": "1", + "matchend.com/end1": "2", + "matchend.com/1end": "3", + "matchmid.com/mid": "4", + "matchmid.com/mi1d": "5", + "matchmid.com/mid1": "6", + "matchhead.com/head": "7", + "matchhead.com/1head": "8", + "matchhead.com/head1": "9", + "matchall.com/abc": "10", + "matchall.com/def": "11", + "end/matchend": "12", + "end1/matchend": "13", + "1end/matchend": "14", + "mid/matchmid": "15", + "mi1d/matchmid": "16", + "mid1/matchmid": "17", + "head/matchhead": "18", + "1head/matchhead": "19", + "head1/matchhead": "20", + "abc/matchall": "21", + "def/matchall": "22", + "match1/match2": "23", + "nomatch/nomatch": "24", + }, + runtimePodAnnotations: []string{ + "matchend.com/end*", + "matchmid.com/mi*d", + "matchhead.com/*head", + "matchall.com/*", + "end*/matchend", + "mi*d/matchmid", + "*head/matchhead", + "*/matchall", + "match*/match*", + }, + passthroughAnnotations: map[string]string{ + "matchend.com/end": "1", + "matchend.com/end1": "2", + "matchmid.com/mid": "4", + "matchmid.com/mi1d": "5", + "matchhead.com/head": "7", + "matchhead.com/1head": "8", + "matchall.com/abc": "10", + "matchall.com/def": "11", + "end/matchend": "12", + "end1/matchend": "13", + "mid/matchmid": "15", + "mi1d/matchmid": "16", + "head/matchhead": "18", + "1head/matchhead": "19", + "abc/matchall": "21", + "def/matchall": "22", + "match1/match2": "23", + }, + }, + } { + t.Run(desc, func(t *testing.T) { + passthroughAnnotations := 
getPassthroughAnnotations(test.podAnnotations, test.runtimePodAnnotations) + assert.Equal(t, test.passthroughAnnotations, passthroughAnnotations) + }) + } +} + +func TestEnsureRemoveAllNotExist(t *testing.T) { + // should never return an error for a non-existent path + if err := ensureRemoveAll(context.Background(), "/non/existent/path"); err != nil { + t.Fatal(err) + } +} + +func TestEnsureRemoveAllWithDir(t *testing.T) { + dir := t.TempDir() + if err := ensureRemoveAll(context.Background(), dir); err != nil { + t.Fatal(err) + } +} + +func TestEnsureRemoveAllWithFile(t *testing.T) { + tmp, err := os.CreateTemp("", "test-ensure-removeall-with-dir") + if err != nil { + t.Fatal(err) + } + tmp.Close() + if err := ensureRemoveAll(context.Background(), tmp.Name()); err != nil { + t.Fatal(err) + } +} + +// Helper function for setting up an environment to test PID namespace targeting. +func addContainer(c *criService, containerID, sandboxID string, PID uint32, createdAt, startedAt, finishedAt int64) error { + meta := containerstore.Metadata{ + ID: containerID, + SandboxID: sandboxID, + } + status := containerstore.Status{ + Pid: PID, + CreatedAt: createdAt, + StartedAt: startedAt, + FinishedAt: finishedAt, + } + container, err := containerstore.NewContainer(meta, + containerstore.WithFakeStatus(status), + ) + if err != nil { + return err + } + return c.containerStore.Add(container) +} + +func TestValidateTargetContainer(t *testing.T) { + testSandboxID := "test-sandbox-uid" + + // The existing container that will be targeted. + testTargetContainerID := "test-target-container" + testTargetContainerPID := uint32(4567) + + // A container that has finished running and cannot be targeted. + testStoppedContainerID := "stopped-target-container" + testStoppedContainerPID := uint32(6789) + + // A container from another pod. 
+ testOtherContainerSandboxID := "other-sandbox-uid" + testOtherContainerID := "other-target-container" + testOtherContainerPID := uint32(7890) + + // Container create/start/stop times. + createdAt := time.Now().Add(-15 * time.Second).UnixNano() + startedAt := time.Now().Add(-10 * time.Second).UnixNano() + finishedAt := time.Now().Add(-5 * time.Second).UnixNano() + + c := newTestCRIService() + + // Create a target container. + err := addContainer(c, testTargetContainerID, testSandboxID, testTargetContainerPID, createdAt, startedAt, 0) + require.NoError(t, err, "error creating test target container") + + // Create a stopped container. + err = addContainer(c, testStoppedContainerID, testSandboxID, testStoppedContainerPID, createdAt, startedAt, finishedAt) + require.NoError(t, err, "error creating test stopped container") + + // Create a container in another pod. + err = addContainer(c, testOtherContainerID, testOtherContainerSandboxID, testOtherContainerPID, createdAt, startedAt, 0) + require.NoError(t, err, "error creating test container in other pod") + + for desc, test := range map[string]struct { + targetContainerID string + expectError bool + }{ + "target container in pod": { + targetContainerID: testTargetContainerID, + expectError: false, + }, + "target stopped container in pod": { + targetContainerID: testStoppedContainerID, + expectError: true, + }, + "target container does not exist": { + targetContainerID: "no-container-with-this-id", + expectError: true, + }, + "target container in other pod": { + targetContainerID: testOtherContainerID, + expectError: true, + }, + } { + t.Run(desc, func(t *testing.T) { + targetContainer, err := c.validateTargetContainer(testSandboxID, test.targetContainerID) + if test.expectError { + require.Error(t, err, "target should have been invalid but no error") + return + } + require.NoErrorf(t, err, "target should have been valid but got error") + + assert.Equal(t, test.targetContainerID, targetContainer.ID, "returned target 
container does not have expected ID") + }) + } + +} + +func TestGetRuntimeOptions(t *testing.T) { + _, err := getRuntimeOptions(containers.Container{}) + require.NoError(t, err) + + var pbany *types.Any // This is nil. + var typeurlAny typeurl.Any = pbany // This is typed nil. + _, err = getRuntimeOptions(containers.Container{Runtime: containers.RuntimeInfo{Options: typeurlAny}}) + require.NoError(t, err) +} diff --git a/pkg/cri/sbserver/helpers_windows.go b/pkg/cri/sbserver/helpers_windows.go new file mode 100644 index 000000000..b052f37ca --- /dev/null +++ b/pkg/cri/sbserver/helpers_windows.go @@ -0,0 +1,168 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "os" + "path/filepath" + "syscall" + + "github.com/opencontainers/runtime-spec/specs-go" +) + +// openLogFile opens/creates a container log file. +// It specifies `FILE_SHARE_DELETE` option to make sure +// log files can be rotated by kubelet. +// TODO(windows): Use golang support after 1.14. 
(https://github.com/golang/go/issues/32088) +func openLogFile(path string) (*os.File, error) { + path = fixLongPath(path) + if len(path) == 0 { + return nil, syscall.ERROR_FILE_NOT_FOUND + } + pathp, err := syscall.UTF16PtrFromString(path) + if err != nil { + return nil, err + } + createmode := uint32(syscall.OPEN_ALWAYS) + access := uint32(syscall.FILE_APPEND_DATA) + sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE | syscall.FILE_SHARE_DELETE) + h, err := syscall.CreateFile(pathp, access, sharemode, nil, createmode, syscall.FILE_ATTRIBUTE_NORMAL, 0) + if err != nil { + return nil, err + } + return os.NewFile(uintptr(h), path), nil +} + +// Copyright (c) 2009 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
// fixLongPath returns the extended-length (\\?\-prefixed) form of
// path when needed, in order to avoid the default 260 character file
// path limit imposed by Windows. If path is not easily converted to
// the extended-length form (for example, if path is a relative path
// or contains .. elements), or is short enough, fixLongPath returns
// path unmodified.
//
// See https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath
//
// This is copied from https://golang.org/src/path/filepath/path_windows.go.
func fixLongPath(path string) string {
	// Paths shorter than 248 bytes are left alone: MAX_PATH is 260 and a
	// directory name must leave room for an 8.3 file name (260 - 12 = 248).
	// Empirically the path must be less than 248 bytes long.
	const shortLimit = 248
	if len(path) < shortLimit {
		return path
	}

	// The extended form begins with \\?\, as in \\?\c:\windows\foo.txt.
	// It disables evaluation of . and .. elements and the interpretation of
	// / as \, so the conversion below rewrites separators to \ and drops
	// . elements as well as trailing or duplicate separators. UNC paths
	// (\\server\share) and relative paths are not converted.
	isUNC := len(path) >= 2 && path[:2] == `\\`
	if isUNC || !filepath.IsAbs(path) {
		return path
	}

	const prefix = `\\?`
	n := len(path)

	// endsElement reports whether index i is the end of the path or a separator.
	endsElement := func(i int) bool {
		return i == n || os.IsPathSeparator(path[i])
	}

	out := make([]byte, 0, len(prefix)+n+len(`\`))
	out = append(out, prefix...)
	for i := 0; i < n; {
		ch := path[i]
		switch {
		case os.IsPathSeparator(ch):
			// Separators are re-emitted in front of each kept element.
			i++
		case ch == '.' && endsElement(i+1):
			// A "." element is dropped.
			i++
		case ch == '.' && i+1 < n && path[i+1] == '.' && endsElement(i+2):
			// A ".." element is currently unhandled: give up on converting.
			return path
		default:
			start := i
			for i < n && !os.IsPathSeparator(path[i]) {
				i++
			}
			out = append(out, '\\')
			out = append(out, path[start:i]...)
		}
	}

	// A drive's root directory needs a trailing \
	if len(out) == len(`\\?\c:`) {
		out = append(out, '\\')
	}
	return string(out)
}
+func ensureRemoveAll(_ context.Context, dir string) error { + return os.RemoveAll(dir) +} + +func modifyProcessLabel(runtimeType string, spec *specs.Spec) error { + return nil +} diff --git a/pkg/cri/sbserver/image_list.go b/pkg/cri/sbserver/image_list.go new file mode 100644 index 000000000..569e30492 --- /dev/null +++ b/pkg/cri/sbserver/image_list.go @@ -0,0 +1,39 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// ListImages lists existing images. +// TODO(random-liu): Add image list filters after CRI defines this more clear, and kubelet +// actually needs it. +func (c *criService) ListImages(ctx context.Context, r *runtime.ListImagesRequest) (*runtime.ListImagesResponse, error) { + imagesInStore := c.imageStore.List() + + var images []*runtime.Image + for _, image := range imagesInStore { + // TODO(random-liu): [P0] Make sure corresponding snapshot exists. What if snapshot + // doesn't exist? + images = append(images, toCRIImage(image)) + } + + return &runtime.ListImagesResponse{Images: images}, nil +} diff --git a/pkg/cri/sbserver/image_list_test.go b/pkg/cri/sbserver/image_list_test.go new file mode 100644 index 000000000..6a9a4fc7b --- /dev/null +++ b/pkg/cri/sbserver/image_list_test.go @@ -0,0 +1,113 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "testing" + + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + imagestore "github.com/containerd/containerd/pkg/cri/store/image" +) + +func TestListImages(t *testing.T) { + c := newTestCRIService() + imagesInStore := []imagestore.Image{ + { + ID: "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + ChainID: "test-chainid-1", + References: []string{ + "gcr.io/library/busybox:latest", + "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", + }, + Size: 1000, + ImageSpec: imagespec.Image{ + Config: imagespec.ImageConfig{ + User: "root", + }, + }, + }, + { + ID: "sha256:2123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + ChainID: "test-chainid-2", + References: []string{ + "gcr.io/library/alpine:latest", + "gcr.io/library/alpine@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", + }, + Size: 2000, + ImageSpec: imagespec.Image{ + Config: imagespec.ImageConfig{ + User: "1234:1234", + }, + }, + }, + { + ID: "sha256:3123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + ChainID: "test-chainid-3", + References: []string{ + "gcr.io/library/ubuntu:latest", + 
"gcr.io/library/ubuntu@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", + }, + Size: 3000, + ImageSpec: imagespec.Image{ + Config: imagespec.ImageConfig{ + User: "nobody", + }, + }, + }, + } + expect := []*runtime.Image{ + { + Id: "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + RepoTags: []string{"gcr.io/library/busybox:latest"}, + RepoDigests: []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"}, + Size_: uint64(1000), + Username: "root", + }, + { + Id: "sha256:2123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + RepoTags: []string{"gcr.io/library/alpine:latest"}, + RepoDigests: []string{"gcr.io/library/alpine@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"}, + Size_: uint64(2000), + Uid: &runtime.Int64Value{Value: 1234}, + }, + { + Id: "sha256:3123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + RepoTags: []string{"gcr.io/library/ubuntu:latest"}, + RepoDigests: []string{"gcr.io/library/ubuntu@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"}, + Size_: uint64(3000), + Username: "nobody", + }, + } + + var err error + c.imageStore, err = imagestore.NewFakeStore(imagesInStore) + assert.NoError(t, err) + + resp, err := c.ListImages(context.Background(), &runtime.ListImagesRequest{}) + assert.NoError(t, err) + require.NotNil(t, resp) + images := resp.GetImages() + assert.Len(t, images, len(expect)) + for _, i := range expect { + assert.Contains(t, images, i) + } +} diff --git a/pkg/cri/sbserver/image_pull.go b/pkg/cri/sbserver/image_pull.go new file mode 100644 index 000000000..148ba3487 --- /dev/null +++ b/pkg/cri/sbserver/image_pull.go @@ -0,0 +1,822 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "crypto/tls" + "crypto/x509" + "encoding/base64" + "fmt" + "io" + "net" + "net/http" + "net/url" + "os" + "path/filepath" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/containerd/imgcrypt" + "github.com/containerd/imgcrypt/images/encryption" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/errdefs" + containerdimages "github.com/containerd/containerd/images" + "github.com/containerd/containerd/labels" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/pkg/cri/annotations" + criconfig "github.com/containerd/containerd/pkg/cri/config" + distribution "github.com/containerd/containerd/reference/docker" + "github.com/containerd/containerd/remotes/docker" + "github.com/containerd/containerd/remotes/docker/config" +) + +// For image management: +// 1) We have an in-memory metadata index to: +// a. Maintain ImageID -> RepoTags, ImageID -> RepoDigset relationships; ImageID +// is the digest of image config, which conforms to oci image spec. +// b. Cache constant and useful information such as image chainID, config etc. +// c. An image will be added into the in-memory metadata only when it's successfully +// pulled and unpacked. +// +// 2) We use containerd image metadata store and content store: +// a. To resolve image reference (digest/tag) locally. 
During pulling image, we +// normalize the image reference provided by user, and put it into image metadata +// store with resolved descriptor. For the other operations, if image id is provided, +// we'll access the in-memory metadata index directly; if image reference is +// provided, we'll normalize it, resolve it in containerd image metadata store +// to get the image id. +// b. As the backup of in-memory metadata in 1). During startup, the in-memory +// metadata could be re-constructed from image metadata store + content store. +// +// Several problems with current approach: +// 1) An entry in containerd image metadata store doesn't mean a "READY" (successfully +// pulled and unpacked) image. E.g. during pulling, the client gets killed. In that case, +// if we saw an image without snapshots or with incomplete contents during startup, +// should we re-pull the image? Or should we remove the entry? +// +// yanxuean: We can't delete image directly, because we don't know if the image +// is pulled by us. This causes resource leakage. +// +// 2) Containerd suggests that users add an entry before pulling the image. However if +// an error occurs during the pulling, should we remove the entry from metadata +// store? Or should we leave it there until next startup (resource leakage)? +// +// 3) The cri plugin only exposes "READY" (successfully pulled and unpacked) images +// to the user, which are maintained in the in-memory metadata index. However, it's +// still possible that someone else removes the content or snapshot while bypassing the cri plugin, +// how do we detect that and update the in-memory metadata correspondingly? Always +// check whether corresponding snapshot is ready when reporting image status? +// +// 4) Is the content important if we cached necessary information in-memory +// after we pull the image? How to manage the disk usage of contents? If some +// contents are missing but snapshots are ready, is the image still "READY"?
+ +// PullImage pulls an image with authentication config. +func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (*runtime.PullImageResponse, error) { + imageRef := r.GetImage().GetImage() + namedRef, err := distribution.ParseDockerRef(imageRef) + if err != nil { + return nil, fmt.Errorf("failed to parse image reference %q: %w", imageRef, err) + } + ref := namedRef.String() + if ref != imageRef { + log.G(ctx).Debugf("PullImage using normalized image ref: %q", ref) + } + + imagePullProgressTimeout, err := time.ParseDuration(c.config.ImagePullProgressTimeout) + if err != nil { + return nil, fmt.Errorf("failed to parse image_pull_progress_timeout %q: %w", c.config.ImagePullProgressTimeout, err) + } + + var ( + pctx, pcancel = context.WithCancel(ctx) + + pullReporter = newPullProgressReporter(ref, pcancel, imagePullProgressTimeout) + + resolver = docker.NewResolver(docker.ResolverOptions{ + Headers: c.config.Registry.Headers, + Hosts: c.registryHosts(ctx, r.GetAuth(), pullReporter.optionUpdateClient), + }) + isSchema1 bool + imageHandler containerdimages.HandlerFunc = func(_ context.Context, + desc imagespec.Descriptor) ([]imagespec.Descriptor, error) { + if desc.MediaType == containerdimages.MediaTypeDockerSchema1Manifest { + isSchema1 = true + } + return nil, nil + } + ) + + defer pcancel() + snapshotter, err := c.snapshotterFromPodSandboxConfig(ctx, ref, r.SandboxConfig) + if err != nil { + return nil, err + } + log.G(ctx).Debugf("PullImage %q with snapshotter %s", ref, snapshotter) + + pullOpts := []containerd.RemoteOpt{ + containerd.WithSchema1Conversion, //nolint:staticcheck // Ignore SA1019. Need to keep deprecated package for compatibility. 
+ containerd.WithResolver(resolver), + containerd.WithPullSnapshotter(snapshotter), + containerd.WithPullUnpack, + containerd.WithPullLabel(imageLabelKey, imageLabelValue), + containerd.WithMaxConcurrentDownloads(c.config.MaxConcurrentDownloads), + containerd.WithImageHandler(imageHandler), + containerd.WithUnpackOpts([]containerd.UnpackOpt{ + containerd.WithUnpackDuplicationSuppressor(c.unpackDuplicationSuppressor), + }), + } + + pullOpts = append(pullOpts, c.encryptedImagesPullOpts()...) + if !c.config.ContainerdConfig.DisableSnapshotAnnotations { + pullOpts = append(pullOpts, + containerd.WithImageHandlerWrapper(appendInfoHandlerWrapper(ref))) + } + + if c.config.ContainerdConfig.DiscardUnpackedLayers { + // Allows GC to clean layers up from the content store after unpacking + pullOpts = append(pullOpts, + containerd.WithChildLabelMap(containerdimages.ChildGCLabelsFilterLayers)) + } + + pullReporter.start(pctx) + image, err := c.client.Pull(pctx, ref, pullOpts...) + pcancel() + if err != nil { + return nil, fmt.Errorf("failed to pull and unpack image %q: %w", ref, err) + } + + configDesc, err := image.Config(ctx) + if err != nil { + return nil, fmt.Errorf("get image config descriptor: %w", err) + } + imageID := configDesc.Digest.String() + + repoDigest, repoTag := getRepoDigestAndTag(namedRef, image.Target().Digest, isSchema1) + for _, r := range []string{imageID, repoTag, repoDigest} { + if r == "" { + continue + } + if err := c.createImageReference(ctx, r, image.Target()); err != nil { + return nil, fmt.Errorf("failed to create image reference %q: %w", r, err) + } + // Update image store to reflect the newest state in containerd. + // No need to use `updateImage`, because the image reference must + // have been managed by the cri plugin. 
+ if err := c.imageStore.Update(ctx, r); err != nil { + return nil, fmt.Errorf("failed to update image store %q: %w", r, err) + } + } + + log.G(ctx).Debugf("Pulled image %q with image id %q, repo tag %q, repo digest %q", imageRef, imageID, + repoTag, repoDigest) + // NOTE(random-liu): the actual state in containerd is the source of truth, even we maintain + // in-memory image store, it's only for in-memory indexing. The image could be removed + // by someone else anytime, before/during/after we create the metadata. We should always + // check the actual state in containerd before using the image or returning status of the + // image. + return &runtime.PullImageResponse{ImageRef: imageID}, nil +} + +// ParseAuth parses AuthConfig and returns username and password/secret required by containerd. +func ParseAuth(auth *runtime.AuthConfig, host string) (string, string, error) { + if auth == nil { + return "", "", nil + } + if auth.ServerAddress != "" { + // Do not return the auth info when server address doesn't match. + u, err := url.Parse(auth.ServerAddress) + if err != nil { + return "", "", fmt.Errorf("parse server address: %w", err) + } + if host != u.Host { + return "", "", nil + } + } + if auth.Username != "" { + return auth.Username, auth.Password, nil + } + if auth.IdentityToken != "" { + return "", auth.IdentityToken, nil + } + if auth.Auth != "" { + decLen := base64.StdEncoding.DecodedLen(len(auth.Auth)) + decoded := make([]byte, decLen) + _, err := base64.StdEncoding.Decode(decoded, []byte(auth.Auth)) + if err != nil { + return "", "", err + } + fields := strings.SplitN(string(decoded), ":", 2) + if len(fields) != 2 { + return "", "", fmt.Errorf("invalid decoded auth: %q", decoded) + } + user, passwd := fields[0], fields[1] + return user, strings.Trim(passwd, "\x00"), nil + } + // TODO(random-liu): Support RegistryToken. 
+ // An empty auth config is valid for anonymous registry + return "", "", nil +} + +// createImageReference creates image reference inside containerd image store. +// Note that because create and update are not finished in one transaction, there could be race. E.g. +// the image reference is deleted by someone else after create returns already exists, but before update +// happens. +func (c *criService) createImageReference(ctx context.Context, name string, desc imagespec.Descriptor) error { + img := containerdimages.Image{ + Name: name, + Target: desc, + // Add a label to indicate that the image is managed by the cri plugin. + Labels: map[string]string{imageLabelKey: imageLabelValue}, + } + // TODO(random-liu): Figure out which is the more performant sequence create then update or + // update then create. + oldImg, err := c.client.ImageService().Create(ctx, img) + if err == nil || !errdefs.IsAlreadyExists(err) { + return err + } + if oldImg.Target.Digest == img.Target.Digest && oldImg.Labels[imageLabelKey] == imageLabelValue { + return nil + } + _, err = c.client.ImageService().Update(ctx, img, "target", "labels."+imageLabelKey) + return err +} + +// updateImage updates image store to reflect the newest state of an image reference +// in containerd. If the reference is not managed by the cri plugin, the function also +// generates necessary metadata for the image and make it managed. +func (c *criService) updateImage(ctx context.Context, r string) error { + img, err := c.client.GetImage(ctx, r) + if err != nil && !errdefs.IsNotFound(err) { + return fmt.Errorf("get image by reference: %w", err) + } + if err == nil && img.Labels()[imageLabelKey] != imageLabelValue { + // Make sure the image has the image id as its unique + // identifier that references the image in its lifetime. 
+ configDesc, err := img.Config(ctx) + if err != nil { + return fmt.Errorf("get image id: %w", err) + } + id := configDesc.Digest.String() + if err := c.createImageReference(ctx, id, img.Target()); err != nil { + return fmt.Errorf("create image id reference %q: %w", id, err) + } + if err := c.imageStore.Update(ctx, id); err != nil { + return fmt.Errorf("update image store for %q: %w", id, err) + } + // The image id is ready, add the label to mark the image as managed. + if err := c.createImageReference(ctx, r, img.Target()); err != nil { + return fmt.Errorf("create managed label: %w", err) + } + } + // If the image is not found, we should continue updating the cache, + // so that the image can be removed from the cache. + if err := c.imageStore.Update(ctx, r); err != nil { + return fmt.Errorf("update image store for %q: %w", r, err) + } + return nil +} + +// getTLSConfig returns a TLSConfig configured with a CA/Cert/Key specified by registryTLSConfig +func (c *criService) getTLSConfig(registryTLSConfig criconfig.TLSConfig) (*tls.Config, error) { + var ( + tlsConfig = &tls.Config{} + cert tls.Certificate + err error + ) + if registryTLSConfig.CertFile != "" && registryTLSConfig.KeyFile == "" { + return nil, fmt.Errorf("cert file %q was specified, but no corresponding key file was specified", registryTLSConfig.CertFile) + } + if registryTLSConfig.CertFile == "" && registryTLSConfig.KeyFile != "" { + return nil, fmt.Errorf("key file %q was specified, but no corresponding cert file was specified", registryTLSConfig.KeyFile) + } + if registryTLSConfig.CertFile != "" && registryTLSConfig.KeyFile != "" { + cert, err = tls.LoadX509KeyPair(registryTLSConfig.CertFile, registryTLSConfig.KeyFile) + if err != nil { + return nil, fmt.Errorf("failed to load cert file: %w", err) + } + if len(cert.Certificate) != 0 { + tlsConfig.Certificates = []tls.Certificate{cert} + } + tlsConfig.BuildNameToCertificate() // nolint:staticcheck + } + + if registryTLSConfig.CAFile != "" { + 
caCertPool, err := x509.SystemCertPool() + if err != nil { + return nil, fmt.Errorf("failed to get system cert pool: %w", err) + } + caCert, err := os.ReadFile(registryTLSConfig.CAFile) + if err != nil { + return nil, fmt.Errorf("failed to load CA file: %w", err) + } + caCertPool.AppendCertsFromPEM(caCert) + tlsConfig.RootCAs = caCertPool + } + + tlsConfig.InsecureSkipVerify = registryTLSConfig.InsecureSkipVerify + return tlsConfig, nil +} + +func hostDirFromRoots(roots []string) func(string) (string, error) { + rootfn := make([]func(string) (string, error), len(roots)) + for i := range roots { + rootfn[i] = config.HostDirFromRoot(roots[i]) + } + return func(host string) (dir string, err error) { + for _, fn := range rootfn { + dir, err = fn(host) + if (err != nil && !errdefs.IsNotFound(err)) || (dir != "") { + break + } + } + return + } +} + +// registryHosts is the registry hosts to be used by the resolver. +func (c *criService) registryHosts(ctx context.Context, auth *runtime.AuthConfig, updateClientFn config.UpdateClientFunc) docker.RegistryHosts { + paths := filepath.SplitList(c.config.Registry.ConfigPath) + if len(paths) > 0 { + hostOptions := config.HostOptions{ + UpdateClient: updateClientFn, + } + hostOptions.Credentials = func(host string) (string, string, error) { + hostauth := auth + if hostauth == nil { + config := c.config.Registry.Configs[host] + if config.Auth != nil { + hostauth = toRuntimeAuthConfig(*config.Auth) + } + } + return ParseAuth(hostauth, host) + } + hostOptions.HostDir = hostDirFromRoots(paths) + + return config.ConfigureHosts(ctx, hostOptions) + } + + return func(host string) ([]docker.RegistryHost, error) { + var registries []docker.RegistryHost + + endpoints, err := c.registryEndpoints(host) + if err != nil { + return nil, fmt.Errorf("get registry endpoints: %w", err) + } + for _, e := range endpoints { + u, err := url.Parse(e) + if err != nil { + return nil, fmt.Errorf("parse registry endpoint %q from mirrors: %w", e, err) + } + + 
var ( + transport = newTransport() + client = &http.Client{Transport: transport} + config = c.config.Registry.Configs[u.Host] + ) + + if config.TLS != nil { + transport.TLSClientConfig, err = c.getTLSConfig(*config.TLS) + if err != nil { + return nil, fmt.Errorf("get TLSConfig for registry %q: %w", e, err) + } + } else if isLocalHost(host) && u.Scheme == "http" { + // Skipping TLS verification for localhost + transport.TLSClientConfig = &tls.Config{ + InsecureSkipVerify: true, + } + } + + // Make a copy of `auth`, so that different authorizers would not reference + // the same auth variable. + auth := auth + if auth == nil && config.Auth != nil { + auth = toRuntimeAuthConfig(*config.Auth) + } + + if updateClientFn != nil { + if err := updateClientFn(client); err != nil { + return nil, fmt.Errorf("failed to update http client: %w", err) + } + } + + authorizer := docker.NewDockerAuthorizer( + docker.WithAuthClient(client), + docker.WithAuthCreds(func(host string) (string, string, error) { + return ParseAuth(auth, host) + })) + + if u.Path == "" { + u.Path = "/v2" + } + + registries = append(registries, docker.RegistryHost{ + Client: client, + Authorizer: authorizer, + Host: u.Host, + Scheme: u.Scheme, + Path: u.Path, + Capabilities: docker.HostCapabilityResolve | docker.HostCapabilityPull, + }) + } + return registries, nil + } +} + +// defaultScheme returns the default scheme for a registry host. +func defaultScheme(host string) string { + if isLocalHost(host) { + return "http" + } + return "https" +} + +// isLocalHost checks if the registry host is local. 
+func isLocalHost(host string) bool { + if h, _, err := net.SplitHostPort(host); err == nil { + host = h + } + + if host == "localhost" { + return true + } + + ip := net.ParseIP(host) + return ip.IsLoopback() +} + +// addDefaultScheme returns the endpoint with default scheme +func addDefaultScheme(endpoint string) (string, error) { + if strings.Contains(endpoint, "://") { + return endpoint, nil + } + ue := "dummy://" + endpoint + u, err := url.Parse(ue) + if err != nil { + return "", err + } + return fmt.Sprintf("%s://%s", defaultScheme(u.Host), endpoint), nil +} + +// registryEndpoints returns endpoints for a given host. +// It adds default registry endpoint if it does not exist in the passed-in endpoint list. +// It also supports wildcard host matching with `*`. +func (c *criService) registryEndpoints(host string) ([]string, error) { + var endpoints []string + _, ok := c.config.Registry.Mirrors[host] + if ok { + endpoints = c.config.Registry.Mirrors[host].Endpoints + } else { + endpoints = c.config.Registry.Mirrors["*"].Endpoints + } + defaultHost, err := docker.DefaultHost(host) + if err != nil { + return nil, fmt.Errorf("get default host: %w", err) + } + for i := range endpoints { + en, err := addDefaultScheme(endpoints[i]) + if err != nil { + return nil, fmt.Errorf("parse endpoint url: %w", err) + } + endpoints[i] = en + } + for _, e := range endpoints { + u, err := url.Parse(e) + if err != nil { + return nil, fmt.Errorf("parse endpoint url: %w", err) + } + if u.Host == host { + // Do not add default if the endpoint already exists. + return endpoints, nil + } + } + return append(endpoints, defaultScheme(defaultHost)+"://"+defaultHost), nil +} + +// newTransport returns a new HTTP transport used to pull image. +// TODO(random-liu): Create a library and share this code with `ctr`. 
+func newTransport() *http.Transport { + return &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + FallbackDelay: 300 * time.Millisecond, + }).DialContext, + MaxIdleConns: 10, + IdleConnTimeout: 30 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 5 * time.Second, + } +} + +// encryptedImagesPullOpts returns the necessary list of pull options required +// for decryption of encrypted images based on the cri decryption configuration. +func (c *criService) encryptedImagesPullOpts() []containerd.RemoteOpt { + if c.config.ImageDecryption.KeyModel == criconfig.KeyModelNode { + ltdd := imgcrypt.Payload{} + decUnpackOpt := encryption.WithUnpackConfigApplyOpts(encryption.WithDecryptedUnpack(<dd)) + opt := containerd.WithUnpackOpts([]containerd.UnpackOpt{decUnpackOpt}) + return []containerd.RemoteOpt{opt} + } + return nil +} + +const ( + // targetRefLabel is a label which contains image reference and will be passed + // to snapshotters. + targetRefLabel = "containerd.io/snapshot/cri.image-ref" + // targetManifestDigestLabel is a label which contains manifest digest and will be passed + // to snapshotters. + targetManifestDigestLabel = "containerd.io/snapshot/cri.manifest-digest" + // targetLayerDigestLabel is a label which contains layer digest and will be passed + // to snapshotters. + targetLayerDigestLabel = "containerd.io/snapshot/cri.layer-digest" + // targetImageLayersLabel is a label which contains layer digests contained in + // the target image and will be passed to snapshotters for preparing layers in + // parallel. Skipping some layers is allowed and only affects performance. + targetImageLayersLabel = "containerd.io/snapshot/cri.image-layers" +) + +// appendInfoHandlerWrapper makes a handler which appends some basic information +// of images like digests for manifest and their child layers as annotations during unpack. 
+// These annotations will be passed to snapshotters as labels. These labels will be +// used mainly by stargz-based snapshotters for querying image contents from the +// registry. +func appendInfoHandlerWrapper(ref string) func(f containerdimages.Handler) containerdimages.Handler { + return func(f containerdimages.Handler) containerdimages.Handler { + return containerdimages.HandlerFunc(func(ctx context.Context, desc imagespec.Descriptor) ([]imagespec.Descriptor, error) { + children, err := f.Handle(ctx, desc) + if err != nil { + return nil, err + } + switch desc.MediaType { + case imagespec.MediaTypeImageManifest, containerdimages.MediaTypeDockerSchema2Manifest: + for i := range children { + c := &children[i] + if containerdimages.IsLayerType(c.MediaType) { + if c.Annotations == nil { + c.Annotations = make(map[string]string) + } + c.Annotations[targetRefLabel] = ref + c.Annotations[targetLayerDigestLabel] = c.Digest.String() + c.Annotations[targetImageLayersLabel] = getLayers(ctx, targetImageLayersLabel, children[i:], labels.Validate) + c.Annotations[targetManifestDigestLabel] = desc.Digest.String() + } + } + } + return children, nil + }) + } +} + +// getLayers returns comma-separated digests based on the passed list of +// descriptors. The returned list contains as many digests as possible as well +// as meets the label validation. +func getLayers(ctx context.Context, key string, descs []imagespec.Descriptor, validate func(k, v string) error) (layers string) { + var item string + for _, l := range descs { + if containerdimages.IsLayerType(l.MediaType) { + item = l.Digest.String() + if layers != "" { + item = "," + item + } + // This avoids the label hits the size limitation. 
+ if err := validate(key, layers+item); err != nil { + log.G(ctx).WithError(err).WithField("label", key).Debugf("%q is omitted in the layers list", l.Digest.String()) + break + } + layers += item + } + } + return +} + +const ( + // minPullProgressReportInternal is used to prevent the reporter from + // eating more CPU resources + minPullProgressReportInternal = 5 * time.Second + // defaultPullProgressReportInterval represents that how often the + // reporter checks that pull progress. + defaultPullProgressReportInterval = 10 * time.Second +) + +// pullProgressReporter is used to check single PullImage progress. +type pullProgressReporter struct { + ref string + cancel context.CancelFunc + reqReporter pullRequestReporter + timeout time.Duration +} + +func newPullProgressReporter(ref string, cancel context.CancelFunc, timeout time.Duration) *pullProgressReporter { + return &pullProgressReporter{ + ref: ref, + cancel: cancel, + reqReporter: pullRequestReporter{}, + timeout: timeout, + } +} + +func (reporter *pullProgressReporter) optionUpdateClient(client *http.Client) error { + client.Transport = &pullRequestReporterRoundTripper{ + rt: client.Transport, + reqReporter: &reporter.reqReporter, + } + return nil +} + +func (reporter *pullProgressReporter) start(ctx context.Context) { + if reporter.timeout == 0 { + log.G(ctx).Infof("no timeout and will not start pulling image %s reporter", reporter.ref) + return + } + + go func() { + var ( + reportInterval = defaultPullProgressReportInterval + + lastSeenBytesRead = uint64(0) + lastSeenTimestamp = time.Now() + ) + + // check progress more frequently if timeout < default internal + if reporter.timeout < reportInterval { + reportInterval = reporter.timeout / 2 + + if reportInterval < minPullProgressReportInternal { + reportInterval = minPullProgressReportInternal + } + } + + var ticker = time.NewTicker(reportInterval) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + activeReqs, bytesRead := 
reporter.reqReporter.status() + + log.G(ctx).WithField("ref", reporter.ref). + WithField("activeReqs", activeReqs). + WithField("totalBytesRead", bytesRead). + WithField("lastSeenBytesRead", lastSeenBytesRead). + WithField("lastSeenTimestamp", lastSeenTimestamp). + WithField("reportInterval", reportInterval). + Tracef("progress for image pull") + + if activeReqs == 0 || bytesRead > lastSeenBytesRead { + lastSeenBytesRead = bytesRead + lastSeenTimestamp = time.Now() + continue + } + + if time.Since(lastSeenTimestamp) > reporter.timeout { + log.G(ctx).Errorf("cancel pulling image %s because of no progress in %v", reporter.ref, reporter.timeout) + reporter.cancel() + return + } + case <-ctx.Done(): + activeReqs, bytesRead := reporter.reqReporter.status() + log.G(ctx).Infof("stop pulling image %s: active requests=%v, bytes read=%v", reporter.ref, activeReqs, bytesRead) + return + } + } + }() +} + +// countingReadCloser wraps http.Response.Body with pull request reporter, +// which is used by pullRequestReporterRoundTripper. +type countingReadCloser struct { + once sync.Once + + rc io.ReadCloser + reqReporter *pullRequestReporter +} + +// Read reads bytes from original io.ReadCloser and increases bytes in +// pull request reporter. +func (r *countingReadCloser) Read(p []byte) (int, error) { + n, err := r.rc.Read(p) + r.reqReporter.incByteRead(uint64(n)) + return n, err +} + +// Close closes the original io.ReadCloser and only decreases the number of +// active pull requests once. +func (r *countingReadCloser) Close() error { + err := r.rc.Close() + r.once.Do(r.reqReporter.decRequest) + return err +} + +// pullRequestReporter is used to track the progress per each criapi.PullImage. +type pullRequestReporter struct { + // activeReqs indicates that current number of active pulling requests, + // including auth requests. + activeReqs int32 + // totalBytesRead indicates that the total bytes has been read from + // remote registry. 
+ totalBytesRead uint64 +} + +func (reporter *pullRequestReporter) incRequest() { + atomic.AddInt32(&reporter.activeReqs, 1) +} + +func (reporter *pullRequestReporter) decRequest() { + atomic.AddInt32(&reporter.activeReqs, -1) +} + +func (reporter *pullRequestReporter) incByteRead(nr uint64) { + atomic.AddUint64(&reporter.totalBytesRead, nr) +} + +func (reporter *pullRequestReporter) status() (currentReqs int32, totalBytesRead uint64) { + currentReqs = atomic.LoadInt32(&reporter.activeReqs) + totalBytesRead = atomic.LoadUint64(&reporter.totalBytesRead) + return currentReqs, totalBytesRead +} + +// pullRequestReporterRoundTripper wraps http.RoundTripper with pull request +// reporter which is used to track the progress of active http request with +// counting readable http.Response.Body. +// +// NOTE: +// +// Although containerd provides ingester manager to track the progress +// of pulling request, for example `ctr image pull` shows the console progress +// bar, it needs more CPU resources to open/read the ingested files with +// acquiring containerd metadata plugin's boltdb lock. +// +// Before sending HTTP request to registry, the containerd.Client.Pull library +// will open writer by containerd ingester manager. Based on this, the +// http.RoundTripper wrapper can track the active progress with lower overhead +// even if the ref has been locked in ingester manager by other Pull request. 
+type pullRequestReporterRoundTripper struct { + rt http.RoundTripper + + reqReporter *pullRequestReporter +} + +func (rt *pullRequestReporterRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + rt.reqReporter.incRequest() + + resp, err := rt.rt.RoundTrip(req) + if err != nil { + rt.reqReporter.decRequest() + return nil, err + } + + resp.Body = &countingReadCloser{ + rc: resp.Body, + reqReporter: rt.reqReporter, + } + return resp, err +} + +// Given that runtime information is not passed from PullImageRequest, we depend on an experimental annotation +// passed from pod sandbox config to get the runtimeHandler. The annotation key is specified in configuration. +// Once we know the runtime, try to override default snapshotter if it is set for this runtime. +// See https://github.com/containerd/containerd/issues/6657 +func (c *criService) snapshotterFromPodSandboxConfig(ctx context.Context, imageRef string, + s *runtime.PodSandboxConfig) (string, error) { + snapshotter := c.config.ContainerdConfig.Snapshotter + if s == nil || s.Annotations == nil { + return snapshotter, nil + } + + runtimeHandler, ok := s.Annotations[annotations.RuntimeHandler] + if !ok { + return snapshotter, nil + } + + ociRuntime, err := c.getSandboxRuntime(s, runtimeHandler) + if err != nil { + return "", fmt.Errorf("experimental: failed to get sandbox runtime for %s, err: %+v", runtimeHandler, err) + } + + snapshotter = c.runtimeSnapshotter(context.Background(), ociRuntime) + log.G(ctx).Infof("experimental: PullImage %q for runtime %s, using snapshotter %s", imageRef, runtimeHandler, snapshotter) + return snapshotter, nil +} diff --git a/pkg/cri/sbserver/image_pull_test.go b/pkg/cri/sbserver/image_pull_test.go new file mode 100644 index 000000000..69c4db8f0 --- /dev/null +++ b/pkg/cri/sbserver/image_pull_test.go @@ -0,0 +1,445 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "encoding/base64" + "fmt" + "strings" + "testing" + + "github.com/opencontainers/go-digest" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/stretchr/testify/assert" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/pkg/cri/annotations" + criconfig "github.com/containerd/containerd/pkg/cri/config" +) + +func TestParseAuth(t *testing.T) { + testUser := "username" + testPasswd := "password" + testAuthLen := base64.StdEncoding.EncodedLen(len(testUser + ":" + testPasswd)) + testAuth := make([]byte, testAuthLen) + base64.StdEncoding.Encode(testAuth, []byte(testUser+":"+testPasswd)) + invalidAuth := make([]byte, testAuthLen) + base64.StdEncoding.Encode(invalidAuth, []byte(testUser+"@"+testPasswd)) + for desc, test := range map[string]struct { + auth *runtime.AuthConfig + host string + expectedUser string + expectedSecret string + expectErr bool + }{ + "should not return error if auth config is nil": {}, + "should not return error if empty auth is provided for access to anonymous registry": { + auth: &runtime.AuthConfig{}, + expectErr: false, + }, + "should support identity token": { + auth: &runtime.AuthConfig{IdentityToken: "abcd"}, + expectedSecret: "abcd", + }, + "should support username and password": { + auth: &runtime.AuthConfig{ + Username: testUser, + Password: testPasswd, + }, + expectedUser: testUser, + expectedSecret: 
testPasswd, + }, + "should support auth": { + auth: &runtime.AuthConfig{Auth: string(testAuth)}, + expectedUser: testUser, + expectedSecret: testPasswd, + }, + "should return error for invalid auth": { + auth: &runtime.AuthConfig{Auth: string(invalidAuth)}, + expectErr: true, + }, + "should return empty auth if server address doesn't match": { + auth: &runtime.AuthConfig{ + Username: testUser, + Password: testPasswd, + ServerAddress: "https://registry-1.io", + }, + host: "registry-2.io", + expectedUser: "", + expectedSecret: "", + }, + "should return auth if server address matches": { + auth: &runtime.AuthConfig{ + Username: testUser, + Password: testPasswd, + ServerAddress: "https://registry-1.io", + }, + host: "registry-1.io", + expectedUser: testUser, + expectedSecret: testPasswd, + }, + "should return auth if server address is not specified": { + auth: &runtime.AuthConfig{ + Username: testUser, + Password: testPasswd, + }, + host: "registry-1.io", + expectedUser: testUser, + expectedSecret: testPasswd, + }, + } { + t.Run(desc, func(t *testing.T) { + u, s, err := ParseAuth(test.auth, test.host) + assert.Equal(t, test.expectErr, err != nil) + assert.Equal(t, test.expectedUser, u) + assert.Equal(t, test.expectedSecret, s) + }) + } +} + +func TestRegistryEndpoints(t *testing.T) { + for desc, test := range map[string]struct { + mirrors map[string]criconfig.Mirror + host string + expected []string + }{ + "no mirror configured": { + mirrors: map[string]criconfig.Mirror{ + "registry-1.io": { + Endpoints: []string{ + "https://registry-1.io", + "https://registry-2.io", + }, + }, + }, + host: "registry-3.io", + expected: []string{ + "https://registry-3.io", + }, + }, + "mirror configured": { + mirrors: map[string]criconfig.Mirror{ + "registry-3.io": { + Endpoints: []string{ + "https://registry-1.io", + "https://registry-2.io", + }, + }, + }, + host: "registry-3.io", + expected: []string{ + "https://registry-1.io", + "https://registry-2.io", + "https://registry-3.io", + }, 
+ }, + "wildcard mirror configured": { + mirrors: map[string]criconfig.Mirror{ + "*": { + Endpoints: []string{ + "https://registry-1.io", + "https://registry-2.io", + }, + }, + }, + host: "registry-3.io", + expected: []string{ + "https://registry-1.io", + "https://registry-2.io", + "https://registry-3.io", + }, + }, + "host should take precedence if both host and wildcard mirrors are configured": { + mirrors: map[string]criconfig.Mirror{ + "*": { + Endpoints: []string{ + "https://registry-1.io", + }, + }, + "registry-3.io": { + Endpoints: []string{ + "https://registry-2.io", + }, + }, + }, + host: "registry-3.io", + expected: []string{ + "https://registry-2.io", + "https://registry-3.io", + }, + }, + "default endpoint in list with http": { + mirrors: map[string]criconfig.Mirror{ + "registry-3.io": { + Endpoints: []string{ + "https://registry-1.io", + "https://registry-2.io", + "http://registry-3.io", + }, + }, + }, + host: "registry-3.io", + expected: []string{ + "https://registry-1.io", + "https://registry-2.io", + "http://registry-3.io", + }, + }, + "default endpoint in list with https": { + mirrors: map[string]criconfig.Mirror{ + "registry-3.io": { + Endpoints: []string{ + "https://registry-1.io", + "https://registry-2.io", + "https://registry-3.io", + }, + }, + }, + host: "registry-3.io", + expected: []string{ + "https://registry-1.io", + "https://registry-2.io", + "https://registry-3.io", + }, + }, + "default endpoint in list with path": { + mirrors: map[string]criconfig.Mirror{ + "registry-3.io": { + Endpoints: []string{ + "https://registry-1.io", + "https://registry-2.io", + "https://registry-3.io/path", + }, + }, + }, + host: "registry-3.io", + expected: []string{ + "https://registry-1.io", + "https://registry-2.io", + "https://registry-3.io/path", + }, + }, + "miss scheme endpoint in list with path": { + mirrors: map[string]criconfig.Mirror{ + "registry-3.io": { + Endpoints: []string{ + "https://registry-3.io", + "registry-1.io", + "127.0.0.1:1234", + }, + 
}, + }, + host: "registry-3.io", + expected: []string{ + "https://registry-3.io", + "https://registry-1.io", + "http://127.0.0.1:1234", + }, + }, + } { + t.Run(desc, func(t *testing.T) { + c := newTestCRIService() + c.config.Registry.Mirrors = test.mirrors + got, err := c.registryEndpoints(test.host) + assert.NoError(t, err) + assert.Equal(t, test.expected, got) + }) + } +} + +func TestDefaultScheme(t *testing.T) { + for desc, test := range map[string]struct { + host string + expected string + }{ + "should use http by default for localhost": { + host: "localhost", + expected: "http", + }, + "should use http by default for localhost with port": { + host: "localhost:8080", + expected: "http", + }, + "should use http by default for 127.0.0.1": { + host: "127.0.0.1", + expected: "http", + }, + "should use http by default for 127.0.0.1 with port": { + host: "127.0.0.1:8080", + expected: "http", + }, + "should use http by default for ::1": { + host: "::1", + expected: "http", + }, + "should use http by default for ::1 with port": { + host: "[::1]:8080", + expected: "http", + }, + "should use https by default for remote host": { + host: "remote", + expected: "https", + }, + "should use https by default for remote host with port": { + host: "remote:8080", + expected: "https", + }, + "should use https by default for remote ip": { + host: "8.8.8.8", + expected: "https", + }, + "should use https by default for remote ip with port": { + host: "8.8.8.8:8080", + expected: "https", + }, + } { + t.Run(desc, func(t *testing.T) { + got := defaultScheme(test.host) + assert.Equal(t, test.expected, got) + }) + } +} + +func TestEncryptedImagePullOpts(t *testing.T) { + for desc, test := range map[string]struct { + keyModel string + expectedOpts int + }{ + "node key model should return one unpack opt": { + keyModel: criconfig.KeyModelNode, + expectedOpts: 1, + }, + "no key model selected should default to node key model": { + keyModel: "", + expectedOpts: 0, + }, + } { + t.Run(desc, 
func(t *testing.T) { + c := newTestCRIService() + c.config.ImageDecryption.KeyModel = test.keyModel + got := len(c.encryptedImagesPullOpts()) + assert.Equal(t, test.expectedOpts, got) + }) + } +} + +func TestImageLayersLabel(t *testing.T) { + sampleKey := "sampleKey" + sampleDigest, err := digest.Parse("sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") + assert.NoError(t, err) + sampleMaxSize := 300 + sampleValidate := func(k, v string) error { + if (len(k) + len(v)) > sampleMaxSize { + return fmt.Errorf("invalid: %q: %q", k, v) + } + return nil + } + + tests := []struct { + name string + layersNum int + wantNum int + }{ + { + name: "valid number of layers", + layersNum: 2, + wantNum: 2, + }, + { + name: "many layers", + layersNum: 5, // hits sampleMaxSize (300 chars). + wantNum: 4, // layers should be omitted for avoiding invalid label. + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var sampleLayers []imagespec.Descriptor + for i := 0; i < tt.layersNum; i++ { + sampleLayers = append(sampleLayers, imagespec.Descriptor{ + MediaType: imagespec.MediaTypeImageLayerGzip, + Digest: sampleDigest, + }) + } + gotS := getLayers(context.Background(), sampleKey, sampleLayers, sampleValidate) + got := len(strings.Split(gotS, ",")) + assert.Equal(t, tt.wantNum, got) + }) + } +} + +func TestSnapshotterFromPodSandboxConfig(t *testing.T) { + defaultSnashotter := "native" + runtimeSnapshotter := "devmapper" + tests := []struct { + desc string + podSandboxConfig *runtime.PodSandboxConfig + expectSnapshotter string + expectErr error + }{ + { + desc: "should return default snapshotter for nil podSandboxConfig", + expectSnapshotter: defaultSnashotter, + }, + { + desc: "should return default snapshotter for nil podSandboxConfig.Annotations", + podSandboxConfig: &runtime.PodSandboxConfig{}, + expectSnapshotter: defaultSnashotter, + }, + { + desc: "should return default snapshotter for empty podSandboxConfig.Annotations", + 
podSandboxConfig: &runtime.PodSandboxConfig{ + Annotations: make(map[string]string), + }, + expectSnapshotter: defaultSnashotter, + }, + { + desc: "should return error for runtime not found", + podSandboxConfig: &runtime.PodSandboxConfig{ + Annotations: map[string]string{ + annotations.RuntimeHandler: "runtime-not-exists", + }, + }, + expectErr: fmt.Errorf(`experimental: failed to get sandbox runtime for runtime-not-exists, err: no runtime for "runtime-not-exists" is configured`), + expectSnapshotter: "", + }, + { + desc: "should return snapshotter provided in podSandboxConfig.Annotations", + podSandboxConfig: &runtime.PodSandboxConfig{ + Annotations: map[string]string{ + annotations.RuntimeHandler: "exiting-runtime", + }, + }, + expectSnapshotter: runtimeSnapshotter, + }, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + cri := newTestCRIService() + cri.config.ContainerdConfig.Snapshotter = defaultSnashotter + cri.config.ContainerdConfig.Runtimes = make(map[string]criconfig.Runtime) + cri.config.ContainerdConfig.Runtimes["exiting-runtime"] = criconfig.Runtime{ + Snapshotter: runtimeSnapshotter, + } + snapshotter, err := cri.snapshotterFromPodSandboxConfig(context.Background(), "test-image", tt.podSandboxConfig) + assert.Equal(t, tt.expectSnapshotter, snapshotter) + assert.Equal(t, tt.expectErr, err) + }) + } +} diff --git a/pkg/cri/sbserver/image_remove.go b/pkg/cri/sbserver/image_remove.go new file mode 100644 index 000000000..ee58a2197 --- /dev/null +++ b/pkg/cri/sbserver/image_remove.go @@ -0,0 +1,65 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/images" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// RemoveImage removes the image. +// TODO(random-liu): Update CRI to pass image reference instead of ImageSpec. (See +// kubernetes/kubernetes#46255) +// TODO(random-liu): We should change CRI to distinguish image id and image spec. +// Remove the whole image no matter the it's image id or reference. This is the +// semantic defined in CRI now. +func (c *criService) RemoveImage(ctx context.Context, r *runtime.RemoveImageRequest) (*runtime.RemoveImageResponse, error) { + image, err := c.localResolve(r.GetImage().GetImage()) + if err != nil { + if errdefs.IsNotFound(err) { + // return empty without error when image not found. + return &runtime.RemoveImageResponse{}, nil + } + return nil, fmt.Errorf("can not resolve %q locally: %w", r.GetImage().GetImage(), err) + } + + // Remove all image references. + for i, ref := range image.References { + var opts []images.DeleteOpt + if i == len(image.References)-1 { + // Delete the last image reference synchronously to trigger garbage collection. + // This is best effort. It is possible that the image reference is deleted by + // someone else before this point. + opts = []images.DeleteOpt{images.SynchronousDelete()} + } + err = c.client.ImageService().Delete(ctx, ref, opts...) + if err == nil || errdefs.IsNotFound(err) { + // Update image store to reflect the newest state in containerd. 
+ if err := c.imageStore.Update(ctx, ref); err != nil { + return nil, fmt.Errorf("failed to update image reference %q for %q: %w", ref, image.ID, err) + } + continue + } + return nil, fmt.Errorf("failed to delete image reference %q for %q: %w", ref, image.ID, err) + } + return &runtime.RemoveImageResponse{}, nil +} diff --git a/pkg/cri/sbserver/image_status.go b/pkg/cri/sbserver/image_status.go new file mode 100644 index 000000000..b7ad2c1c8 --- /dev/null +++ b/pkg/cri/sbserver/image_status.go @@ -0,0 +1,105 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + imagestore "github.com/containerd/containerd/pkg/cri/store/image" + + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// ImageStatus returns the status of the image, returns nil if the image isn't present. +// TODO(random-liu): We should change CRI to distinguish image id and image spec. (See +// kubernetes/kubernetes#46255) +func (c *criService) ImageStatus(ctx context.Context, r *runtime.ImageStatusRequest) (*runtime.ImageStatusResponse, error) { + image, err := c.localResolve(r.GetImage().GetImage()) + if err != nil { + if errdefs.IsNotFound(err) { + // return empty without error when image not found. 
+ return &runtime.ImageStatusResponse{}, nil + } + return nil, fmt.Errorf("can not resolve %q locally: %w", r.GetImage().GetImage(), err) + } + // TODO(random-liu): [P0] Make sure corresponding snapshot exists. What if snapshot + // doesn't exist? + + runtimeImage := toCRIImage(image) + info, err := c.toCRIImageInfo(ctx, &image, r.GetVerbose()) + if err != nil { + return nil, fmt.Errorf("failed to generate image info: %w", err) + } + + return &runtime.ImageStatusResponse{ + Image: runtimeImage, + Info: info, + }, nil +} + +// toCRIImage converts internal image object to CRI runtime.Image. +func toCRIImage(image imagestore.Image) *runtime.Image { + repoTags, repoDigests := parseImageReferences(image.References) + runtimeImage := &runtime.Image{ + Id: image.ID, + RepoTags: repoTags, + RepoDigests: repoDigests, + Size_: uint64(image.Size), + } + uid, username := getUserFromImage(image.ImageSpec.Config.User) + if uid != nil { + runtimeImage.Uid = &runtime.Int64Value{Value: *uid} + } + runtimeImage.Username = username + + return runtimeImage +} + +// TODO (mikebrow): discuss moving this struct and / or constants for info map for some or all of these fields to CRI +type verboseImageInfo struct { + ChainID string `json:"chainID"` + ImageSpec imagespec.Image `json:"imageSpec"` +} + +// toCRIImageInfo converts internal image object information to CRI image status response info map. 
+func (c *criService) toCRIImageInfo(ctx context.Context, image *imagestore.Image, verbose bool) (map[string]string, error) { + if !verbose { + return nil, nil + } + + info := make(map[string]string) + + imi := &verboseImageInfo{ + ChainID: image.ChainID, + ImageSpec: image.ImageSpec, + } + + m, err := json.Marshal(imi) + if err == nil { + info["info"] = string(m) + } else { + log.G(ctx).WithError(err).Errorf("failed to marshal info %v", imi) + info["info"] = err.Error() + } + + return info, nil +} diff --git a/pkg/cri/sbserver/image_status_test.go b/pkg/cri/sbserver/image_status_test.go new file mode 100644 index 000000000..9e6c20f4c --- /dev/null +++ b/pkg/cri/sbserver/image_status_test.go @@ -0,0 +1,74 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "testing" + + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + imagestore "github.com/containerd/containerd/pkg/cri/store/image" +) + +func TestImageStatus(t *testing.T) { + testID := "sha256:d848ce12891bf78792cda4a23c58984033b0c397a55e93a1556202222ecc5ed4" + image := imagestore.Image{ + ID: testID, + ChainID: "test-chain-id", + References: []string{ + "gcr.io/library/busybox:latest", + "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", + }, + Size: 1234, + ImageSpec: imagespec.Image{ + Config: imagespec.ImageConfig{ + User: "user:group", + }, + }, + } + expected := &runtime.Image{ + Id: testID, + RepoTags: []string{"gcr.io/library/busybox:latest"}, + RepoDigests: []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"}, + Size_: uint64(1234), + Username: "user", + } + + c := newTestCRIService() + t.Logf("should return nil image spec without error for non-exist image") + resp, err := c.ImageStatus(context.Background(), &runtime.ImageStatusRequest{ + Image: &runtime.ImageSpec{Image: testID}, + }) + assert.NoError(t, err) + require.NotNil(t, resp) + assert.Nil(t, resp.GetImage()) + + c.imageStore, err = imagestore.NewFakeStore([]imagestore.Image{image}) + assert.NoError(t, err) + + t.Logf("should return correct image status for exist image") + resp, err = c.ImageStatus(context.Background(), &runtime.ImageStatusRequest{ + Image: &runtime.ImageSpec{Image: testID}, + }) + assert.NoError(t, err) + assert.NotNil(t, resp) + assert.Equal(t, expected, resp.GetImage()) +} diff --git a/pkg/cri/sbserver/imagefs_info.go b/pkg/cri/sbserver/imagefs_info.go new file mode 100644 index 000000000..79aee38ae --- /dev/null +++ b/pkg/cri/sbserver/imagefs_info.go @@ -0,0 +1,51 @@ +/* + Copyright The 
containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "time" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// ImageFsInfo returns information of the filesystem that is used to store images. +// TODO(windows): Usage for windows is always 0 right now. Support this for windows. +func (c *criService) ImageFsInfo(ctx context.Context, r *runtime.ImageFsInfoRequest) (*runtime.ImageFsInfoResponse, error) { + snapshots := c.snapshotStore.List() + timestamp := time.Now().UnixNano() + var usedBytes, inodesUsed uint64 + for _, sn := range snapshots { + // Use the oldest timestamp as the timestamp of imagefs info. + if sn.Timestamp < timestamp { + timestamp = sn.Timestamp + } + usedBytes += sn.Size + inodesUsed += sn.Inodes + } + // TODO(random-liu): Handle content store + return &runtime.ImageFsInfoResponse{ + ImageFilesystems: []*runtime.FilesystemUsage{ + { + Timestamp: timestamp, + FsId: &runtime.FilesystemIdentifier{Mountpoint: c.imageFSPath}, + UsedBytes: &runtime.UInt64Value{Value: usedBytes}, + InodesUsed: &runtime.UInt64Value{Value: inodesUsed}, + }, + }, + }, nil +} diff --git a/pkg/cri/sbserver/imagefs_info_test.go b/pkg/cri/sbserver/imagefs_info_test.go new file mode 100644 index 000000000..d587f2e08 --- /dev/null +++ b/pkg/cri/sbserver/imagefs_info_test.go @@ -0,0 +1,70 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "testing" + + snapshot "github.com/containerd/containerd/snapshots" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot" +) + +func TestImageFsInfo(t *testing.T) { + c := newTestCRIService() + snapshots := []snapshotstore.Snapshot{ + { + Key: "key1", + Kind: snapshot.KindActive, + Size: 10, + Inodes: 100, + Timestamp: 234567, + }, + { + Key: "key2", + Kind: snapshot.KindCommitted, + Size: 20, + Inodes: 200, + Timestamp: 123456, + }, + { + Key: "key3", + Kind: snapshot.KindView, + Size: 0, + Inodes: 0, + Timestamp: 345678, + }, + } + expected := &runtime.FilesystemUsage{ + Timestamp: 123456, + FsId: &runtime.FilesystemIdentifier{Mountpoint: testImageFSPath}, + UsedBytes: &runtime.UInt64Value{Value: 30}, + InodesUsed: &runtime.UInt64Value{Value: 300}, + } + for _, sn := range snapshots { + c.snapshotStore.Add(sn) + } + resp, err := c.ImageFsInfo(context.Background(), &runtime.ImageFsInfoRequest{}) + require.NoError(t, err) + stats := resp.GetImageFilesystems() + assert.Len(t, stats, 1) + assert.Equal(t, expected, stats[0]) +} diff --git a/pkg/cri/sbserver/instrumented_service.go b/pkg/cri/sbserver/instrumented_service.go new file mode 100644 index 000000000..46f9cf727 --- /dev/null +++ b/pkg/cri/sbserver/instrumented_service.go @@ -0,0 
+1,1632 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "errors" + + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + runtime_alpha "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + + ctrdutil "github.com/containerd/containerd/pkg/cri/util" +) + +// instrumentedService wraps service with containerd namespace and logs. +type instrumentedService struct { + c *criService +} + +func newInstrumentedService(c *criService) grpcServices { + return &instrumentedService{c: c} +} + +// instrumentedAlphaService wraps service with containerd namespace and logs. +type instrumentedAlphaService struct { + c *criService +} + +func newInstrumentedAlphaService(c *criService) grpcAlphaServices { + return &instrumentedAlphaService{c: c} +} + +// checkInitialized returns error if the server is not fully initialized. +// GRPC service request handlers should return error before server is fully +// initialized. +// NOTE(random-liu): All following functions MUST check initialized at the beginning. +func (in *instrumentedService) checkInitialized() error { + if in.c.initialized.IsSet() { + return nil + } + return errors.New("server is not initialized yet") +} + +// checkInitialized returns error if the server is not fully initialized. +// GRPC service request handlers should return error before server is fully +// initialized. 
+// NOTE(random-liu): All following functions MUST check initialized at the beginning. +func (in *instrumentedAlphaService) checkInitialized() error { + if in.c.initialized.IsSet() { + return nil + } + return errors.New("server is not initialized yet") +} + +func (in *instrumentedService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandboxRequest) (res *runtime.RunPodSandboxResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("RunPodSandbox for %+v", r.GetConfig().GetMetadata()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("RunPodSandbox for %+v failed, error", r.GetConfig().GetMetadata()) + } else { + log.G(ctx).Infof("RunPodSandbox for %+v returns sandbox id %q", r.GetConfig().GetMetadata(), res.GetPodSandboxId()) + } + }() + res, err = in.c.RunPodSandbox(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedAlphaService) RunPodSandbox(ctx context.Context, r *runtime_alpha.RunPodSandboxRequest) (res *runtime_alpha.RunPodSandboxResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("RunPodSandbox for %+v", r.GetConfig().GetMetadata()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("RunPodSandbox for %+v failed, error", r.GetConfig().GetMetadata()) + } else { + log.G(ctx).Infof("RunPodSandbox for %+v returns sandbox id %q", r.GetConfig().GetMetadata(), res.GetPodSandboxId()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.RunPodSandboxRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.RunPodSandboxResponse + v1res, err = in.c.RunPodSandbox(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.RunPodSandboxResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + 
res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("RunPodSandbox for %+v failed, error", r.GetConfig().GetMetadata()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (res *runtime.ListPodSandboxResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ListPodSandbox with filter %+v", r.GetFilter()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Error("ListPodSandbox failed") + } else { + log.G(ctx).Tracef("ListPodSandbox returns pod sandboxes %+v", res.GetItems()) + } + }() + res, err = in.c.ListPodSandbox(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedAlphaService) ListPodSandbox(ctx context.Context, r *runtime_alpha.ListPodSandboxRequest) (res *runtime_alpha.ListPodSandboxResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ListPodSandbox with filter %+v", r.GetFilter()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Error("ListPodSandbox failed") + } else { + log.G(ctx).Tracef("ListPodSandbox returns pod sandboxes %+v", res.GetItems()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.ListPodSandboxRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.ListPodSandboxResponse + v1res, err = in.c.ListPodSandbox(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.ListPodSandboxResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on
error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Error("ListPodSandbox failed") + } + } + } + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandboxStatusRequest) (res *runtime.PodSandboxStatusResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("PodSandboxStatus for %q", r.GetPodSandboxId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("PodSandboxStatus for %q failed", r.GetPodSandboxId()) + } else { + log.G(ctx).Tracef("PodSandboxStatus for %q returns status %+v", r.GetPodSandboxId(), res.GetStatus()) + } + }() + res, err = in.c.PodSandboxStatus(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedAlphaService) PodSandboxStatus(ctx context.Context, r *runtime_alpha.PodSandboxStatusRequest) (res *runtime_alpha.PodSandboxStatusResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("PodSandboxStatus for %q", r.GetPodSandboxId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("PodSandboxStatus for %q failed", r.GetPodSandboxId()) + } else { + log.G(ctx).Tracef("PodSandboxStatus for %q returns status %+v", r.GetPodSandboxId(), res.GetStatus()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.PodSandboxStatusRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.PodSandboxStatusResponse + v1res, err = in.c.PodSandboxStatus(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.PodSandboxStatusResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // 
actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("PodSandboxStatus for %q failed", r.GetPodSandboxId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandboxRequest) (_ *runtime.StopPodSandboxResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("StopPodSandbox for %q", r.GetPodSandboxId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("StopPodSandbox for %q failed", r.GetPodSandboxId()) + } else { + log.G(ctx).Infof("StopPodSandbox for %q returns successfully", r.GetPodSandboxId()) + } + }() + res, err := in.c.StopPodSandbox(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedAlphaService) StopPodSandbox(ctx context.Context, r *runtime_alpha.StopPodSandboxRequest) (res *runtime_alpha.StopPodSandboxResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("StopPodSandbox for %q", r.GetPodSandboxId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("StopPodSandbox for %q failed", r.GetPodSandboxId()) + } else { + log.G(ctx).Infof("StopPodSandbox for %q returns successfully", r.GetPodSandboxId()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.StopPodSandboxRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.StopPodSandboxResponse + v1res, err = in.c.StopPodSandbox(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.StopPodSandboxResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual
error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("StopPodSandbox for %q failed", r.GetPodSandboxId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodSandboxRequest) (_ *runtime.RemovePodSandboxResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("RemovePodSandbox for %q", r.GetPodSandboxId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("RemovePodSandbox for %q failed", r.GetPodSandboxId()) + } else { + log.G(ctx).Infof("RemovePodSandbox %q returns successfully", r.GetPodSandboxId()) + } + }() + res, err := in.c.RemovePodSandbox(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedAlphaService) RemovePodSandbox(ctx context.Context, r *runtime_alpha.RemovePodSandboxRequest) (res *runtime_alpha.RemovePodSandboxResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("RemovePodSandbox for %q", r.GetPodSandboxId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("RemovePodSandbox for %q failed", r.GetPodSandboxId()) + } else { + log.G(ctx).Infof("RemovePodSandbox %q returns successfully", r.GetPodSandboxId()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.RemovePodSandboxRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.RemovePodSandboxResponse + v1res, err = in.c.RemovePodSandbox(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.RemovePodSandboxResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else
{ + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("RemovePodSandbox for %q failed", r.GetPodSandboxId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedService) PortForward(ctx context.Context, r *runtime.PortForwardRequest) (res *runtime.PortForwardResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("Portforward for %q port %v", r.GetPodSandboxId(), r.GetPort()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("Portforward for %q failed", r.GetPodSandboxId()) + } else { + log.G(ctx).Infof("Portforward for %q returns URL %q", r.GetPodSandboxId(), res.GetUrl()) + } + }() + res, err = in.c.PortForward(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedAlphaService) PortForward(ctx context.Context, r *runtime_alpha.PortForwardRequest) (res *runtime_alpha.PortForwardResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("Portforward for %q port %v", r.GetPodSandboxId(), r.GetPort()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("Portforward for %q failed", r.GetPodSandboxId()) + } else { + log.G(ctx).Infof("Portforward for %q returns URL %q", r.GetPodSandboxId(), res.GetUrl()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.PortForwardRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.PortForwardResponse + v1res, err = in.c.PortForward(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.PortForwardResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else {
+ // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("Portforward for %q failed", r.GetPodSandboxId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedService) CreateContainer(ctx context.Context, r *runtime.CreateContainerRequest) (res *runtime.CreateContainerResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("CreateContainer within sandbox %q for container %+v", + r.GetPodSandboxId(), r.GetConfig().GetMetadata()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("CreateContainer within sandbox %q for %+v failed", + r.GetPodSandboxId(), r.GetConfig().GetMetadata()) + } else { + log.G(ctx).Infof("CreateContainer within sandbox %q for %+v returns container id %q", + r.GetPodSandboxId(), r.GetConfig().GetMetadata(), res.GetContainerId()) + } + }() + res, err = in.c.CreateContainer(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedAlphaService) CreateContainer(ctx context.Context, r *runtime_alpha.CreateContainerRequest) (res *runtime_alpha.CreateContainerResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("CreateContainer within sandbox %q for container %+v", + r.GetPodSandboxId(), r.GetConfig().GetMetadata()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("CreateContainer within sandbox %q for %+v failed", + r.GetPodSandboxId(), r.GetConfig().GetMetadata()) + } else { + log.G(ctx).Infof("CreateContainer within sandbox %q for %+v returns container id %q", + r.GetPodSandboxId(), r.GetConfig().GetMetadata(), res.GetContainerId()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r
runtime.CreateContainerRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.CreateContainerResponse + v1res, err = in.c.CreateContainer(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.CreateContainerResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("CreateContainer within sandbox %q for %+v failed", + r.GetPodSandboxId(), r.GetConfig().GetMetadata()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (_ *runtime.StartContainerResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("StartContainer for %q", r.GetContainerId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("StartContainer for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Infof("StartContainer for %q returns successfully", r.GetContainerId()) + } + }() + res, err := in.c.StartContainer(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedAlphaService) StartContainer(ctx context.Context, r *runtime_alpha.StartContainerRequest) (res *runtime_alpha.StartContainerResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("StartContainer for %q", r.GetContainerId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("StartContainer for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Infof("StartContainer for %q returns successfully", r.GetContainerId()) + } + }() + // converts request and response for earlier CRI version to call and get
response from the current version + var v1r runtime.StartContainerRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.StartContainerResponse + v1res, err = in.c.StartContainer(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.StartContainerResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("StartContainer for %q failed", r.GetContainerId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (res *runtime.ListContainersResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ListContainers with filter %+v", r.GetFilter()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("ListContainers with filter %+v failed", r.GetFilter()) + } else { + log.G(ctx).Tracef("ListContainers with filter %+v returns containers %+v", + r.GetFilter(), res.GetContainers()) + } + }() + res, err = in.c.ListContainers(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedAlphaService) ListContainers(ctx context.Context, r *runtime_alpha.ListContainersRequest) (res *runtime_alpha.ListContainersResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ListContainers with filter %+v", r.GetFilter()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("ListContainers with filter %+v failed", r.GetFilter()) + } else { + log.G(ctx).Tracef("ListContainers with filter %+v returns containers %+v", + r.GetFilter(), res.GetContainers()) + } + }() + // converts
request and response for earlier CRI version to call and get response from the current version + var v1r runtime.ListContainersRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.ListContainersResponse + v1res, err = in.c.ListContainers(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.ListContainersResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("ListContainers with filter %+v failed", r.GetFilter()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedService) ContainerStatus(ctx context.Context, r *runtime.ContainerStatusRequest) (res *runtime.ContainerStatusResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ContainerStatus for %q", r.GetContainerId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("ContainerStatus for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Tracef("ContainerStatus for %q returns status %+v", r.GetContainerId(), res.GetStatus()) + } + }() + res, err = in.c.ContainerStatus(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedAlphaService) ContainerStatus(ctx context.Context, r *runtime_alpha.ContainerStatusRequest) (res *runtime_alpha.ContainerStatusResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ContainerStatus for %q", r.GetContainerId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("ContainerStatus for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Tracef("ContainerStatus for %q returns status %+v", r.GetContainerId(),
res.GetStatus()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.ContainerStatusRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.ContainerStatusResponse + v1res, err = in.c.ContainerStatus(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.ContainerStatusResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("ContainerStatus for %q failed", r.GetContainerId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (res *runtime.StopContainerResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("StopContainer for %q with timeout %d (s)", r.GetContainerId(), r.GetTimeout()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("StopContainer for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Infof("StopContainer for %q returns successfully", r.GetContainerId()) + } + }() + res, err = in.c.StopContainer(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedAlphaService) StopContainer(ctx context.Context, r *runtime_alpha.StopContainerRequest) (res *runtime_alpha.StopContainerResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("StopContainer for %q with timeout %d (s)", r.GetContainerId(), r.GetTimeout()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("StopContainer for %q failed", r.GetContainerId()) + } else { +
log.G(ctx).Infof("StopContainer for %q returns successfully", r.GetContainerId()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.StopContainerRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.StopContainerResponse + v1res, err = in.c.StopContainer(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.StopContainerResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("StopContainer for %q failed", r.GetContainerId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +// RemoveContainer logs the request and its outcome around in.c.RemoveContainer and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (res *runtime.RemoveContainerResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("RemoveContainer for %q", r.GetContainerId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("RemoveContainer for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Infof("RemoveContainer for %q returns successfully", r.GetContainerId()) + } + }() + res, err = in.c.RemoveContainer(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// RemoveContainer is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.RemoveContainer, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) RemoveContainer(ctx context.Context, r *runtime_alpha.RemoveContainerRequest) (res *runtime_alpha.RemoveContainerResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("RemoveContainer for %q", r.GetContainerId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("RemoveContainer for %q failed", r.GetContainerId()) + } else { + 
log.G(ctx).Infof("RemoveContainer for %q returns successfully", r.GetContainerId()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.RemoveContainerRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.RemoveContainerResponse + v1res, err = in.c.RemoveContainer(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.RemoveContainerResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("RemoveContainer for %q failed", r.GetContainerId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +// ExecSync logs the request and its outcome around in.c.ExecSync and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) ExecSync(ctx context.Context, r *runtime.ExecSyncRequest) (res *runtime.ExecSyncResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Debugf("ExecSync for %q with command %+v and timeout %d (s)", r.GetContainerId(), r.GetCmd(), r.GetTimeout()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("ExecSync for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Debugf("ExecSync for %q returns with exit code %d", r.GetContainerId(), res.GetExitCode()) + } + }() + res, err = in.c.ExecSync(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// ExecSync is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.ExecSync, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) ExecSync(ctx context.Context, r *runtime_alpha.ExecSyncRequest) (res *runtime_alpha.ExecSyncResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Debugf("ExecSync for %q with command %+v and timeout %d (s)", r.GetContainerId(), r.GetCmd(), r.GetTimeout()) + defer func() { + if err != nil { + 
log.G(ctx).WithError(err).Errorf("ExecSync for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Debugf("ExecSync for %q returns with exit code %d", r.GetContainerId(), res.GetExitCode()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.ExecSyncRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.ExecSyncResponse + v1res, err = in.c.ExecSync(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.ExecSyncResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("ExecSync for %q failed", r.GetContainerId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +// Exec logs the request and its outcome around in.c.Exec and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) Exec(ctx context.Context, r *runtime.ExecRequest) (res *runtime.ExecResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Debugf("Exec for %q with command %+v, tty %v and stdin %v", + r.GetContainerId(), r.GetCmd(), r.GetTty(), r.GetStdin()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("Exec for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Debugf("Exec for %q returns URL %q", r.GetContainerId(), res.GetUrl()) + } + }() + res, err = in.c.Exec(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// Exec is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.Exec, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) Exec(ctx context.Context, r *runtime_alpha.ExecRequest) (res *runtime_alpha.ExecResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Debugf("Exec for %q with command %+v, tty %v and stdin %v", + r.GetContainerId(), r.GetCmd(), r.GetTty(), r.GetStdin()) + defer func() 
{ + if err != nil { + log.G(ctx).WithError(err).Errorf("Exec for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Debugf("Exec for %q returns URL %q", r.GetContainerId(), res.GetUrl()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.ExecRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.ExecResponse + v1res, err = in.c.Exec(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.ExecResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("Exec for %q failed", r.GetContainerId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +// Attach logs the request and its outcome around in.c.Attach and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) Attach(ctx context.Context, r *runtime.AttachRequest) (res *runtime.AttachResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Debugf("Attach for %q with tty %v and stdin %v", r.GetContainerId(), r.GetTty(), r.GetStdin()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("Attach for %q failed", r.GetContainerId()) + } else { + // use the getter (as Exec does): it tolerates a nil res, a direct field access does not + log.G(ctx).Debugf("Attach for %q returns URL %q", r.GetContainerId(), res.GetUrl()) + } + }() + res, err = in.c.Attach(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// Attach is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.Attach, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) Attach(ctx context.Context, r *runtime_alpha.AttachRequest) (res *runtime_alpha.AttachResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Debugf("Attach for %q with tty %v and stdin %v", r.GetContainerId(), r.GetTty(), r.GetStdin()) + defer func() { + if err != nil { + 
log.G(ctx).WithError(err).Errorf("Attach for %q failed", r.GetContainerId()) + } else { + // use the getter: res stays nil when in.c.Attach returns a nil response without an error + log.G(ctx).Debugf("Attach for %q returns URL %q", r.GetContainerId(), res.GetUrl()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.AttachRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.AttachResponse + v1res, err = in.c.Attach(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.AttachResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("Attach for %q failed", r.GetContainerId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +// UpdateContainerResources logs the request and its outcome around in.c.UpdateContainerResources and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (res *runtime.UpdateContainerResourcesResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("UpdateContainerResources for %q with Linux: %+v / Windows: %+v", r.GetContainerId(), r.GetLinux(), r.GetWindows()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("UpdateContainerResources for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Infof("UpdateContainerResources for %q returns successfully", r.GetContainerId()) + } + }() + res, err = in.c.UpdateContainerResources(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// UpdateContainerResources is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.UpdateContainerResources, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) UpdateContainerResources(ctx context.Context, r *runtime_alpha.UpdateContainerResourcesRequest) (res *runtime_alpha.UpdateContainerResourcesResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + 
log.G(ctx).Infof("UpdateContainerResources for %q with Linux: %+v / Windows: %+v", r.GetContainerId(), r.GetLinux(), r.GetWindows()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("UpdateContainerResources for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Infof("UpdateContainerResources for %q returns successfully", r.GetContainerId()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.UpdateContainerResourcesRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.UpdateContainerResourcesResponse + v1res, err = in.c.UpdateContainerResources(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.UpdateContainerResourcesResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("UpdateContainerResources for %q failed", r.GetContainerId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +// PullImage logs the request and its outcome around in.c.PullImage and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (res *runtime.PullImageResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("PullImage %q", r.GetImage().GetImage()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("PullImage %q failed", r.GetImage().GetImage()) + } else { + log.G(ctx).Infof("PullImage %q returns image reference %q", + r.GetImage().GetImage(), res.GetImageRef()) + } + }() + res, err = in.c.PullImage(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// PullImage is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.PullImage, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) PullImage(ctx context.Context, r *runtime_alpha.PullImageRequest) (res 
*runtime_alpha.PullImageResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("PullImage %q", r.GetImage().GetImage()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("PullImage %q failed", r.GetImage().GetImage()) + } else { + log.G(ctx).Infof("PullImage %q returns image reference %q", + r.GetImage().GetImage(), res.GetImageRef()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.PullImageRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.PullImageResponse + v1res, err = in.c.PullImage(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.PullImageResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("PullImage %q failed", r.GetImage().GetImage()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +// ListImages logs the request and its outcome around in.c.ListImages and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) ListImages(ctx context.Context, r *runtime.ListImagesRequest) (res *runtime.ListImagesResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ListImages with filter %+v", r.GetFilter()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("ListImages with filter %+v failed", r.GetFilter()) + } else { + log.G(ctx).Tracef("ListImages with filter %+v returns image list %+v", + r.GetFilter(), res.GetImages()) + } + }() + res, err = in.c.ListImages(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// ListImages is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.ListImages, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) ListImages(ctx context.Context, r *runtime_alpha.ListImagesRequest) (res 
*runtime_alpha.ListImagesResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ListImages with filter %+v", r.GetFilter()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("ListImages with filter %+v failed", r.GetFilter()) + } else { + log.G(ctx).Tracef("ListImages with filter %+v returns image list %+v", + r.GetFilter(), res.GetImages()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.ListImagesRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.ListImagesResponse + v1res, err = in.c.ListImages(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.ListImagesResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("ListImages with filter %+v failed", r.GetFilter()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +// ImageStatus logs the request and its outcome around in.c.ImageStatus and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) ImageStatus(ctx context.Context, r *runtime.ImageStatusRequest) (res *runtime.ImageStatusResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ImageStatus for %q", r.GetImage().GetImage()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("ImageStatus for %q failed", r.GetImage().GetImage()) + } else { + log.G(ctx).Tracef("ImageStatus for %q returns image status %+v", + r.GetImage().GetImage(), res.GetImage()) + } + }() + res, err = in.c.ImageStatus(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// ImageStatus is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.ImageStatus, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) ImageStatus(ctx context.Context, r *runtime_alpha.ImageStatusRequest) (res 
*runtime_alpha.ImageStatusResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ImageStatus for %q", r.GetImage().GetImage()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("ImageStatus for %q failed", r.GetImage().GetImage()) + } else { + log.G(ctx).Tracef("ImageStatus for %q returns image status %+v", + r.GetImage().GetImage(), res.GetImage()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.ImageStatusRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.ImageStatusResponse + v1res, err = in.c.ImageStatus(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.ImageStatusResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("ImageStatus for %q failed", r.GetImage().GetImage()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +// RemoveImage logs the request and its outcome around in.c.RemoveImage and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) RemoveImage(ctx context.Context, r *runtime.RemoveImageRequest) (_ *runtime.RemoveImageResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("RemoveImage %q", r.GetImage().GetImage()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("RemoveImage %q failed", r.GetImage().GetImage()) + } else { + log.G(ctx).Infof("RemoveImage %q returns successfully", r.GetImage().GetImage()) + } + }() + res, err := in.c.RemoveImage(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// RemoveImage is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.RemoveImage, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) RemoveImage(ctx context.Context, r *runtime_alpha.RemoveImageRequest) (res 
*runtime_alpha.RemoveImageResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Infof("RemoveImage %q", r.GetImage().GetImage()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("RemoveImage %q failed", r.GetImage().GetImage()) + } else { + log.G(ctx).Infof("RemoveImage %q returns successfully", r.GetImage().GetImage()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.RemoveImageRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.RemoveImageResponse + v1res, err = in.c.RemoveImage(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.RemoveImageResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("RemoveImage %q failed", r.GetImage().GetImage()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +// ImageFsInfo logs the request and its outcome around in.c.ImageFsInfo and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) ImageFsInfo(ctx context.Context, r *runtime.ImageFsInfoRequest) (res *runtime.ImageFsInfoResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Debugf("ImageFsInfo") + defer func() { + if err != nil { + log.G(ctx).WithError(err).Error("ImageFsInfo failed") + } else { + // use the getter: it tolerates a nil res, a direct field access does not + log.G(ctx).Debugf("ImageFsInfo returns filesystem info %+v", res.GetImageFilesystems()) + } + }() + res, err = in.c.ImageFsInfo(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// ImageFsInfo is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.ImageFsInfo, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) ImageFsInfo(ctx context.Context, r *runtime_alpha.ImageFsInfoRequest) (res *runtime_alpha.ImageFsInfoResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } 
+ log.G(ctx).Debugf("ImageFsInfo") + defer func() { + if err != nil { + log.G(ctx).WithError(err).Error("ImageFsInfo failed") + } else { + // use the getter: res stays nil when in.c.ImageFsInfo returns a nil response without an error + log.G(ctx).Debugf("ImageFsInfo returns filesystem info %+v", res.GetImageFilesystems()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.ImageFsInfoRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.ImageFsInfoResponse + v1res, err = in.c.ImageFsInfo(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.ImageFsInfoResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Error("ImageFsInfo failed") + } + } + } + return res, errdefs.ToGRPC(err) +} + +// PodSandboxStats logs the request and its outcome around in.c.PodSandboxStats and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) PodSandboxStats(ctx context.Context, r *runtime.PodSandboxStatsRequest) (res *runtime.PodSandboxStatsResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Debugf("PodSandboxStats for %q", r.GetPodSandboxId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("PodSandboxStats for %q failed", r.GetPodSandboxId()) + } else { + log.G(ctx).Debugf("PodSandboxStats for %q returns stats %+v", r.GetPodSandboxId(), res.GetStats()) + } + }() + res, err = in.c.PodSandboxStats(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// PodSandboxStats is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.PodSandboxStats, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) PodSandboxStats(ctx context.Context, r *runtime_alpha.PodSandboxStatsRequest) (res *runtime_alpha.PodSandboxStatsResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Debugf("PodSandboxStats for %q", r.GetPodSandboxId()) + defer func() { + if err 
!= nil { + log.G(ctx).WithError(err).Errorf("PodSandboxStats for %q failed", r.GetPodSandboxId()) + } else { + log.G(ctx).Debugf("PodSandboxStats for %q returns stats %+v", r.GetPodSandboxId(), res.GetStats()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.PodSandboxStatsRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.PodSandboxStatsResponse + v1res, err = in.c.PodSandboxStats(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.PodSandboxStatsResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + // log perr here: err is already reported by the deferred logger, perr would otherwise be lost + log.G(ctx).WithError(perr).Errorf("PodSandboxStats for %q failed", r.GetPodSandboxId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +// ContainerStats logs the request and its outcome around in.c.ContainerStats and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) ContainerStats(ctx context.Context, r *runtime.ContainerStatsRequest) (res *runtime.ContainerStatsResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Debugf("ContainerStats for %q", r.GetContainerId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("ContainerStats for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Debugf("ContainerStats for %q returns stats %+v", r.GetContainerId(), res.GetStats()) + } + }() + res, err = in.c.ContainerStats(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// ContainerStats is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.ContainerStats, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) ContainerStats(ctx context.Context, r *runtime_alpha.ContainerStatsRequest) (res *runtime_alpha.ContainerStatsResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Debugf("ContainerStats for %q", r.GetContainerId()) + 
defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("ContainerStats for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Debugf("ContainerStats for %q returns stats %+v", r.GetContainerId(), res.GetStats()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.ContainerStatsRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.ContainerStatsResponse + v1res, err = in.c.ContainerStats(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.ContainerStatsResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("ContainerStats for %q failed", r.GetContainerId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +// ListPodSandboxStats logs the request and its outcome around in.c.ListPodSandboxStats and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) ListPodSandboxStats(ctx context.Context, r *runtime.ListPodSandboxStatsRequest) (res *runtime.ListPodSandboxStatsResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ListPodSandboxStats with filter %+v", r.GetFilter()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Error("ListPodSandboxStats failed") + } else { + log.G(ctx).Tracef("ListPodSandboxStats returns stats %+v", res.GetStats()) + } + }() + res, err = in.c.ListPodSandboxStats(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// ListPodSandboxStats is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.ListPodSandboxStats, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) ListPodSandboxStats(ctx context.Context, r *runtime_alpha.ListPodSandboxStatsRequest) (res *runtime_alpha.ListPodSandboxStatsResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ListPodSandboxStats with filter 
%+v", r.GetFilter()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Error("ListPodSandboxStats failed") + } else { + log.G(ctx).Tracef("ListPodSandboxStats returns stats %+v", res.GetStats()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.ListPodSandboxStatsRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.ListPodSandboxStatsResponse + v1res, err = in.c.ListPodSandboxStats(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.ListPodSandboxStatsResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Error("ListPodSandboxStats failed") + } + } + } + return res, errdefs.ToGRPC(err) +} + +// ListContainerStats logs the request and its outcome around in.c.ListContainerStats and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) ListContainerStats(ctx context.Context, r *runtime.ListContainerStatsRequest) (res *runtime.ListContainerStatsResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ListContainerStats with filter %+v", r.GetFilter()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Error("ListContainerStats failed") + } else { + log.G(ctx).Tracef("ListContainerStats returns stats %+v", res.GetStats()) + } + }() + res, err = in.c.ListContainerStats(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// ListContainerStats is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.ListContainerStats, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) ListContainerStats(ctx context.Context, r *runtime_alpha.ListContainerStatsRequest) (res *runtime_alpha.ListContainerStatsResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("ListContainerStats with filter %+v", r.GetFilter()) + defer func() { 
+ if err != nil { + log.G(ctx).WithError(err).Error("ListContainerStats failed") + } else { + log.G(ctx).Tracef("ListContainerStats returns stats %+v", res.GetStats()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.ListContainerStatsRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.ListContainerStatsResponse + v1res, err = in.c.ListContainerStats(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.ListContainerStatsResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Error("ListContainerStats failed") + } + } + } + return res, errdefs.ToGRPC(err) +} + +// Status logs the request and its outcome around in.c.Status and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) Status(ctx context.Context, r *runtime.StatusRequest) (res *runtime.StatusResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("Status") + defer func() { + if err != nil { + log.G(ctx).WithError(err).Error("Status failed") + } else { + log.G(ctx).Tracef("Status returns status %+v", res.GetStatus()) + } + }() + res, err = in.c.Status(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// Status is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.Status, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) Status(ctx context.Context, r *runtime_alpha.StatusRequest) (res *runtime_alpha.StatusResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("Status") + defer func() { + if err != nil { + log.G(ctx).WithError(err).Error("Status failed") + } else { + log.G(ctx).Tracef("Status returns status %+v", res.GetStatus()) + } + }() + // converts request and response for earlier CRI version to call and get 
response from the current version + var v1r runtime.StatusRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.StatusResponse + v1res, err = in.c.Status(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.StatusResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Error("Status failed") + } + } + } + return res, errdefs.ToGRPC(err) +} + +// Version logs the request and its outcome around in.c.Version and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) Version(ctx context.Context, r *runtime.VersionRequest) (res *runtime.VersionResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("Version with client side version %q", r.GetVersion()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Error("Version failed") + } else { + log.G(ctx).Tracef("Version returns %+v", res) + } + }() + res, err = in.c.Version(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// Version is the runtime_alpha variant; unlike its siblings it performs no request/response conversion and delegates directly to in.c.AlphaVersion. +func (in *instrumentedAlphaService) Version(ctx context.Context, r *runtime_alpha.VersionRequest) (res *runtime_alpha.VersionResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Tracef("Version with client side version %q", r.GetVersion()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Error("Version failed") + } else { + log.G(ctx).Tracef("Version returns %+v", res) + } + }() + res, err = in.c.AlphaVersion(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// UpdateRuntimeConfig logs the request and its outcome around in.c.UpdateRuntimeConfig and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateRuntimeConfigRequest) (res *runtime.UpdateRuntimeConfigResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, 
err + } + log.G(ctx).Debugf("UpdateRuntimeConfig with config %+v", r.GetRuntimeConfig()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Error("UpdateRuntimeConfig failed") + } else { + log.G(ctx).Debug("UpdateRuntimeConfig returns successfully") + } + }() + res, err = in.c.UpdateRuntimeConfig(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +// UpdateRuntimeConfig is the runtime_alpha variant: the request is converted with alphaReqToV1Req, served by in.c.UpdateRuntimeConfig, and a non-nil response is converted back with v1RespToAlphaResp. +func (in *instrumentedAlphaService) UpdateRuntimeConfig(ctx context.Context, r *runtime_alpha.UpdateRuntimeConfigRequest) (res *runtime_alpha.UpdateRuntimeConfigResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Debugf("UpdateRuntimeConfig with config %+v", r.GetRuntimeConfig()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Error("UpdateRuntimeConfig failed") + } else { + log.G(ctx).Debug("UpdateRuntimeConfig returns successfully") + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.UpdateRuntimeConfigRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.UpdateRuntimeConfigResponse + v1res, err = in.c.UpdateRuntimeConfig(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.UpdateRuntimeConfigResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precedence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Error("UpdateRuntimeConfig failed") + } + } + } + return res, errdefs.ToGRPC(err) +} + +// ReopenContainerLog logs the request and its outcome around in.c.ReopenContainerLog and returns the error converted by errdefs.ToGRPC. +func (in *instrumentedService) ReopenContainerLog(ctx context.Context, r *runtime.ReopenContainerLogRequest) (res *runtime.ReopenContainerLogResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + 
log.G(ctx).Debugf("ReopenContainerLog for %q", r.GetContainerId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("ReopenContainerLog for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Debugf("ReopenContainerLog for %q returns successfully", r.GetContainerId()) + } + }() + res, err = in.c.ReopenContainerLog(ctrdutil.WithNamespace(ctx), r) + return res, errdefs.ToGRPC(err) +} + +func (in *instrumentedAlphaService) ReopenContainerLog(ctx context.Context, r *runtime_alpha.ReopenContainerLogRequest) (res *runtime_alpha.ReopenContainerLogResponse, err error) { + if err := in.checkInitialized(); err != nil { + return nil, err + } + log.G(ctx).Debugf("ReopenContainerLog for %q", r.GetContainerId()) + defer func() { + if err != nil { + log.G(ctx).WithError(err).Errorf("ReopenContainerLog for %q failed", r.GetContainerId()) + } else { + log.G(ctx).Debugf("ReopenContainerLog for %q returns successfully", r.GetContainerId()) + } + }() + // converts request and response for earlier CRI version to call and get response from the current version + var v1r runtime.ReopenContainerLogRequest + if err := alphaReqToV1Req(r, &v1r); err != nil { + return nil, errdefs.ToGRPC(err) + } + var v1res *runtime.ReopenContainerLogResponse + v1res, err = in.c.ReopenContainerLog(ctrdutil.WithNamespace(ctx), &v1r) + if v1res != nil { + resp := &runtime_alpha.ReopenContainerLogResponse{} + perr := v1RespToAlphaResp(v1res, resp) + if perr == nil { + res = resp + } else { + // actual error has precidence on error returned vs parse error issues + if err == nil { + err = perr + } else { + // extra log entry if convert response parse error and request error + log.G(ctx).WithError(perr).Errorf("ReopenContainerLog for %q failed", r.GetContainerId()) + } + } + } + return res, errdefs.ToGRPC(err) +} + +func alphaReqToV1Req( + alphar interface{ Marshal() ([]byte, error) }, + v1r interface{ Unmarshal(_ []byte) error }, +) error { + p, err := alphar.Marshal() + if err != nil { + 
return err + } + + if err = v1r.Unmarshal(p); err != nil { + return err + } + return nil +} + +func v1RespToAlphaResp( + v1res interface{ Marshal() ([]byte, error) }, + alphares interface{ Unmarshal(_ []byte) error }, +) error { + p, err := v1res.Marshal() + if err != nil { + return err + } + + if err = alphares.Unmarshal(p); err != nil { + return err + } + return nil +} diff --git a/pkg/cri/sbserver/metrics.go b/pkg/cri/sbserver/metrics.go new file mode 100644 index 000000000..99b3f5d66 --- /dev/null +++ b/pkg/cri/sbserver/metrics.go @@ -0,0 +1,58 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "github.com/docker/go-metrics" +) + +var ( + sandboxListTimer metrics.Timer + sandboxCreateNetworkTimer metrics.Timer + sandboxDeleteNetwork metrics.Timer + + sandboxRuntimeCreateTimer metrics.LabeledTimer + sandboxRuntimeStopTimer metrics.LabeledTimer + sandboxRemoveTimer metrics.LabeledTimer + + containerListTimer metrics.Timer + containerRemoveTimer metrics.LabeledTimer + containerCreateTimer metrics.LabeledTimer + containerStopTimer metrics.LabeledTimer + containerStartTimer metrics.LabeledTimer +) + +func init() { + // these CRI metrics record latencies for successful operations around a sandbox and container's lifecycle. 
+ ns := metrics.NewNamespace("containerd", "cri", nil) + + sandboxListTimer = ns.NewTimer("sandbox_list", "time to list sandboxes") + sandboxCreateNetworkTimer = ns.NewTimer("sandbox_create_network", "time to create the network for a sandbox") + sandboxDeleteNetwork = ns.NewTimer("sandbox_delete_network", "time to delete a sandbox's network") + + sandboxRuntimeCreateTimer = ns.NewLabeledTimer("sandbox_runtime_create", "time to create a sandbox in the runtime", "runtime") + sandboxRuntimeStopTimer = ns.NewLabeledTimer("sandbox_runtime_stop", "time to stop a sandbox", "runtime") + sandboxRemoveTimer = ns.NewLabeledTimer("sandbox_remove", "time to remove a sandbox", "runtime") + + containerListTimer = ns.NewTimer("container_list", "time to list containers") + containerRemoveTimer = ns.NewLabeledTimer("container_remove", "time to remove a container", "runtime") + containerCreateTimer = ns.NewLabeledTimer("container_create", "time to create a container", "runtime") + containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") + containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") + + metrics.Register(ns) +} diff --git a/pkg/cri/sbserver/opts.go b/pkg/cri/sbserver/opts.go new file mode 100644 index 000000000..1095fdd75 --- /dev/null +++ b/pkg/cri/sbserver/opts.go @@ -0,0 +1,51 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/log" + "github.com/containerd/nri" + v1 "github.com/containerd/nri/types/v1" +) + +// WithNRISandboxDelete calls delete for a sandbox'd task +func WithNRISandboxDelete(sandboxID string) containerd.ProcessDeleteOpts { + return func(ctx context.Context, p containerd.Process) error { + task, ok := p.(containerd.Task) + if !ok { + return nil + } + nric, err := nri.New() + if err != nil { + log.G(ctx).WithError(err).Error("unable to create nri client") + return nil + } + if nric == nil { + return nil + } + sb := &nri.Sandbox{ + ID: sandboxID, + } + if _, err := nric.InvokeWithSandbox(ctx, task, v1.Delete, sb); err != nil { + log.G(ctx).WithError(err).Errorf("Failed to delete nri for %q", task.ID()) + } + return nil + } +} diff --git a/pkg/cri/sbserver/rdt_linux.go b/pkg/cri/sbserver/rdt_linux.go new file mode 100644 index 000000000..188c2100a --- /dev/null +++ b/pkg/cri/sbserver/rdt_linux.go @@ -0,0 +1,51 @@ +//go:build !no_rdt +// +build !no_rdt + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "fmt" + + "github.com/containerd/containerd/services/tasks" + "github.com/intel/goresctrl/pkg/rdt" + "github.com/sirupsen/logrus" +) + +// rdtClassFromAnnotations examines container and pod annotations of a +// container and returns its effective RDT class. 
+func (c *criService) rdtClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) { + cls, err := rdt.ContainerClassFromAnnotations(containerName, containerAnnotations, podAnnotations) + + if err == nil { + // Our internal check that RDT has been enabled + if cls != "" && !tasks.RdtEnabled() { + err = fmt.Errorf("RDT disabled, refusing to set RDT class of container %q to %q", containerName, cls) + } + } + + if err != nil { + if !tasks.RdtEnabled() && c.config.ContainerdConfig.IgnoreRdtNotEnabledErrors { + logrus.Debugf("continuing create container %s, ignoring rdt not enabled (%v)", containerName, err) + return "", nil + } + return "", err + } + + return cls, nil +} diff --git a/pkg/cri/sbserver/rdt_stub_linux.go b/pkg/cri/sbserver/rdt_stub_linux.go new file mode 100644 index 000000000..ac1e65906 --- /dev/null +++ b/pkg/cri/sbserver/rdt_stub_linux.go @@ -0,0 +1,24 @@ +//go:build no_rdt +// +build no_rdt + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +func (c *criService) rdtClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) { + return "", nil +} diff --git a/pkg/cri/sbserver/restart.go b/pkg/cri/sbserver/restart.go new file mode 100644 index 000000000..6bc91968c --- /dev/null +++ b/pkg/cri/sbserver/restart.go @@ -0,0 +1,511 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + "os" + "path/filepath" + goruntime "runtime" + "sync" + "time" + + "github.com/containerd/containerd" + containerdio "github.com/containerd/containerd/cio" + "github.com/containerd/containerd/errdefs" + containerdimages "github.com/containerd/containerd/images" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/platforms" + "github.com/containerd/typeurl" + "golang.org/x/sync/errgroup" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + cio "github.com/containerd/containerd/pkg/cri/io" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + ctrdutil "github.com/containerd/containerd/pkg/cri/util" + "github.com/containerd/containerd/pkg/netns" +) + +// NOTE: The recovery logic has following assumption: when the cri plugin is down: +// 1) Files (e.g. root directory, netns) and checkpoint maintained by the plugin MUST NOT be +// touched. Or else, recovery logic for those containers/sandboxes may return error. +// 2) Containerd containers may be deleted, but SHOULD NOT be added. Or else, recovery logic +// for the newly added container/sandbox will return error, because there is no corresponding root +// directory created. +// 3) Containerd container tasks may exit or be stopped, deleted. 
Even though current logic could +// tolerant tasks being created or started, we prefer that not to happen. + +// recover recovers system state from containerd and status checkpoint. +func (c *criService) recover(ctx context.Context) error { + // Recover all sandboxes. + sandboxes, err := c.client.Containers(ctx, filterLabel(containerKindLabel, containerKindSandbox)) + if err != nil { + return fmt.Errorf("failed to list sandbox containers: %w", err) + } + + eg, ctx2 := errgroup.WithContext(ctx) + for _, sandbox := range sandboxes { + sandbox := sandbox + eg.Go(func() error { + sb, err := c.loadSandbox(ctx2, sandbox) + if err != nil { + log.G(ctx2).WithError(err).Errorf("Failed to load sandbox %q", sandbox.ID()) + return nil + } + log.G(ctx2).Debugf("Loaded sandbox %+v", sb) + if err := c.sandboxStore.Add(sb); err != nil { + return fmt.Errorf("failed to add sandbox %q to store: %w", sandbox.ID(), err) + } + if err := c.sandboxNameIndex.Reserve(sb.Name, sb.ID); err != nil { + return fmt.Errorf("failed to reserve sandbox name %q: %w", sb.Name, err) + } + return nil + }) + } + if err := eg.Wait(); err != nil { + return err + } + + // Recover all containers. 
+ containers, err := c.client.Containers(ctx, filterLabel(containerKindLabel, containerKindContainer)) + if err != nil { + return fmt.Errorf("failed to list containers: %w", err) + } + eg, ctx2 = errgroup.WithContext(ctx) + for _, container := range containers { + container := container + eg.Go(func() error { + cntr, err := c.loadContainer(ctx2, container) + if err != nil { + log.G(ctx2).WithError(err).Errorf("Failed to load container %q", container.ID()) + return nil + } + log.G(ctx2).Debugf("Loaded container %+v", cntr) + if err := c.containerStore.Add(cntr); err != nil { + return fmt.Errorf("failed to add container %q to store: %w", container.ID(), err) + } + if err := c.containerNameIndex.Reserve(cntr.Name, cntr.ID); err != nil { + return fmt.Errorf("failed to reserve container name %q: %w", cntr.Name, err) + } + return nil + }) + } + if err := eg.Wait(); err != nil { + return err + } + + // Recover all images. + cImages, err := c.client.ListImages(ctx) + if err != nil { + return fmt.Errorf("failed to list images: %w", err) + } + c.loadImages(ctx, cImages) + + // It's possible that containerd containers are deleted unexpectedly. In that case, + // we can't even get metadata, we should cleanup orphaned sandbox/container directories + // with best effort. + + // Cleanup orphaned sandbox and container directories without corresponding containerd container. 
+ for _, cleanup := range []struct { + cntrs []containerd.Container + base string + errMsg string + }{ + { + cntrs: sandboxes, + base: filepath.Join(c.config.RootDir, sandboxesDir), + errMsg: "failed to cleanup orphaned sandbox directories", + }, + { + cntrs: sandboxes, + base: filepath.Join(c.config.StateDir, sandboxesDir), + errMsg: "failed to cleanup orphaned volatile sandbox directories", + }, + { + cntrs: containers, + base: filepath.Join(c.config.RootDir, containersDir), + errMsg: "failed to cleanup orphaned container directories", + }, + { + cntrs: containers, + base: filepath.Join(c.config.StateDir, containersDir), + errMsg: "failed to cleanup orphaned volatile container directories", + }, + } { + if err := cleanupOrphanedIDDirs(ctx, cleanup.cntrs, cleanup.base); err != nil { + return fmt.Errorf("%s: %w", cleanup.errMsg, err) + } + } + return nil +} + +// loadContainerTimeout is the default timeout for loading a container/sandbox. +// One container/sandbox hangs (e.g. containerd#2438) should not affect other +// containers/sandboxes. +// Most CRI container/sandbox related operations are per container, the ones +// which handle multiple containers at a time are: +// * ListPodSandboxes: Don't talk with containerd services. +// * ListContainers: Don't talk with containerd services. +// * ListContainerStats: Not in critical code path, a default timeout will +// be applied at CRI level. +// * Recovery logic: We should set a time for each container/sandbox recovery. +// * Event monitor: We should set a timeout for each container/sandbox event handling. +const loadContainerTimeout = 10 * time.Second + +// loadContainer loads container from containerd and status checkpoint. 
+func (c *criService) loadContainer(ctx context.Context, cntr containerd.Container) (containerstore.Container, error) { + ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout) + defer cancel() + id := cntr.ID() + containerDir := c.getContainerRootDir(id) + volatileContainerDir := c.getVolatileContainerRootDir(id) + var container containerstore.Container + // Load container metadata. + exts, err := cntr.Extensions(ctx) + if err != nil { + return container, fmt.Errorf("failed to get container extensions: %w", err) + } + ext, ok := exts[containerMetadataExtension] + if !ok { + return container, fmt.Errorf("metadata extension %q not found", containerMetadataExtension) + } + data, err := typeurl.UnmarshalAny(ext) + if err != nil { + return container, fmt.Errorf("failed to unmarshal metadata extension %q: %w", ext, err) + } + meta := data.(*containerstore.Metadata) + + // Load status from checkpoint. + status, err := containerstore.LoadStatus(containerDir, id) + if err != nil { + log.G(ctx).WithError(err).Warnf("Failed to load container status for %q", id) + status = unknownContainerStatus() + } + + var containerIO *cio.ContainerIO + err = func() error { + // Load up-to-date status from containerd. + t, err := cntr.Task(ctx, func(fifos *containerdio.FIFOSet) (_ containerdio.IO, err error) { + stdoutWC, stderrWC, err := c.createContainerLoggers(meta.LogPath, meta.Config.GetTty()) + if err != nil { + return nil, err + } + defer func() { + if err != nil { + if stdoutWC != nil { + stdoutWC.Close() + } + if stderrWC != nil { + stderrWC.Close() + } + } + }() + containerIO, err = cio.NewContainerIO(id, + cio.WithFIFOs(fifos), + ) + if err != nil { + return nil, err + } + containerIO.AddOutput("log", stdoutWC, stderrWC) + containerIO.Pipe() + return containerIO, nil + }) + if err != nil && !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to load task: %w", err) + } + var s containerd.Status + var notFound bool + if errdefs.IsNotFound(err) { + // Task is not found. 
+ notFound = true + } else { + // Task is found. Get task status. + s, err = t.Status(ctx) + if err != nil { + // It's still possible that task is deleted during this window. + if !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to get task status: %w", err) + } + notFound = true + } + } + if notFound { + // Task is not created or has been deleted, use the checkpointed status + // to generate container status. + switch status.State() { + case runtime.ContainerState_CONTAINER_CREATED: + // NOTE: Another possibility is that we've tried to start the container, but + // containerd got restarted during that. In that case, we still + // treat the container as `CREATED`. + containerIO, err = cio.NewContainerIO(id, + cio.WithNewFIFOs(volatileContainerDir, meta.Config.GetTty(), meta.Config.GetStdin()), + ) + if err != nil { + return fmt.Errorf("failed to create container io: %w", err) + } + case runtime.ContainerState_CONTAINER_RUNNING: + // Container was in running state, but its task has been deleted, + // set unknown exited state. Container io is not needed in this case. + status.FinishedAt = time.Now().UnixNano() + status.ExitCode = unknownExitCode + status.Reason = unknownExitReason + default: + // Container is in exited/unknown state, return the status as it is. + } + } else { + // Task status is found. Update container status based on the up-to-date task status. + switch s.Status { + case containerd.Created: + // Task has been created, but not started yet. This could only happen if containerd + // gets restarted during container start. + // Container must be in `CREATED` state. + if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to delete task: %w", err) + } + if status.State() != runtime.ContainerState_CONTAINER_CREATED { + return fmt.Errorf("unexpected container state for created task: %q", status.State()) + } + case containerd.Running: + // Task is running. 
Container must be in `RUNNING` state, based on our assumption that + // "task should not be started when containerd is down". + switch status.State() { + case runtime.ContainerState_CONTAINER_EXITED: + return fmt.Errorf("unexpected container state for running task: %q", status.State()) + case runtime.ContainerState_CONTAINER_RUNNING: + default: + // This may happen if containerd gets restarted after task is started, but + // before status is checkpointed. + status.StartedAt = time.Now().UnixNano() + status.Pid = t.Pid() + } + // Wait for the task for exit monitor. + // wait is a long running background request, no timeout needed. + exitCh, err := t.Wait(ctrdutil.NamespacedContext()) + if err != nil { + if !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to wait for task: %w", err) + } + // Container was in running state, but its task has been deleted, + // set unknown exited state. + status.FinishedAt = time.Now().UnixNano() + status.ExitCode = unknownExitCode + status.Reason = unknownExitReason + } else { + // Start exit monitor. + c.eventMonitor.startContainerExitMonitor(context.Background(), id, status.Pid, exitCh) + } + case containerd.Stopped: + // Task is stopped. Update status and delete the task. + if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to delete task: %w", err) + } + status.FinishedAt = s.ExitTime.UnixNano() + status.ExitCode = int32(s.ExitStatus) + default: + return fmt.Errorf("unexpected task status %q", s.Status) + } + } + return nil + }() + if err != nil { + log.G(ctx).WithError(err).Errorf("Failed to load container status for %q", id) + // Only set the unknown field in this case, because other fields may + // contain useful information loaded from the checkpoint. 
+ status.Unknown = true + } + opts := []containerstore.Opts{ + containerstore.WithStatus(status, containerDir), + containerstore.WithContainer(cntr), + } + // containerIO could be nil for container in unknown state. + if containerIO != nil { + opts = append(opts, containerstore.WithContainerIO(containerIO)) + } + return containerstore.NewContainer(*meta, opts...) +} + +// loadSandbox loads sandbox from containerd. +func (c *criService) loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) { + ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout) + defer cancel() + var sandbox sandboxstore.Sandbox + // Load sandbox metadata. + exts, err := cntr.Extensions(ctx) + if err != nil { + return sandbox, fmt.Errorf("failed to get sandbox container extensions: %w", err) + } + ext, ok := exts[sandboxMetadataExtension] + if !ok { + return sandbox, fmt.Errorf("metadata extension %q not found", sandboxMetadataExtension) + } + data, err := typeurl.UnmarshalAny(ext) + if err != nil { + return sandbox, fmt.Errorf("failed to unmarshal metadata extension %q: %w", ext, err) + } + meta := data.(*sandboxstore.Metadata) + + s, err := func() (sandboxstore.Status, error) { + status := unknownSandboxStatus() + // Load sandbox created timestamp. + info, err := cntr.Info(ctx) + if err != nil { + return status, fmt.Errorf("failed to get sandbox container info: %w", err) + } + status.CreatedAt = info.CreatedAt + + // Load sandbox state. + t, err := cntr.Task(ctx, nil) + if err != nil && !errdefs.IsNotFound(err) { + return status, fmt.Errorf("failed to load task: %w", err) + } + var taskStatus containerd.Status + var notFound bool + if errdefs.IsNotFound(err) { + // Task is not found. + notFound = true + } else { + // Task is found. Get task status. + taskStatus, err = t.Status(ctx) + if err != nil { + // It's still possible that task is deleted during this window. 
+ if !errdefs.IsNotFound(err) { + return status, fmt.Errorf("failed to get task status: %w", err) + } + notFound = true + } + } + if notFound { + // Task does not exist, set sandbox state as NOTREADY. + status.State = sandboxstore.StateNotReady + } else { + if taskStatus.Status == containerd.Running { + // Wait for the task for sandbox monitor. + // wait is a long running background request, no timeout needed. + exitCh, err := t.Wait(ctrdutil.NamespacedContext()) + if err != nil { + if !errdefs.IsNotFound(err) { + return status, fmt.Errorf("failed to wait for task: %w", err) + } + status.State = sandboxstore.StateNotReady + } else { + // Task is running, set sandbox state as READY. + status.State = sandboxstore.StateReady + status.Pid = t.Pid() + c.eventMonitor.startSandboxExitMonitor(context.Background(), meta.ID, status.Pid, exitCh) + } + } else { + // Task is not running. Delete the task and set sandbox state as NOTREADY. + if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { + return status, fmt.Errorf("failed to delete task: %w", err) + } + status.State = sandboxstore.StateNotReady + } + } + return status, nil + }() + if err != nil { + log.G(ctx).WithError(err).Errorf("Failed to load sandbox status for %q", cntr.ID()) + } + + sandbox = sandboxstore.NewSandbox(*meta, s) + sandbox.Container = cntr + + // Load network namespace. + if goruntime.GOOS != "windows" && + meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE { + // Don't need to load netns for host network sandbox. + return sandbox, nil + } + if goruntime.GOOS == "windows" && meta.Config.GetWindows().GetSecurityContext().GetHostProcess() { + return sandbox, nil + } + sandbox.NetNS = netns.LoadNetNS(meta.NetNSPath) + + // It doesn't matter whether task is running or not. 
If it is running, sandbox + // status will be `READY`; if it is not running, sandbox status will be `NOT_READY`, + // kubelet will stop the sandbox which will properly cleanup everything. + return sandbox, nil +} + +// loadImages loads images from containerd. +func (c *criService) loadImages(ctx context.Context, cImages []containerd.Image) { + snapshotter := c.config.ContainerdConfig.Snapshotter + var wg sync.WaitGroup + for _, i := range cImages { + wg.Add(1) + i := i + go func() { + defer wg.Done() + ok, _, _, _, err := containerdimages.Check(ctx, i.ContentStore(), i.Target(), platforms.Default()) + if err != nil { + log.G(ctx).WithError(err).Errorf("Failed to check image content readiness for %q", i.Name()) + return + } + if !ok { + log.G(ctx).Warnf("The image content readiness for %q is not ok", i.Name()) + return + } + // Checking existence of top-level snapshot for each image being recovered. + unpacked, err := i.IsUnpacked(ctx, snapshotter) + if err != nil { + log.G(ctx).WithError(err).Warnf("Failed to check whether image is unpacked for image %s", i.Name()) + return + } + if !unpacked { + log.G(ctx).Warnf("The image %s is not unpacked.", i.Name()) + // TODO(random-liu): Consider whether we should try unpack here. + } + if err := c.updateImage(ctx, i.Name()); err != nil { + log.G(ctx).WithError(err).Warnf("Failed to update reference for image %q", i.Name()) + return + } + log.G(ctx).Debugf("Loaded image %q", i.Name()) + }() + } + wg.Wait() +} + +func cleanupOrphanedIDDirs(ctx context.Context, cntrs []containerd.Container, base string) error { + // Cleanup orphaned id directories. 
+ dirs, err := os.ReadDir(base) + if err != nil && !os.IsNotExist(err) { + return fmt.Errorf("failed to read base directory: %w", err) + } + idsMap := make(map[string]containerd.Container) + for _, cntr := range cntrs { + idsMap[cntr.ID()] = cntr + } + for _, d := range dirs { + if !d.IsDir() { + log.G(ctx).Warnf("Invalid file %q found in base directory %q", d.Name(), base) + continue + } + if _, ok := idsMap[d.Name()]; ok { + // Do not remove id directory if corresponding container is found. + continue + } + dir := filepath.Join(base, d.Name()) + if err := ensureRemoveAll(ctx, dir); err != nil { + log.G(ctx).WithError(err).Warnf("Failed to remove id directory %q", dir) + } else { + log.G(ctx).Debugf("Cleanup orphaned id directory %q", dir) + } + } + return nil +} diff --git a/pkg/cri/sbserver/sandbox_list.go b/pkg/cri/sbserver/sandbox_list.go new file mode 100644 index 000000000..4cae5a6f4 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_list.go @@ -0,0 +1,112 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "time" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" +) + +// ListPodSandbox returns a list of Sandbox. +func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (*runtime.ListPodSandboxResponse, error) { + start := time.Now() + // List all sandboxes from store. 
+ sandboxesInStore := c.sandboxStore.List() + var sandboxes []*runtime.PodSandbox + for _, sandboxInStore := range sandboxesInStore { + sandboxes = append(sandboxes, toCRISandbox( + sandboxInStore.Metadata, + sandboxInStore.Status.Get(), + )) + } + + sandboxes = c.filterCRISandboxes(sandboxes, r.GetFilter()) + + sandboxListTimer.UpdateSince(start) + return &runtime.ListPodSandboxResponse{Items: sandboxes}, nil +} + +// toCRISandbox converts sandbox metadata into CRI pod sandbox. +func toCRISandbox(meta sandboxstore.Metadata, status sandboxstore.Status) *runtime.PodSandbox { + // Set sandbox state to NOTREADY by default. + state := runtime.PodSandboxState_SANDBOX_NOTREADY + if status.State == sandboxstore.StateReady { + state = runtime.PodSandboxState_SANDBOX_READY + } + return &runtime.PodSandbox{ + Id: meta.ID, + Metadata: meta.Config.GetMetadata(), + State: state, + CreatedAt: status.CreatedAt.UnixNano(), + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + RuntimeHandler: meta.RuntimeHandler, + } +} + +func (c *criService) normalizePodSandboxFilter(filter *runtime.PodSandboxFilter) { + if sb, err := c.sandboxStore.Get(filter.GetId()); err == nil { + filter.Id = sb.ID + } +} + +func (c *criService) normalizePodSandboxStatsFilter(filter *runtime.PodSandboxStatsFilter) { + if sb, err := c.sandboxStore.Get(filter.GetId()); err == nil { + filter.Id = sb.ID + } +} + +// filterCRISandboxes filters CRISandboxes. 
+func (c *criService) filterCRISandboxes(sandboxes []*runtime.PodSandbox, filter *runtime.PodSandboxFilter) []*runtime.PodSandbox { + if filter == nil { + return sandboxes + } + + c.normalizePodSandboxFilter(filter) + filtered := []*runtime.PodSandbox{} + for _, s := range sandboxes { + // Filter by id + if filter.GetId() != "" && filter.GetId() != s.Id { + continue + } + // Filter by state + if filter.GetState() != nil && filter.GetState().GetState() != s.State { + continue + } + // Filter by label + if filter.GetLabelSelector() != nil { + match := true + for k, v := range filter.GetLabelSelector() { + got, ok := s.Labels[k] + if !ok || got != v { + match = false + break + } + } + if !match { + continue + } + } + filtered = append(filtered, s) + } + + return filtered +} diff --git a/pkg/cri/sbserver/sandbox_list_test.go b/pkg/cri/sbserver/sandbox_list_test.go new file mode 100644 index 000000000..f120664bc --- /dev/null +++ b/pkg/cri/sbserver/sandbox_list_test.go @@ -0,0 +1,211 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
// TestToCRISandbox checks that toCRISandbox copies id, metadata, creation
// time, labels, annotations and runtime handler into the CRI sandbox, and that
// only StateReady maps to SANDBOX_READY while StateNotReady and StateUnknown
// both map to SANDBOX_NOTREADY.
func TestToCRISandbox(t *testing.T) {
	config := &runtime.PodSandboxConfig{
		Metadata: &runtime.PodSandboxMetadata{
			Name:      "test-name",
			Uid:       "test-uid",
			Namespace: "test-ns",
			Attempt:   1,
		},
		Labels:      map[string]string{"a": "b"},
		Annotations: map[string]string{"c": "d"},
	}
	createdAt := time.Now()
	meta := sandboxstore.Metadata{
		ID:             "test-id",
		Name:           "test-name",
		Config:         config,
		NetNSPath:      "test-netns",
		RuntimeHandler: "test-runtime-handler",
	}
	// expect is shared by all cases; only its State is rewritten per case.
	expect := &runtime.PodSandbox{
		Id:             "test-id",
		Metadata:       config.GetMetadata(),
		CreatedAt:      createdAt.UnixNano(),
		Labels:         config.GetLabels(),
		Annotations:    config.GetAnnotations(),
		RuntimeHandler: "test-runtime-handler",
	}
	for desc, test := range map[string]struct {
		state         sandboxstore.State
		expectedState runtime.PodSandboxState
	}{
		"sandbox state ready": {
			state:         sandboxstore.StateReady,
			expectedState: runtime.PodSandboxState_SANDBOX_READY,
		},
		"sandbox state not ready": {
			state:         sandboxstore.StateNotReady,
			expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
		},
		"sandbox state unknown": {
			state:         sandboxstore.StateUnknown,
			expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
		},
	} {
		t.Run(desc, func(t *testing.T) {
			status := sandboxstore.Status{
				CreatedAt: createdAt,
				State:     test.state,
			}
			expect.State = test.expectedState
			s := toCRISandbox(meta, status)
			assert.Equal(t, expect, s, desc)
		})
	}
}

// TestFilterSandboxes exercises filterCRISandboxes against three stored
// sandboxes: #1 ready without labels, #2 not ready with label a=b, and #3
// ready with label c=d. Cases cover no filter, full and truncated ids, state,
// labels, and combinations of criteria.
func TestFilterSandboxes(t *testing.T) {
	c := newTestCRIService()
	sandboxes := []sandboxstore.Sandbox{
		sandboxstore.NewSandbox(
			sandboxstore.Metadata{
				ID:   "1abcdef",
				Name: "sandboxname-1",
				Config: &runtime.PodSandboxConfig{
					Metadata: &runtime.PodSandboxMetadata{
						Name:      "podname-1",
						Uid:       "uid-1",
						Namespace: "ns-1",
						Attempt:   1,
					},
				},
				RuntimeHandler: "test-runtime-handler",
			},
			sandboxstore.Status{
				CreatedAt: time.Now(),
				State:     sandboxstore.StateReady,
			},
		),
		sandboxstore.NewSandbox(
			sandboxstore.Metadata{
				ID:   "2abcdef",
				Name: "sandboxname-2",
				Config: &runtime.PodSandboxConfig{
					Metadata: &runtime.PodSandboxMetadata{
						Name:      "podname-2",
						Uid:       "uid-2",
						Namespace: "ns-2",
						Attempt:   2,
					},
					Labels: map[string]string{"a": "b"},
				},
				RuntimeHandler: "test-runtime-handler",
			},
			sandboxstore.Status{
				CreatedAt: time.Now(),
				State:     sandboxstore.StateNotReady,
			},
		),
		sandboxstore.NewSandbox(
			sandboxstore.Metadata{
				ID:   "3abcdef",
				Name: "sandboxname-3",
				Config: &runtime.PodSandboxConfig{
					Metadata: &runtime.PodSandboxMetadata{
						Name:      "podname-2",
						Uid:       "uid-2",
						Namespace: "ns-2",
						Attempt:   2,
					},
					Labels: map[string]string{"c": "d"},
				},
				RuntimeHandler: "test-runtime-handler",
			},
			sandboxstore.Status{
				CreatedAt: time.Now(),
				State:     sandboxstore.StateReady,
			},
		),
	}

	// Create PodSandbox
	// testSandboxes[i] is the CRI form of sandboxes[i]; the expectations below
	// refer to entries by this index.
	testSandboxes := []*runtime.PodSandbox{}
	for _, sb := range sandboxes {
		testSandboxes = append(testSandboxes, toCRISandbox(sb.Metadata, sb.Status.Get()))
	}

	// Inject test sandbox metadata
	// The store must contain the sandboxes so that the truncated ids used by
	// some filters can be resolved to full ids.
	for _, sb := range sandboxes {
		assert.NoError(t, c.sandboxStore.Add(sb))
	}

	for desc, test := range map[string]struct {
		filter *runtime.PodSandboxFilter
		expect []*runtime.PodSandbox
	}{
		"no filter": {
			expect: testSandboxes,
		},
		"id filter": {
			filter: &runtime.PodSandboxFilter{Id: "2abcdef"},
			expect: []*runtime.PodSandbox{testSandboxes[1]},
		},
		"truncid filter": {
			filter: &runtime.PodSandboxFilter{Id: "2"},
			expect: []*runtime.PodSandbox{testSandboxes[1]},
		},
		"state filter": {
			filter: &runtime.PodSandboxFilter{
				State: &runtime.PodSandboxStateValue{
					State: runtime.PodSandboxState_SANDBOX_READY,
				},
			},
			expect: []*runtime.PodSandbox{testSandboxes[0], testSandboxes[2]},
		},
		"label filter": {
			filter: &runtime.PodSandboxFilter{
				LabelSelector: map[string]string{"a": "b"},
			},
			expect: []*runtime.PodSandbox{testSandboxes[1]},
		},
		"mixed filter not matched": {
			filter: &runtime.PodSandboxFilter{
				Id:            "1",
				LabelSelector: map[string]string{"a": "b"},
			},
			// An empty, non-nil slice is expected when nothing matches.
			expect: []*runtime.PodSandbox{},
		},
		"mixed filter matched": {
			filter: &runtime.PodSandboxFilter{
				State: &runtime.PodSandboxStateValue{
					State: runtime.PodSandboxState_SANDBOX_READY,
				},
				LabelSelector: map[string]string{"c": "d"},
			},
			expect: []*runtime.PodSandbox{testSandboxes[2]},
		},
	} {
		t.Run(desc, func(t *testing.T) {
			filtered := c.filterCRISandboxes(testSandboxes, test.filter)
			assert.Equal(t, test.expect, filtered, desc)
		})
	}
}
+func (c *criService) PortForward(ctx context.Context, r *runtime.PortForwardRequest) (retRes *runtime.PortForwardResponse, retErr error) { + sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId()) + if err != nil { + return nil, fmt.Errorf("failed to find sandbox %q: %w", r.GetPodSandboxId(), err) + } + if sandbox.Status.Get().State != sandboxstore.StateReady { + return nil, errors.New("sandbox container is not running") + } + // TODO(random-liu): Verify that ports are exposed. + return c.streamServer.GetPortForward(r) +} diff --git a/pkg/cri/sbserver/sandbox_portforward_linux.go b/pkg/cri/sbserver/sandbox_portforward_linux.go new file mode 100644 index 000000000..e4a02208c --- /dev/null +++ b/pkg/cri/sbserver/sandbox_portforward_linux.go @@ -0,0 +1,137 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + "io" + "net" + "time" + + "github.com/containerd/containerd/log" + "github.com/containernetworking/plugins/pkg/ns" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// portForward uses netns to enter the sandbox namespace, and forwards a stream inside the +// the namespace to a specific port. It keeps forwarding until it exits or client disconnect. 
+func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriteCloser) error { + s, err := c.sandboxStore.Get(id) + if err != nil { + return fmt.Errorf("failed to find sandbox %q in store: %w", id, err) + } + + var netNSDo func(func(ns.NetNS) error) error + // netNSPath is the network namespace path for logging. + var netNSPath string + securityContext := s.Config.GetLinux().GetSecurityContext() + hostNet := securityContext.GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE + if !hostNet { + if closed, err := s.NetNS.Closed(); err != nil { + return fmt.Errorf("failed to check netwok namespace closed for sandbox %q: %w", id, err) + } else if closed { + return fmt.Errorf("network namespace for sandbox %q is closed", id) + } + netNSDo = s.NetNS.Do + netNSPath = s.NetNS.GetPath() + } else { + // Run the function directly for host network. + netNSDo = func(do func(_ ns.NetNS) error) error { + return do(nil) + } + netNSPath = "host" + } + + log.G(ctx).Infof("Executing port forwarding in network namespace %q", netNSPath) + err = netNSDo(func(_ ns.NetNS) error { + defer stream.Close() + // localhost can resolve to both IPv4 and IPv6 addresses in dual-stack systems + // but the application can be listening in one of the IP families only. + // golang has enabled RFC 6555 Fast Fallback (aka HappyEyeballs) by default in 1.12 + // It means that if a host resolves to both IPv6 and IPv4, it will try to connect to any + // of those addresses and use the working connection. + // However, the implementation uses go routines to start both connections in parallel, + // and this cases that the connection is done outside the namespace, so we try to connect + // serially. + // We try IPv4 first to keep current behavior and we fallback to IPv6 if the connection fails. 
+ // xref https://github.com/golang/go/issues/44922 + var conn net.Conn + conn, err := net.Dial("tcp4", fmt.Sprintf("localhost:%d", port)) + if err != nil { + var errV6 error + conn, errV6 = net.Dial("tcp6", fmt.Sprintf("localhost:%d", port)) + if errV6 != nil { + return fmt.Errorf("failed to connect to localhost:%d inside namespace %q, IPv4: %v IPv6 %v ", port, id, err, errV6) + } + } + defer conn.Close() + + errCh := make(chan error, 2) + // Copy from the the namespace port connection to the client stream + go func() { + log.G(ctx).Debugf("PortForward copying data from namespace %q port %d to the client stream", id, port) + _, err := io.Copy(stream, conn) + errCh <- err + }() + + // Copy from the client stream to the namespace port connection + go func() { + log.G(ctx).Debugf("PortForward copying data from client stream to namespace %q port %d", id, port) + _, err := io.Copy(conn, stream) + errCh <- err + }() + + // Wait until the first error is returned by one of the connections + // we use errFwd to store the result of the port forwarding operation + // if the context is cancelled close everything and return + var errFwd error + select { + case errFwd = <-errCh: + log.G(ctx).Debugf("PortForward stop forwarding in one direction in network namespace %q port %d: %v", id, port, errFwd) + case <-ctx.Done(): + log.G(ctx).Debugf("PortForward cancelled in network namespace %q port %d: %v", id, port, ctx.Err()) + return ctx.Err() + } + // give a chance to terminate gracefully or timeout + // after 1s + // https://linux.die.net/man/1/socat + const timeout = time.Second + select { + case e := <-errCh: + if errFwd == nil { + errFwd = e + } + log.G(ctx).Debugf("PortForward stopped forwarding in both directions in network namespace %q port %d: %v", id, port, e) + case <-time.After(timeout): + log.G(ctx).Debugf("PortForward timed out waiting to close the connection in network namespace %q port %d", id, port) + case <-ctx.Done(): + log.G(ctx).Debugf("PortForward cancelled in 
network namespace %q port %d: %v", id, port, ctx.Err()) + errFwd = ctx.Err() + } + + return errFwd + }) + + if err != nil { + return fmt.Errorf("failed to execute portforward in network namespace %q: %w", netNSPath, err) + } + log.G(ctx).Infof("Finish port forwarding for %q port %d", id, port) + + return nil +} diff --git a/pkg/cri/sbserver/sandbox_portforward_other.go b/pkg/cri/sbserver/sandbox_portforward_other.go new file mode 100644 index 000000000..649696107 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_portforward_other.go @@ -0,0 +1,34 @@ +//go:build !windows && !linux +// +build !windows,!linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + "io" + + "github.com/containerd/containerd/errdefs" +) + +// portForward uses netns to enter the sandbox namespace, and forwards a stream inside the +// the namespace to a specific port. It keeps forwarding until it exits or client disconnect. +func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriteCloser) error { + return fmt.Errorf("port forward: %w", errdefs.ErrNotImplemented) +} diff --git a/pkg/cri/sbserver/sandbox_portforward_windows.go b/pkg/cri/sbserver/sandbox_portforward_windows.go new file mode 100644 index 000000000..876a36c51 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_portforward_windows.go @@ -0,0 +1,77 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "bytes" + "context" + "fmt" + "io" + + "k8s.io/utils/exec" + + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + cioutil "github.com/containerd/containerd/pkg/ioutil" +) + +func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriter) error { + stdout := cioutil.NewNopWriteCloser(stream) + stderrBuffer := new(bytes.Buffer) + stderr := cioutil.NewNopWriteCloser(stderrBuffer) + // localhost is resolved to 127.0.0.1 in ipv4, and ::1 in ipv6. + // Explicitly using ipv4 IP address in here to avoid flakiness. + cmd := []string{"wincat.exe", "127.0.0.1", fmt.Sprint(port)} + err := c.execInSandbox(ctx, id, cmd, stream, stdout, stderr) + if err != nil { + return fmt.Errorf("failed to execute port forward in sandbox: %s: %w", stderrBuffer.String(), err) + } + return nil +} + +func (c *criService) execInSandbox(ctx context.Context, sandboxID string, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser) error { + // Get sandbox from our sandbox store. 
+ sb, err := c.sandboxStore.Get(sandboxID) + if err != nil { + return fmt.Errorf("failed to find sandbox %q in store: %w", sandboxID, err) + } + + // Check the sandbox state + state := sb.Status.Get().State + if state != sandboxstore.StateReady { + return fmt.Errorf("sandbox is in %s state", fmt.Sprint(state)) + } + + opts := execOptions{ + cmd: cmd, + stdin: stdin, + stdout: stdout, + stderr: stderr, + tty: false, + resize: nil, + } + exitCode, err := c.execInternal(ctx, sb.Container, sandboxID, opts) + if err != nil { + return fmt.Errorf("failed to exec in sandbox: %w", err) + } + if *exitCode == 0 { + return nil + } + return &exec.CodeExitError{ + Err: fmt.Errorf("error executing command %v, exit code %d", cmd, *exitCode), + Code: int(*exitCode), + } +} diff --git a/pkg/cri/sbserver/sandbox_remove.go b/pkg/cri/sbserver/sandbox_remove.go new file mode 100644 index 000000000..5771a7a24 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_remove.go @@ -0,0 +1,117 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + "time" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + + "github.com/sirupsen/logrus" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// RemovePodSandbox removes the sandbox. If there are running containers in the +// sandbox, they should be forcibly removed. 
// RemovePodSandbox removes the sandbox. If there are running containers in the
// sandbox, they should be forcibly removed.
//
// Removing a sandbox that is not in the store is not an error. Otherwise the
// sandbox is stopped, its network namespace is required to be closed, its
// containers are removed, its root directories and containerd container are
// deleted, and finally it is dropped from the sandbox store and its name is
// released. Any failing step aborts the removal and returns an error.
func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodSandboxRequest) (*runtime.RemovePodSandboxResponse, error) {
	start := time.Now()
	sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
	if err != nil {
		if !errdefs.IsNotFound(err) {
			return nil, fmt.Errorf("an error occurred when try to find sandbox %q: %w",
				r.GetPodSandboxId(), err)
		}
		// Do not return error if the id doesn't exist.
		log.G(ctx).Tracef("RemovePodSandbox called for sandbox %q that does not exist",
			r.GetPodSandboxId())
		return &runtime.RemovePodSandboxResponse{}, nil
	}
	// Use the full sandbox id.
	id := sandbox.ID

	// If the sandbox is still running, not ready, or in an unknown state, forcibly stop it.
	// Even if it's in a NotReady state, this will close its network namespace, if open.
	// This can happen if the task process associated with the Pod died or it was killed.
	logrus.Infof("Forcibly stopping sandbox %q", id)
	if err := c.stopPodSandbox(ctx, sandbox); err != nil {
		return nil, fmt.Errorf("failed to forcibly stop sandbox %q: %w", id, err)
	}

	// Return error if sandbox network namespace is not closed yet.
	if sandbox.NetNS != nil {
		nsPath := sandbox.NetNS.GetPath()
		if closed, err := sandbox.NetNS.Closed(); err != nil {
			return nil, fmt.Errorf("failed to check sandbox network namespace %q closed: %w", nsPath, err)
		} else if !closed {
			return nil, fmt.Errorf("sandbox network namespace %q is not fully closed", nsPath)
		}
	}

	// Remove all containers inside the sandbox.
	// NOTE(random-liu): container could still be created after this point, Kubelet should
	// not rely on this behavior.
	// TODO(random-liu): Introduce an intermediate state to avoid container creation after
	// this point.
	cntrs := c.containerStore.List()
	for _, cntr := range cntrs {
		if cntr.SandboxID != id {
			continue
		}
		_, err = c.RemoveContainer(ctx, &runtime.RemoveContainerRequest{ContainerId: cntr.ID})
		if err != nil {
			return nil, fmt.Errorf("failed to remove container %q: %w", cntr.ID, err)
		}
	}

	// Cleanup the sandbox root directories.
	sandboxRootDir := c.getSandboxRootDir(id)
	if err := ensureRemoveAll(ctx, sandboxRootDir); err != nil {
		return nil, fmt.Errorf("failed to remove sandbox root directory %q: %w",
			sandboxRootDir, err)
	}
	volatileSandboxRootDir := c.getVolatileSandboxRootDir(id)
	if err := ensureRemoveAll(ctx, volatileSandboxRootDir); err != nil {
		return nil, fmt.Errorf("failed to remove volatile sandbox root directory %q: %w",
			volatileSandboxRootDir, err)
	}

	// Delete sandbox container.
	// A container that is already gone is tolerated and only traced.
	if err := sandbox.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil {
		if !errdefs.IsNotFound(err) {
			return nil, fmt.Errorf("failed to delete sandbox container %q: %w", id, err)
		}
		log.G(ctx).Tracef("Remove called for sandbox container %q that does not exist", id)
	}

	// Remove sandbox from sandbox store. Note that once the sandbox is successfully
	// deleted:
	// 1) ListPodSandbox will not include this sandbox.
	// 2) PodSandboxStatus and StopPodSandbox will return error.
	// 3) On-going operations which have held the reference will not be affected.
	c.sandboxStore.Delete(id)

	// Release the sandbox name reserved for the sandbox.
	c.sandboxNameIndex.ReleaseByKey(id)

	// Record how long the whole removal took, labelled by runtime handler.
	sandboxRemoveTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(start)

	return &runtime.RemovePodSandboxResponse{}, nil
}
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "math" + "path/filepath" + goruntime "runtime" + "strings" + "time" + + "github.com/containerd/go-cni" + "github.com/containerd/nri" + v1 "github.com/containerd/nri/types/v1" + "github.com/containerd/typeurl" + "github.com/davecgh/go-spew/spew" + "github.com/opencontainers/selinux/go-selinux" + "github.com/sirupsen/logrus" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd" + containerdio "github.com/containerd/containerd/cio" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/pkg/cri/annotations" + criconfig "github.com/containerd/containerd/pkg/cri/config" + customopts "github.com/containerd/containerd/pkg/cri/opts" + "github.com/containerd/containerd/pkg/cri/server/bandwidth" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + "github.com/containerd/containerd/pkg/cri/util" + ctrdutil "github.com/containerd/containerd/pkg/cri/util" + "github.com/containerd/containerd/pkg/netns" + "github.com/containerd/containerd/snapshots" +) + +func init() { + typeurl.Register(&sandboxstore.Metadata{}, + "github.com/containerd/cri/pkg/store/sandbox", "Metadata") +} + +// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure +// the sandbox is in ready state. 
// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
// the sandbox is in ready state.
//
// In order, it reserves the sandbox name, ensures the sandbox image, creates
// and sets up the pod network (unless the pod uses the host network), creates
// the containerd container, the sandbox directories and files, creates and
// starts the sandbox task, marks the sandbox ready, adds it to the sandbox
// store and starts the exit monitor.
//
// Each step that acquires a resource registers a deferred rollback that runs
// only when the function returns a non-nil error (retErr), so a failure at any
// point undoes the steps completed before it.
func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandboxRequest) (_ *runtime.RunPodSandboxResponse, retErr error) {
	config := r.GetConfig()
	log.G(ctx).Debugf("Sandbox config %+v", config)

	// Generate unique id and name for the sandbox and reserve the name.
	id := util.GenerateID()
	metadata := config.GetMetadata()
	if metadata == nil {
		return nil, errors.New("sandbox config must include metadata")
	}
	name := makeSandboxName(metadata)
	log.G(ctx).WithField("podsandboxid", id).Debugf("generated id for sandbox name %q", name)
	// Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the
	// same sandbox.
	if err := c.sandboxNameIndex.Reserve(name, id); err != nil {
		return nil, fmt.Errorf("failed to reserve sandbox name %q: %w", name, err)
	}
	defer func() {
		// Release the name if the function returns with an error.
		if retErr != nil {
			c.sandboxNameIndex.ReleaseByName(name)
		}
	}()

	// Create initial internal sandbox object.
	sandbox := sandboxstore.NewSandbox(
		sandboxstore.Metadata{
			ID:             id,
			Name:           name,
			Config:         config,
			RuntimeHandler: r.GetRuntimeHandler(),
		},
		sandboxstore.Status{
			State: sandboxstore.StateUnknown,
		},
	)

	// Ensure sandbox container image snapshot.
	image, err := c.ensureImageExists(ctx, c.config.SandboxImage, config)
	if err != nil {
		return nil, fmt.Errorf("failed to get sandbox image %q: %w", c.config.SandboxImage, err)
	}
	containerdImage, err := c.toContainerdImage(ctx, *image)
	if err != nil {
		return nil, fmt.Errorf("failed to get image from containerd %q: %w", image.ID, err)
	}

	ociRuntime, err := c.getSandboxRuntime(config, r.GetRuntimeHandler())
	if err != nil {
		return nil, fmt.Errorf("failed to get sandbox runtime: %w", err)
	}
	log.G(ctx).WithField("podsandboxid", id).Debugf("use OCI runtime %+v", ociRuntime)

	// podNetwork stays true unless one of the host-network cases below applies.
	podNetwork := true

	if goruntime.GOOS != "windows" &&
		config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
		// Pod network is not needed on linux with host network.
		podNetwork = false
	}
	if goruntime.GOOS == "windows" &&
		config.GetWindows().GetSecurityContext().GetHostProcess() {
		// Windows HostProcess pods can only run on the host network
		podNetwork = false
	}

	if podNetwork {
		netStart := time.Now()
		// If it is not in host network namespace then create a namespace and set the sandbox
		// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
		// namespaces. If the pod is in host network namespace then both are empty and should not
		// be used.
		var netnsMountDir = "/var/run/netns"
		if c.config.NetNSMountsUnderStateDir {
			netnsMountDir = filepath.Join(c.config.StateDir, "netns")
		}
		sandbox.NetNS, err = netns.NewNetNS(netnsMountDir)
		if err != nil {
			return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
		}
		sandbox.NetNSPath = sandbox.NetNS.GetPath()
		defer func() {
			if retErr != nil {
				deferCtx, deferCancel := ctrdutil.DeferContext()
				defer deferCancel()
				// Teardown network if an error is returned.
				if err := c.teardownPodNetwork(deferCtx, sandbox); err != nil {
					log.G(ctx).WithError(err).Errorf("Failed to destroy network for sandbox %q", id)
				}

				if err := sandbox.NetNS.Remove(); err != nil {
					log.G(ctx).WithError(err).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
				}
				sandbox.NetNSPath = ""
			}
		}()

		// Setup network for sandbox.
		// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
		// rely on the assumption that CRI shim will not be querying the network namespace to check the
		// network states such as IP.
		// In future runtime implementation should avoid relying on CRI shim implementation details.
		// In this case however caching the IP will add a subtle performance enhancement by avoiding
		// calls to network namespace of the pod to query the IP of the veth interface on every
		// SandboxStatus request.
		if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
			return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
		}
		sandboxCreateNetworkTimer.UpdateSince(netStart)
	}

	runtimeStart := time.Now()
	// Create sandbox container.
	// NOTE: sandboxContainerSpec SHOULD NOT have side
	// effect, e.g. accessing/creating files, so that we can test
	// it safely.
	spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, sandbox.NetNSPath, ociRuntime.PodAnnotations)
	if err != nil {
		return nil, fmt.Errorf("failed to generate sandbox container spec: %w", err)
	}
	log.G(ctx).WithField("podsandboxid", id).Debugf("sandbox container spec: %#+v", spew.NewFormatter(spec))
	sandbox.ProcessLabel = spec.Process.SelinuxLabel
	defer func() {
		// Release the recorded SELinux label if the function returns with an error.
		if retErr != nil {
			selinux.ReleaseLabel(sandbox.ProcessLabel)
		}
	}()

	// handle any KVM based runtime
	if err := modifyProcessLabel(ociRuntime.Type, spec); err != nil {
		return nil, err
	}

	if config.GetLinux().GetSecurityContext().GetPrivileged() {
		// If privileged don't set selinux label, but we still record the MCS label so that
		// the unused label can be freed later.
		spec.Process.SelinuxLabel = ""
	}

	// Generate spec options that will be applied to the spec later.
	specOpts, err := c.sandboxContainerSpecOpts(config, &image.ImageSpec.Config)
	if err != nil {
		return nil, fmt.Errorf("failed to generate sandbox container spec options: %w", err)
	}

	sandboxLabels := buildLabels(config.Labels, image.ImageSpec.Config.Labels, containerKindSandbox)

	runtimeOpts, err := generateRuntimeOptions(ociRuntime, c.config)
	if err != nil {
		return nil, fmt.Errorf("failed to generate runtime options: %w", err)
	}
	snapshotterOpt := snapshots.WithLabels(snapshots.FilterInheritedLabels(config.Annotations))
	opts := []containerd.NewContainerOpts{
		containerd.WithSnapshotter(c.runtimeSnapshotter(ctx, ociRuntime)),
		customopts.WithNewSnapshot(id, containerdImage, snapshotterOpt),
		containerd.WithSpec(spec, specOpts...),
		containerd.WithContainerLabels(sandboxLabels),
		containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata),
		containerd.WithRuntime(ociRuntime.Type, runtimeOpts)}

	container, err := c.client.NewContainer(ctx, id, opts...)
	if err != nil {
		return nil, fmt.Errorf("failed to create containerd container: %w", err)
	}
	defer func() {
		// Delete the containerd container and its snapshot if the function
		// returns with an error.
		if retErr != nil {
			deferCtx, deferCancel := ctrdutil.DeferContext()
			defer deferCancel()
			if err := container.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil {
				log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id)
			}
		}
	}()

	// Create sandbox container root directories.
	sandboxRootDir := c.getSandboxRootDir(id)
	if err := c.os.MkdirAll(sandboxRootDir, 0755); err != nil {
		return nil, fmt.Errorf("failed to create sandbox root directory %q: %w",
			sandboxRootDir, err)
	}
	defer func() {
		if retErr != nil {
			// Cleanup the sandbox root directory.
			if err := c.os.RemoveAll(sandboxRootDir); err != nil {
				log.G(ctx).WithError(err).Errorf("Failed to remove sandbox root directory %q",
					sandboxRootDir)
			}
		}
	}()
	volatileSandboxRootDir := c.getVolatileSandboxRootDir(id)
	if err := c.os.MkdirAll(volatileSandboxRootDir, 0755); err != nil {
		return nil, fmt.Errorf("failed to create volatile sandbox root directory %q: %w",
			volatileSandboxRootDir, err)
	}
	defer func() {
		if retErr != nil {
			// Cleanup the volatile sandbox root directory.
			if err := c.os.RemoveAll(volatileSandboxRootDir); err != nil {
				log.G(ctx).WithError(err).Errorf("Failed to remove volatile sandbox root directory %q",
					volatileSandboxRootDir)
			}
		}
	}()

	// Setup files required for the sandbox.
	if err = c.setupSandboxFiles(id, config); err != nil {
		return nil, fmt.Errorf("failed to setup sandbox files: %w", err)
	}
	defer func() {
		// Remove the sandbox files again if the function returns with an error.
		if retErr != nil {
			if err = c.cleanupSandboxFiles(id, config); err != nil {
				log.G(ctx).WithError(err).Errorf("Failed to cleanup sandbox files in %q",
					sandboxRootDir)
			}
		}
	}()

	// Update sandbox created timestamp.
	// info.CreatedAt is copied into the sandbox status once the task has started.
	info, err := container.Info(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get sandbox container info: %w", err)
	}

	// Create sandbox task in containerd.
	log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).",
		id, name)

	taskOpts := c.taskOpts(ociRuntime.Type)
	if ociRuntime.Path != "" {
		taskOpts = append(taskOpts, containerd.WithRuntimePath(ociRuntime.Path))
	}
	// We don't need stdio for sandbox container.
	task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...)
	if err != nil {
		return nil, fmt.Errorf("failed to create containerd task: %w", err)
	}
	defer func() {
		if retErr != nil {
			deferCtx, deferCancel := ctrdutil.DeferContext()
			defer deferCancel()
			// Cleanup the sandbox container if an error is returned.
			if _, err := task.Delete(deferCtx, WithNRISandboxDelete(id), containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
				log.G(ctx).WithError(err).Errorf("Failed to delete sandbox container %q", id)
			}
		}
	}()

	// wait is a long running background request, no timeout needed.
	exitCh, err := task.Wait(ctrdutil.NamespacedContext())
	if err != nil {
		return nil, fmt.Errorf("failed to wait for sandbox container task: %w", err)
	}

	nric, err := nri.New()
	if err != nil {
		return nil, fmt.Errorf("unable to create nri client: %w", err)
	}
	// The NRI create hook is invoked only when nri.New returned a client.
	if nric != nil {
		nriSB := &nri.Sandbox{
			ID:     id,
			Labels: config.Labels,
		}
		if _, err := nric.InvokeWithSandbox(ctx, task, v1.Create, nriSB); err != nil {
			return nil, fmt.Errorf("nri invoke: %w", err)
		}
	}

	if err := task.Start(ctx); err != nil {
		return nil, fmt.Errorf("failed to start sandbox container task %q: %w", id, err)
	}

	if err := sandbox.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
		// Set the pod sandbox as ready after successfully start sandbox container.
		status.Pid = task.Pid()
		status.State = sandboxstore.StateReady
		status.CreatedAt = info.CreatedAt
		return status, nil
	}); err != nil {
		return nil, fmt.Errorf("failed to update sandbox status: %w", err)
	}

	// Attach the container and add the sandbox into the sandbox store; its
	// status was set to ready just above.
	sandbox.Container = container

	if err := c.sandboxStore.Add(sandbox); err != nil {
		return nil, fmt.Errorf("failed to add sandbox %+v into store: %w", sandbox, err)
	}

	// start the monitor after adding sandbox into the store, this ensures
	// that sandbox is in the store, when event monitor receives the TaskExit event.
	//
	// TaskOOM from containerd may come before sandbox is added to store,
	// but we don't care about sandbox TaskOOM right now, so it is fine.
	c.eventMonitor.startSandboxExitMonitor(context.Background(), id, task.Pid(), exitCh)

	sandboxRuntimeCreateTimer.WithValues(ociRuntime.Type).UpdateSince(runtimeStart)

	return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil
}

// getNetworkPlugin returns the network plugin to be used by the runtime class
// defaults to the global CNI options in the CRI config
//
// It returns nil when no plugins are configured, or when neither the runtime
// class nor the default entry is present.
func (c *criService) getNetworkPlugin(runtimeClass string) cni.CNI {
	if c.netPlugin == nil {
		return nil
	}
	i, ok := c.netPlugin[runtimeClass]
	if !ok {
		// Fall back to the default network plugin entry.
		if i, ok = c.netPlugin[defaultNetworkPlugin]; !ok {
			return nil
		}
	}
	return i
}
+ if err != nil { + return err + } + logDebugCNIResult(ctx, id, result) + // Check if the default interface has IP config + if configs, ok := result.Interfaces[defaultIfName]; ok && len(configs.IPConfigs) > 0 { + sandbox.IP, sandbox.AdditionalIPs = selectPodIPs(ctx, configs.IPConfigs, c.config.IPPreference) + sandbox.CNIResult = result + return nil + } + return fmt.Errorf("failed to find network info for sandbox %q", id) +} + +// cniNamespaceOpts get CNI namespace options from sandbox config. +func cniNamespaceOpts(id string, config *runtime.PodSandboxConfig) ([]cni.NamespaceOpts, error) { + opts := []cni.NamespaceOpts{ + cni.WithLabels(toCNILabels(id, config)), + cni.WithCapability(annotations.PodAnnotations, config.Annotations), + } + + portMappings := toCNIPortMappings(config.GetPortMappings()) + if len(portMappings) > 0 { + opts = append(opts, cni.WithCapabilityPortMap(portMappings)) + } + + // Will return an error if the bandwidth limitation has the wrong unit + // or an unreasonable value see validateBandwidthIsReasonable() + bandWidth, err := toCNIBandWidth(config.Annotations) + if err != nil { + return nil, err + } + if bandWidth != nil { + opts = append(opts, cni.WithCapabilityBandWidth(*bandWidth)) + } + + dns := toCNIDNS(config.GetDnsConfig()) + if dns != nil { + opts = append(opts, cni.WithCapabilityDNS(*dns)) + } + + return opts, nil +} + +// toCNILabels adds pod metadata into CNI labels. +func toCNILabels(id string, config *runtime.PodSandboxConfig) map[string]string { + return map[string]string{ + "K8S_POD_NAMESPACE": config.GetMetadata().GetNamespace(), + "K8S_POD_NAME": config.GetMetadata().GetName(), + "K8S_POD_INFRA_CONTAINER_ID": id, + "K8S_POD_UID": config.GetMetadata().GetUid(), + "IgnoreUnknown": "1", + } +} + +// toCNIBandWidth converts CRI annotations to CNI bandwidth. 
+func toCNIBandWidth(annotations map[string]string) (*cni.BandWidth, error) { + ingress, egress, err := bandwidth.ExtractPodBandwidthResources(annotations) + if err != nil { + return nil, fmt.Errorf("reading pod bandwidth annotations: %w", err) + } + + if ingress == nil && egress == nil { + return nil, nil + } + + bandWidth := &cni.BandWidth{} + + if ingress != nil { + bandWidth.IngressRate = uint64(ingress.Value()) + bandWidth.IngressBurst = math.MaxUint32 + } + + if egress != nil { + bandWidth.EgressRate = uint64(egress.Value()) + bandWidth.EgressBurst = math.MaxUint32 + } + + return bandWidth, nil +} + +// toCNIPortMappings converts CRI port mappings to CNI. +func toCNIPortMappings(criPortMappings []*runtime.PortMapping) []cni.PortMapping { + var portMappings []cni.PortMapping + for _, mapping := range criPortMappings { + if mapping.HostPort <= 0 { + continue + } + portMappings = append(portMappings, cni.PortMapping{ + HostPort: mapping.HostPort, + ContainerPort: mapping.ContainerPort, + Protocol: strings.ToLower(mapping.Protocol.String()), + HostIP: mapping.HostIp, + }) + } + return portMappings +} + +// toCNIDNS converts CRI DNSConfig to CNI. +func toCNIDNS(dns *runtime.DNSConfig) *cni.DNS { + if dns == nil { + return nil + } + return &cni.DNS{ + Servers: dns.GetServers(), + Searches: dns.GetSearches(), + Options: dns.GetOptions(), + } +} + +// selectPodIPs select an ip from the ip list. 
+func selectPodIPs(ctx context.Context, configs []*cni.IPConfig, preference string) (string, []string) { + if len(configs) == 1 { + return ipString(configs[0]), nil + } + toStrings := func(ips []*cni.IPConfig) (o []string) { + for _, i := range ips { + o = append(o, ipString(i)) + } + return o + } + var extra []string + switch preference { + default: + if preference != "ipv4" && preference != "" { + log.G(ctx).WithField("ip_pref", preference).Warn("invalid ip_pref, falling back to ipv4") + } + for i, ip := range configs { + if ip.IP.To4() != nil { + return ipString(ip), append(extra, toStrings(configs[i+1:])...) + } + extra = append(extra, ipString(ip)) + } + case "ipv6": + for i, ip := range configs { + if ip.IP.To16() != nil { + return ipString(ip), append(extra, toStrings(configs[i+1:])...) + } + extra = append(extra, ipString(ip)) + } + case "cni": + // use func default return + } + + all := toStrings(configs) + return all[0], all[1:] +} + +func ipString(ip *cni.IPConfig) string { + return ip.IP.String() +} + +// untrustedWorkload returns true if the sandbox contains untrusted workload. +func untrustedWorkload(config *runtime.PodSandboxConfig) bool { + return config.GetAnnotations()[annotations.UntrustedWorkload] == "true" +} + +// hostAccessingSandbox returns true if the sandbox configuration +// requires additional host access for the sandbox. +func hostAccessingSandbox(config *runtime.PodSandboxConfig) bool { + securityContext := config.GetLinux().GetSecurityContext() + + namespaceOptions := securityContext.GetNamespaceOptions() + if namespaceOptions.GetNetwork() == runtime.NamespaceMode_NODE || + namespaceOptions.GetPid() == runtime.NamespaceMode_NODE || + namespaceOptions.GetIpc() == runtime.NamespaceMode_NODE { + return true + } + + return false +} + +// getSandboxRuntime returns the runtime configuration for sandbox. 
+// If the sandbox contains untrusted workload, runtime for untrusted workload will be returned, +// or else default runtime will be returned. +func (c *criService) getSandboxRuntime(config *runtime.PodSandboxConfig, runtimeHandler string) (criconfig.Runtime, error) { + if untrustedWorkload(config) { + // If the untrusted annotation is provided, runtimeHandler MUST be empty. + if runtimeHandler != "" && runtimeHandler != criconfig.RuntimeUntrusted { + return criconfig.Runtime{}, errors.New("untrusted workload with explicit runtime handler is not allowed") + } + + // If the untrusted workload is requesting access to the host/node, this request will fail. + // + // Note: If the workload is marked untrusted but requests privileged, this can be granted, as the + // runtime may support this. For example, in a virtual-machine isolated runtime, privileged + // is a supported option, granting the workload to access the entire guest VM instead of host. + // TODO(windows): Deprecate this so that we don't need to handle it for windows. 
+ if hostAccessingSandbox(config) { + return criconfig.Runtime{}, errors.New("untrusted workload with host access is not allowed") + } + + runtimeHandler = criconfig.RuntimeUntrusted + } + + if runtimeHandler == "" { + runtimeHandler = c.config.ContainerdConfig.DefaultRuntimeName + } + + handler, ok := c.config.ContainerdConfig.Runtimes[runtimeHandler] + if !ok { + return criconfig.Runtime{}, fmt.Errorf("no runtime for %q is configured", runtimeHandler) + } + return handler, nil +} + +func logDebugCNIResult(ctx context.Context, sandboxID string, result *cni.Result) { + if logrus.GetLevel() < logrus.DebugLevel { + return + } + cniResult, err := json.Marshal(result) + if err != nil { + log.G(ctx).WithField("podsandboxid", sandboxID).WithError(err).Errorf("Failed to marshal CNI result: %v", err) + return + } + log.G(ctx).WithField("podsandboxid", sandboxID).Debugf("cni result: %s", string(cniResult)) +} diff --git a/pkg/cri/sbserver/sandbox_run_linux.go b/pkg/cri/sbserver/sandbox_run_linux.go new file mode 100644 index 000000000..461559d39 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_run_linux.go @@ -0,0 +1,350 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "fmt" + "os" + "strconv" + "strings" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/oci" + "github.com/containerd/containerd/plugin" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux" + "golang.org/x/sys/unix" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/pkg/cri/annotations" + customopts "github.com/containerd/containerd/pkg/cri/opts" + osinterface "github.com/containerd/containerd/pkg/os" + "github.com/containerd/containerd/pkg/userns" +) + +func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig, + imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (_ *runtimespec.Spec, retErr error) { + // Creates a spec Generator with the default spec. + // TODO(random-liu): [P1] Compare the default settings with docker and containerd default. + specOpts := []oci.SpecOpts{ + oci.WithoutRunMount, + customopts.WithoutDefaultSecuritySettings, + customopts.WithRelativeRoot(relativeRootfsPath), + oci.WithEnv(imageConfig.Env), + oci.WithRootFSReadonly(), + oci.WithHostname(config.GetHostname()), + } + if imageConfig.WorkingDir != "" { + specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) + } + + if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 { + // Pause image must have entrypoint or cmd. + return nil, fmt.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig) + } + specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...)) + + // Set cgroups parent. 
+ if c.config.DisableCgroup { + specOpts = append(specOpts, customopts.WithDisabledCgroups) + } else { + if config.GetLinux().GetCgroupParent() != "" { + cgroupsPath := getCgroupsPath(config.GetLinux().GetCgroupParent(), id) + specOpts = append(specOpts, oci.WithCgroup(cgroupsPath)) + } + } + + // When cgroup parent is not set, containerd-shim will create container in a child cgroup + // of the cgroup itself is in. + // TODO(random-liu): [P2] Set default cgroup path if cgroup parent is not specified. + + // Set namespace options. + var ( + securityContext = config.GetLinux().GetSecurityContext() + nsOptions = securityContext.GetNamespaceOptions() + ) + if nsOptions.GetNetwork() == runtime.NamespaceMode_NODE { + specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.NetworkNamespace)) + specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UTSNamespace)) + } else { + specOpts = append(specOpts, oci.WithLinuxNamespace( + runtimespec.LinuxNamespace{ + Type: runtimespec.NetworkNamespace, + Path: nsPath, + })) + } + if nsOptions.GetPid() == runtime.NamespaceMode_NODE { + specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.PIDNamespace)) + } + if nsOptions.GetIpc() == runtime.NamespaceMode_NODE { + specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace)) + } + + // It's fine to generate the spec before the sandbox /dev/shm + // is actually created. + sandboxDevShm := c.getSandboxDevShm(id) + if nsOptions.GetIpc() == runtime.NamespaceMode_NODE { + sandboxDevShm = devShm + } + // Remove the default /dev/shm mount from defaultMounts, it is added in oci/mounts.go. + specOpts = append(specOpts, oci.WithoutMounts(devShm)) + // In future the when user-namespace is enabled, the `nosuid, nodev, noexec` flags are + // required, otherwise the remount will fail with EPERM. Just use them unconditionally, + // they are nice to have anyways. 
+ specOpts = append(specOpts, oci.WithMounts([]runtimespec.Mount{ + { + Source: sandboxDevShm, + Destination: devShm, + Type: "bind", + Options: []string{"rbind", "ro", "nosuid", "nodev", "noexec"}, + }, + // Add resolv.conf for katacontainers to setup the DNS of pod VM properly. + { + Source: c.getResolvPath(id), + Destination: resolvConfPath, + Type: "bind", + Options: []string{"rbind", "ro"}, + }, + })) + + processLabel, mountLabel, err := initLabelsFromOpt(securityContext.GetSelinuxOptions()) + if err != nil { + return nil, fmt.Errorf("failed to init selinux options %+v: %w", securityContext.GetSelinuxOptions(), err) + } + defer func() { + if retErr != nil { + selinux.ReleaseLabel(processLabel) + } + }() + + supplementalGroups := securityContext.GetSupplementalGroups() + specOpts = append(specOpts, + customopts.WithSelinuxLabels(processLabel, mountLabel), + customopts.WithSupplementalGroups(supplementalGroups), + ) + + // Add sysctls + sysctls := config.GetLinux().GetSysctls() + if sysctls == nil { + sysctls = make(map[string]string) + } + _, ipUnprivilegedPortStart := sysctls["net.ipv4.ip_unprivileged_port_start"] + _, pingGroupRange := sysctls["net.ipv4.ping_group_range"] + if nsOptions.GetNetwork() != runtime.NamespaceMode_NODE { + if c.config.EnableUnprivilegedPorts && !ipUnprivilegedPortStart { + sysctls["net.ipv4.ip_unprivileged_port_start"] = "0" + } + if c.config.EnableUnprivilegedICMP && !pingGroupRange && !userns.RunningInUserNS() { + sysctls["net.ipv4.ping_group_range"] = "0 2147483647" + } + } + specOpts = append(specOpts, customopts.WithSysctls(sysctls)) + + // Note: LinuxSandboxSecurityContext does not currently provide an apparmor profile + + if !c.config.DisableCgroup { + specOpts = append(specOpts, customopts.WithDefaultSandboxShares) + } + + if res := config.GetLinux().GetResources(); res != nil { + specOpts = append(specOpts, + customopts.WithAnnotation(annotations.SandboxCPUPeriod, strconv.FormatInt(res.CpuPeriod, 10)), + 
customopts.WithAnnotation(annotations.SandboxCPUQuota, strconv.FormatInt(res.CpuQuota, 10)), + customopts.WithAnnotation(annotations.SandboxCPUShares, strconv.FormatInt(res.CpuShares, 10)), + customopts.WithAnnotation(annotations.SandboxMem, strconv.FormatInt(res.MemoryLimitInBytes, 10))) + } + + specOpts = append(specOpts, customopts.WithPodOOMScoreAdj(int(defaultSandboxOOMAdj), c.config.RestrictOOMScoreAdj)) + + for pKey, pValue := range getPassthroughAnnotations(config.Annotations, + runtimePodAnnotations) { + specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) + } + + specOpts = append(specOpts, + customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox), + customopts.WithAnnotation(annotations.SandboxID, id), + customopts.WithAnnotation(annotations.SandboxNamespace, config.GetMetadata().GetNamespace()), + customopts.WithAnnotation(annotations.SandboxName, config.GetMetadata().GetName()), + customopts.WithAnnotation(annotations.SandboxLogDir, config.GetLogDirectory()), + ) + + return c.runtimeSpec(id, "", specOpts...) +} + +// sandboxContainerSpecOpts generates OCI spec options for +// the sandbox container. 
+func (c *criService) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { + var ( + securityContext = config.GetLinux().GetSecurityContext() + specOpts []oci.SpecOpts + err error + ) + ssp := securityContext.GetSeccomp() + if ssp == nil { + ssp, err = generateSeccompSecurityProfile( + securityContext.GetSeccompProfilePath(), //nolint:staticcheck // Deprecated but we don't want to remove yet + c.config.UnsetSeccompProfile) + if err != nil { + return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err) + } + } + seccompSpecOpts, err := c.generateSeccompSpecOpts( + ssp, + securityContext.GetPrivileged(), + c.seccompEnabled()) + if err != nil { + return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err) + } + if seccompSpecOpts != nil { + specOpts = append(specOpts, seccompSpecOpts) + } + + userstr, err := generateUserString( + "", + securityContext.GetRunAsUser(), + securityContext.GetRunAsGroup(), + ) + if err != nil { + return nil, fmt.Errorf("failed to generate user string: %w", err) + } + if userstr == "" { + // Lastly, since no user override was passed via CRI try to set via OCI + // Image + userstr = imageConfig.User + } + if userstr != "" { + specOpts = append(specOpts, oci.WithUser(userstr)) + } + return specOpts, nil +} + +// setupSandboxFiles sets up necessary sandbox files including /dev/shm, /etc/hosts, +// /etc/resolv.conf and /etc/hostname. 
+func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { + sandboxEtcHostname := c.getSandboxHostname(id) + hostname := config.GetHostname() + if hostname == "" { + var err error + hostname, err = c.os.Hostname() + if err != nil { + return fmt.Errorf("failed to get hostname: %w", err) + } + } + if err := c.os.WriteFile(sandboxEtcHostname, []byte(hostname+"\n"), 0644); err != nil { + return fmt.Errorf("failed to write hostname to %q: %w", sandboxEtcHostname, err) + } + + // TODO(random-liu): Consider whether we should maintain /etc/hosts and /etc/resolv.conf in kubelet. + sandboxEtcHosts := c.getSandboxHosts(id) + if err := c.os.CopyFile(etcHosts, sandboxEtcHosts, 0644); err != nil { + return fmt.Errorf("failed to generate sandbox hosts file %q: %w", sandboxEtcHosts, err) + } + + // Set DNS options. Maintain a resolv.conf for the sandbox. + var err error + resolvContent := "" + if dnsConfig := config.GetDnsConfig(); dnsConfig != nil { + resolvContent, err = parseDNSOptions(dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options) + if err != nil { + return fmt.Errorf("failed to parse sandbox DNSConfig %+v: %w", dnsConfig, err) + } + } + resolvPath := c.getResolvPath(id) + if resolvContent == "" { + // copy host's resolv.conf to resolvPath + err = c.os.CopyFile(resolvConfPath, resolvPath, 0644) + if err != nil { + return fmt.Errorf("failed to copy host's resolv.conf to %q: %w", resolvPath, err) + } + } else { + err = c.os.WriteFile(resolvPath, []byte(resolvContent), 0644) + if err != nil { + return fmt.Errorf("failed to write resolv content to %q: %w", resolvPath, err) + } + } + + // Setup sandbox /dev/shm. 
+ if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE { + if _, err := c.os.Stat(devShm); err != nil { + return fmt.Errorf("host %q is not available for host ipc: %w", devShm, err) + } + } else { + sandboxDevShm := c.getSandboxDevShm(id) + if err := c.os.MkdirAll(sandboxDevShm, 0700); err != nil { + return fmt.Errorf("failed to create sandbox shm: %w", err) + } + shmproperty := fmt.Sprintf("mode=1777,size=%d", defaultShmSize) + if err := c.os.(osinterface.UNIX).Mount("shm", sandboxDevShm, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), shmproperty); err != nil { + return fmt.Errorf("failed to mount sandbox shm: %w", err) + } + } + + return nil +} + +// parseDNSOptions parse DNS options into resolv.conf format content, +// if none option is specified, will return empty with no error. +func parseDNSOptions(servers, searches, options []string) (string, error) { + resolvContent := "" + + if len(searches) > 0 { + resolvContent += fmt.Sprintf("search %s\n", strings.Join(searches, " ")) + } + + if len(servers) > 0 { + resolvContent += fmt.Sprintf("nameserver %s\n", strings.Join(servers, "\nnameserver ")) + } + + if len(options) > 0 { + resolvContent += fmt.Sprintf("options %s\n", strings.Join(options, " ")) + } + + return resolvContent, nil +} + +// cleanupSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to +// remove these files. Unmount should *NOT* return error if the mount point is already unmounted. 
+func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { + if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() != runtime.NamespaceMode_NODE { + path, err := c.os.FollowSymlinkInScope(c.getSandboxDevShm(id), "/") + if err != nil { + return fmt.Errorf("failed to follow symlink: %w", err) + } + if err := c.os.(osinterface.UNIX).Unmount(path); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("failed to unmount %q: %w", path, err) + } + } + return nil +} + +// taskOpts generates task options for a (sandbox) container. +func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts { + // TODO(random-liu): Remove this after shim v1 is deprecated. + var taskOpts []containerd.NewTaskOpts + + // c.config.NoPivot is only supported for RuntimeLinuxV1 = "io.containerd.runtime.v1.linux" legacy linux runtime + // and is not supported for RuntimeRuncV1 = "io.containerd.runc.v1" or RuntimeRuncV2 = "io.containerd.runc.v2" + // for RuncV1/2 no pivot is set under the containerd.runtimes.runc.options config see + // https://github.com/containerd/containerd/blob/v1.3.2/runtime/v2/runc/options/oci.pb.go#L26 + if c.config.NoPivot && runtimeType == plugin.RuntimeLinuxV1 { + taskOpts = append(taskOpts, containerd.WithNoPivotRoot) + } + + return taskOpts +} diff --git a/pkg/cri/sbserver/sandbox_run_linux_test.go b/pkg/cri/sbserver/sandbox_run_linux_test.go new file mode 100644 index 000000000..7059639c7 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_run_linux_test.go @@ -0,0 +1,526 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "os" + "path/filepath" + "strconv" + "testing" + + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + v1 "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/pkg/cri/annotations" + "github.com/containerd/containerd/pkg/cri/opts" + ostesting "github.com/containerd/containerd/pkg/os/testing" +) + +func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) { + config := &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: "test-name", + Uid: "test-uid", + Namespace: "test-ns", + Attempt: 1, + }, + Hostname: "test-hostname", + LogDirectory: "test-log-directory", + Labels: map[string]string{"a": "b"}, + Annotations: map[string]string{"c": "d"}, + Linux: &runtime.LinuxPodSandboxConfig{ + CgroupParent: "/test/cgroup/parent", + }, + } + imageConfig := &imagespec.ImageConfig{ + Env: []string{"a=b", "c=d"}, + Entrypoint: []string{"/pause"}, + Cmd: []string{"forever"}, + WorkingDir: "/workspace", + } + specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) { + assert.Equal(t, "test-hostname", spec.Hostname) + assert.Equal(t, getCgroupsPath("/test/cgroup/parent", id), spec.Linux.CgroupsPath) + assert.Equal(t, relativeRootfsPath, spec.Root.Path) + assert.Equal(t, true, 
spec.Root.Readonly) + assert.Contains(t, spec.Process.Env, "a=b", "c=d") + assert.Equal(t, []string{"/pause", "forever"}, spec.Process.Args) + assert.Equal(t, "/workspace", spec.Process.Cwd) + assert.EqualValues(t, *spec.Linux.Resources.CPU.Shares, opts.DefaultSandboxCPUshares) + assert.EqualValues(t, *spec.Process.OOMScoreAdj, defaultSandboxOOMAdj) + + t.Logf("Check PodSandbox annotations") + assert.Contains(t, spec.Annotations, annotations.SandboxID) + assert.EqualValues(t, spec.Annotations[annotations.SandboxID], id) + + assert.Contains(t, spec.Annotations, annotations.ContainerType) + assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeSandbox) + + assert.Contains(t, spec.Annotations, annotations.SandboxNamespace) + assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-ns") + + assert.Contains(t, spec.Annotations, annotations.SandboxName) + assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-name") + + assert.Contains(t, spec.Annotations, annotations.SandboxLogDir) + assert.EqualValues(t, spec.Annotations[annotations.SandboxLogDir], "test-log-directory") + + if selinux.GetEnabled() { + assert.NotEqual(t, "", spec.Process.SelinuxLabel) + assert.NotEqual(t, "", spec.Linux.MountLabel) + } + } + return config, imageConfig, specCheck +} + +func TestLinuxSandboxContainerSpec(t *testing.T) { + testID := "test-id" + nsPath := "test-cni" + for desc, test := range map[string]struct { + configChange func(*runtime.PodSandboxConfig) + specCheck func(*testing.T, *runtimespec.Spec) + expectErr bool + }{ + "spec should reflect original config": { + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + // runtime spec should have expected namespaces enabled by default. 
+ require.NotNil(t, spec.Linux) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.NetworkNamespace, + Path: nsPath, + }) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.UTSNamespace, + }) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.PIDNamespace, + }) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.IPCNamespace, + }) + assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0") + assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647") + }, + }, + "host namespace": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + Network: runtime.NamespaceMode_NODE, + Pid: runtime.NamespaceMode_NODE, + Ipc: runtime.NamespaceMode_NODE, + }, + } + }, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + // runtime spec should disable expected namespaces in host mode. 
+ require.NotNil(t, spec.Linux) + assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.NetworkNamespace, + }) + assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.UTSNamespace, + }) + assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.PIDNamespace, + }) + assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.IPCNamespace, + }) + assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0") + assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647") + }, + }, + "should set supplemental groups correctly": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + SupplementalGroups: []int64{1111, 2222}, + } + }, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + require.NotNil(t, spec.Process) + assert.Contains(t, spec.Process.User.AdditionalGids, uint32(1111)) + assert.Contains(t, spec.Process.User.AdditionalGids, uint32(2222)) + }, + }, + "should overwrite default sysctls": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.Sysctls = map[string]string{ + "net.ipv4.ip_unprivileged_port_start": "500", + "net.ipv4.ping_group_range": "1 1000", + } + }, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + require.NotNil(t, spec.Process) + assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "500") + assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "1 1000") + }, + }, + "sandbox sizing annotations should be set if LinuxContainerResources were provided": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.Resources = &v1.LinuxContainerResources{ + CpuPeriod: 100, + CpuQuota: 200, + CpuShares: 5000, + MemoryLimitInBytes: 1024, + } + }, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + value, ok := 
spec.Annotations[annotations.SandboxCPUPeriod] + assert.True(t, ok) + assert.EqualValues(t, strconv.FormatInt(100, 10), value) + assert.EqualValues(t, "100", value) + + value, ok = spec.Annotations[annotations.SandboxCPUQuota] + assert.True(t, ok) + assert.EqualValues(t, "200", value) + + value, ok = spec.Annotations[annotations.SandboxCPUShares] + assert.True(t, ok) + assert.EqualValues(t, "5000", value) + + value, ok = spec.Annotations[annotations.SandboxMem] + assert.True(t, ok) + assert.EqualValues(t, "1024", value) + }, + }, + "sandbox sizing annotations should not be set if LinuxContainerResources were not provided": { + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + _, ok := spec.Annotations[annotations.SandboxCPUPeriod] + assert.False(t, ok) + _, ok = spec.Annotations[annotations.SandboxCPUQuota] + assert.False(t, ok) + _, ok = spec.Annotations[annotations.SandboxCPUShares] + assert.False(t, ok) + _, ok = spec.Annotations[annotations.SandboxMem] + assert.False(t, ok) + }, + }, + "sandbox sizing annotations are zero if the resources are set to 0": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.Resources = &v1.LinuxContainerResources{} + }, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + value, ok := spec.Annotations[annotations.SandboxCPUPeriod] + assert.True(t, ok) + assert.EqualValues(t, "0", value) + value, ok = spec.Annotations[annotations.SandboxCPUQuota] + assert.True(t, ok) + assert.EqualValues(t, "0", value) + value, ok = spec.Annotations[annotations.SandboxCPUShares] + assert.True(t, ok) + assert.EqualValues(t, "0", value) + value, ok = spec.Annotations[annotations.SandboxMem] + assert.True(t, ok) + assert.EqualValues(t, "0", value) + }, + }, + } { + t.Run(desc, func(t *testing.T) { + c := newTestCRIService() + c.config.EnableUnprivilegedICMP = true + c.config.EnableUnprivilegedPorts = true + config, imageConfig, specCheck := getRunPodSandboxTestData() + if test.configChange != nil { + 
test.configChange(config) + } + spec, err := c.sandboxContainerSpec(testID, config, imageConfig, nsPath, nil) + if test.expectErr { + assert.Error(t, err) + assert.Nil(t, spec) + return + } + assert.NoError(t, err) + assert.NotNil(t, spec) + specCheck(t, testID, spec) + if test.specCheck != nil { + test.specCheck(t, spec) + } + }) + } +} + +func TestSetupSandboxFiles(t *testing.T) { + const ( + testID = "test-id" + realhostname = "test-real-hostname" + ) + for desc, test := range map[string]struct { + dnsConfig *runtime.DNSConfig + hostname string + ipcMode runtime.NamespaceMode + expectedCalls []ostesting.CalledDetail + }{ + "should check host /dev/shm existence when ipc mode is NODE": { + ipcMode: runtime.NamespaceMode_NODE, + expectedCalls: []ostesting.CalledDetail{ + { + Name: "Hostname", + }, + { + Name: "WriteFile", + Arguments: []interface{}{ + filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), + []byte(realhostname + "\n"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/hosts", + filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/resolv.conf", + filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), + os.FileMode(0644), + }, + }, + { + Name: "Stat", + Arguments: []interface{}{"/dev/shm"}, + }, + }, + }, + "should create new /etc/resolv.conf if DNSOptions is set": { + dnsConfig: &runtime.DNSConfig{ + Servers: []string{"8.8.8.8"}, + Searches: []string{"114.114.114.114"}, + Options: []string{"timeout:1"}, + }, + ipcMode: runtime.NamespaceMode_NODE, + expectedCalls: []ostesting.CalledDetail{ + { + Name: "Hostname", + }, + { + Name: "WriteFile", + Arguments: []interface{}{ + filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), + []byte(realhostname + "\n"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/hosts", + filepath.Join(testRootDir, 
sandboxesDir, testID, "hosts"), + os.FileMode(0644), + }, + }, + { + Name: "WriteFile", + Arguments: []interface{}{ + filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), + []byte(`search 114.114.114.114 +nameserver 8.8.8.8 +options timeout:1 +`), os.FileMode(0644), + }, + }, + { + Name: "Stat", + Arguments: []interface{}{"/dev/shm"}, + }, + }, + }, + "should create sandbox shm when ipc namespace mode is not NODE": { + ipcMode: runtime.NamespaceMode_POD, + expectedCalls: []ostesting.CalledDetail{ + { + Name: "Hostname", + }, + { + Name: "WriteFile", + Arguments: []interface{}{ + filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), + []byte(realhostname + "\n"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/hosts", + filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/resolv.conf", + filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), + os.FileMode(0644), + }, + }, + { + Name: "MkdirAll", + Arguments: []interface{}{ + filepath.Join(testStateDir, sandboxesDir, testID, "shm"), + os.FileMode(0700), + }, + }, + { + Name: "Mount", + // Ignore arguments which are too complex to check. 
+ }, + }, + }, + "should create /etc/hostname when hostname is set": { + hostname: "test-hostname", + ipcMode: runtime.NamespaceMode_NODE, + expectedCalls: []ostesting.CalledDetail{ + { + Name: "WriteFile", + Arguments: []interface{}{ + filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), + []byte("test-hostname\n"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/hosts", + filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), + os.FileMode(0644), + }, + }, + { + Name: "CopyFile", + Arguments: []interface{}{ + "/etc/resolv.conf", + filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), + os.FileMode(0644), + }, + }, + { + Name: "Stat", + Arguments: []interface{}{"/dev/shm"}, + }, + }, + }, + } { + t.Run(desc, func(t *testing.T) { + c := newTestCRIService() + c.os.(*ostesting.FakeOS).HostnameFn = func() (string, error) { + return realhostname, nil + } + cfg := &runtime.PodSandboxConfig{ + Hostname: test.hostname, + DnsConfig: test.dnsConfig, + Linux: &runtime.LinuxPodSandboxConfig{ + SecurityContext: &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + Ipc: test.ipcMode, + }, + }, + }, + } + c.setupSandboxFiles(testID, cfg) + calls := c.os.(*ostesting.FakeOS).GetCalls() + assert.Len(t, calls, len(test.expectedCalls)) + for i, expected := range test.expectedCalls { + if expected.Arguments == nil { + // Ignore arguments. 
+ expected.Arguments = calls[i].Arguments + } + assert.Equal(t, expected, calls[i]) + } + }) + } +} + +func TestParseDNSOption(t *testing.T) { + for desc, test := range map[string]struct { + servers []string + searches []string + options []string + expectedContent string + expectErr bool + }{ + "empty dns options should return empty content": {}, + "non-empty dns options should return correct content": { + servers: []string{"8.8.8.8", "server.google.com"}, + searches: []string{"114.114.114.114"}, + options: []string{"timeout:1"}, + expectedContent: `search 114.114.114.114 +nameserver 8.8.8.8 +nameserver server.google.com +options timeout:1 +`, + }, + "expanded dns config should return correct content on modern libc (e.g. glibc 2.26 and above)": { + servers: []string{"8.8.8.8", "server.google.com"}, + searches: []string{ + "server0.google.com", + "server1.google.com", + "server2.google.com", + "server3.google.com", + "server4.google.com", + "server5.google.com", + "server6.google.com", + }, + options: []string{"timeout:1"}, + expectedContent: `search server0.google.com server1.google.com server2.google.com server3.google.com server4.google.com server5.google.com server6.google.com +nameserver 8.8.8.8 +nameserver server.google.com +options timeout:1 +`, + }, + } { + t.Run(desc, func(t *testing.T) { + resolvContent, err := parseDNSOptions(test.servers, test.searches, test.options) + if test.expectErr { + assert.Error(t, err) + return + } + assert.NoError(t, err) + assert.Equal(t, resolvContent, test.expectedContent) + }) + } +} + +func TestSandboxDisableCgroup(t *testing.T) { + config, imageConfig, _ := getRunPodSandboxTestData() + c := newTestCRIService() + c.config.DisableCgroup = true + spec, err := c.sandboxContainerSpec("test-id", config, imageConfig, "test-cni", []string{}) + require.NoError(t, err) + + t.Log("resource limit should not be set") + assert.Nil(t, spec.Linux.Resources.Memory) + assert.Nil(t, spec.Linux.Resources.CPU) + + t.Log("cgroup path should be 
empty") + assert.Empty(t, spec.Linux.CgroupsPath) +} + +// TODO(random-liu): [P1] Add unit test for different error cases to make sure +// the function cleans up on error properly. diff --git a/pkg/cri/sbserver/sandbox_run_other.go b/pkg/cri/sbserver/sandbox_run_other.go new file mode 100644 index 000000000..e3e0e53ed --- /dev/null +++ b/pkg/cri/sbserver/sandbox_run_other.go @@ -0,0 +1,56 @@ +//go:build !windows && !linux +// +build !windows,!linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "github.com/containerd/containerd" + "github.com/containerd/containerd/oci" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig, + imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (_ *runtimespec.Spec, retErr error) { + return c.runtimeSpec(id, "") +} + +// sandboxContainerSpecOpts generates OCI spec options for +// the sandbox container. +func (c *criService) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { + return []oci.SpecOpts{}, nil +} + +// setupSandboxFiles sets up necessary sandbox files including /dev/shm, /etc/hosts, +// /etc/resolv.conf and /etc/hostname. 
+func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { + return nil +} + +// cleanupSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to +// remove these files. Unmount should *NOT* return error if the mount point is already unmounted. +func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { + return nil +} + +// taskOpts generates task options for a (sandbox) container. +func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts { + return []containerd.NewTaskOpts{} +} diff --git a/pkg/cri/sbserver/sandbox_run_other_test.go b/pkg/cri/sbserver/sandbox_run_other_test.go new file mode 100644 index 000000000..d420e672f --- /dev/null +++ b/pkg/cri/sbserver/sandbox_run_other_test.go @@ -0,0 +1,36 @@ +//go:build !windows && !linux +// +build !windows,!linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "testing" + + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) { + config := &runtime.PodSandboxConfig{} + imageConfig := &imagespec.ImageConfig{} + specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) { + } + return config, imageConfig, specCheck +} diff --git a/pkg/cri/sbserver/sandbox_run_test.go b/pkg/cri/sbserver/sandbox_run_test.go new file mode 100644 index 000000000..d92544865 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_run_test.go @@ -0,0 +1,526 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "net" + goruntime "runtime" + "testing" + + "github.com/containerd/go-cni" + "github.com/containerd/typeurl" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/assert" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/pkg/cri/annotations" + criconfig "github.com/containerd/containerd/pkg/cri/config" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" +) + +func TestSandboxContainerSpec(t *testing.T) { + switch goruntime.GOOS { + case "darwin": + t.Skip("not implemented on Darwin") + case "freebsd": + t.Skip("not implemented on FreeBSD") + } + testID := "test-id" + nsPath := "test-cni" + for desc, test := range map[string]struct { + configChange func(*runtime.PodSandboxConfig) + podAnnotations []string + imageConfigChange func(*imagespec.ImageConfig) + specCheck func(*testing.T, *runtimespec.Spec) + expectErr bool + }{ + "should return error when entrypoint and cmd are empty": { + imageConfigChange: func(c *imagespec.ImageConfig) { + c.Entrypoint = nil + c.Cmd = nil + }, + expectErr: true, + }, + "a passthrough annotation should be passed as an OCI annotation": { + podAnnotations: []string{"c"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + assert.Equal(t, spec.Annotations["c"], "d") + }, + }, + "a non-passthrough annotation should not be passed as an OCI annotation": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Annotations["d"] = "e" + }, + podAnnotations: []string{"c"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + assert.Equal(t, spec.Annotations["c"], "d") + _, ok := spec.Annotations["d"] + assert.False(t, ok) + }, + }, + "passthrough annotations should support wildcard match": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Annotations["t.f"] = "j" + c.Annotations["z.g"] = "o" + c.Annotations["z"] = "o" 
+ c.Annotations["y.ca"] = "b" + c.Annotations["y"] = "b" + }, + podAnnotations: []string{"t*", "z.*", "y.c*"}, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + assert.Equal(t, spec.Annotations["t.f"], "j") + assert.Equal(t, spec.Annotations["z.g"], "o") + assert.Equal(t, spec.Annotations["y.ca"], "b") + _, ok := spec.Annotations["y"] + assert.False(t, ok) + _, ok = spec.Annotations["z"] + assert.False(t, ok) + }, + }, + } { + t.Run(desc, func(t *testing.T) { + c := newTestCRIService() + config, imageConfig, specCheck := getRunPodSandboxTestData() + if test.configChange != nil { + test.configChange(config) + } + + if test.imageConfigChange != nil { + test.imageConfigChange(imageConfig) + } + spec, err := c.sandboxContainerSpec(testID, config, imageConfig, nsPath, + test.podAnnotations) + if test.expectErr { + assert.Error(t, err) + assert.Nil(t, spec) + return + } + assert.NoError(t, err) + assert.NotNil(t, spec) + specCheck(t, testID, spec) + if test.specCheck != nil { + test.specCheck(t, spec) + } + }) + } +} + +func TestTypeurlMarshalUnmarshalSandboxMeta(t *testing.T) { + for desc, test := range map[string]struct { + configChange func(*runtime.PodSandboxConfig) + }{ + "should marshal original config": {}, + "should marshal Linux": { + configChange: func(c *runtime.PodSandboxConfig) { + if c.Linux == nil { + c.Linux = &runtime.LinuxPodSandboxConfig{} + } + c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + Network: runtime.NamespaceMode_NODE, + Pid: runtime.NamespaceMode_NODE, + Ipc: runtime.NamespaceMode_NODE, + }, + SupplementalGroups: []int64{1111, 2222}, + } + }, + }, + } { + t.Run(desc, func(t *testing.T) { + meta := &sandboxstore.Metadata{ + ID: "1", + Name: "sandbox_1", + NetNSPath: "/home/cloud", + } + meta.Config, _, _ = getRunPodSandboxTestData() + if test.configChange != nil { + test.configChange(meta.Config) + } + + any, err := typeurl.MarshalAny(meta) + assert.NoError(t, err) + 
data, err := typeurl.UnmarshalAny(any) + assert.NoError(t, err) + assert.IsType(t, &sandboxstore.Metadata{}, data) + curMeta, ok := data.(*sandboxstore.Metadata) + assert.True(t, ok) + assert.Equal(t, meta, curMeta) + }) + } +} + +func TestToCNIPortMappings(t *testing.T) { + for desc, test := range map[string]struct { + criPortMappings []*runtime.PortMapping + cniPortMappings []cni.PortMapping + }{ + "empty CRI port mapping should map to empty CNI port mapping": {}, + "CRI port mapping should be converted to CNI port mapping properly": { + criPortMappings: []*runtime.PortMapping{ + { + Protocol: runtime.Protocol_UDP, + ContainerPort: 1234, + HostPort: 5678, + HostIp: "123.124.125.126", + }, + { + Protocol: runtime.Protocol_TCP, + ContainerPort: 4321, + HostPort: 8765, + HostIp: "126.125.124.123", + }, + { + Protocol: runtime.Protocol_SCTP, + ContainerPort: 1234, + HostPort: 5678, + HostIp: "123.124.125.126", + }, + }, + cniPortMappings: []cni.PortMapping{ + { + HostPort: 5678, + ContainerPort: 1234, + Protocol: "udp", + HostIP: "123.124.125.126", + }, + { + HostPort: 8765, + ContainerPort: 4321, + Protocol: "tcp", + HostIP: "126.125.124.123", + }, + { + HostPort: 5678, + ContainerPort: 1234, + Protocol: "sctp", + HostIP: "123.124.125.126", + }, + }, + }, + "CRI port mapping without host port should be skipped": { + criPortMappings: []*runtime.PortMapping{ + { + Protocol: runtime.Protocol_UDP, + ContainerPort: 1234, + HostIp: "123.124.125.126", + }, + { + Protocol: runtime.Protocol_TCP, + ContainerPort: 4321, + HostPort: 8765, + HostIp: "126.125.124.123", + }, + }, + cniPortMappings: []cni.PortMapping{ + { + HostPort: 8765, + ContainerPort: 4321, + Protocol: "tcp", + HostIP: "126.125.124.123", + }, + }, + }, + "CRI port mapping with unsupported protocol should be skipped": { + criPortMappings: []*runtime.PortMapping{ + { + Protocol: runtime.Protocol_TCP, + ContainerPort: 4321, + HostPort: 8765, + HostIp: "126.125.124.123", + }, + }, + cniPortMappings: 
[]cni.PortMapping{ + { + HostPort: 8765, + ContainerPort: 4321, + Protocol: "tcp", + HostIP: "126.125.124.123", + }, + }, + }, + } { + t.Run(desc, func(t *testing.T) { + assert.Equal(t, test.cniPortMappings, toCNIPortMappings(test.criPortMappings)) + }) + } +} + +func TestSelectPodIP(t *testing.T) { + for desc, test := range map[string]struct { + ips []string + expectedIP string + expectedAdditionalIPs []string + pref string + }{ + "ipv4 should be picked even if ipv6 comes first": { + ips: []string{"2001:db8:85a3::8a2e:370:7334", "192.168.17.43"}, + expectedIP: "192.168.17.43", + expectedAdditionalIPs: []string{"2001:db8:85a3::8a2e:370:7334"}, + }, + "ipv6 should be picked even if ipv4 comes first": { + ips: []string{"2001:db8:85a3::8a2e:370:7334", "192.168.17.43"}, + expectedIP: "2001:db8:85a3::8a2e:370:7334", + expectedAdditionalIPs: []string{"192.168.17.43"}, + pref: "ipv6", + }, + "order should reflect ip selection": { + ips: []string{"2001:db8:85a3::8a2e:370:7334", "192.168.17.43"}, + expectedIP: "2001:db8:85a3::8a2e:370:7334", + expectedAdditionalIPs: []string{"192.168.17.43"}, + pref: "cni", + }, + + "ipv4 should be picked when there is only ipv4": { + ips: []string{"192.168.17.43"}, + expectedIP: "192.168.17.43", + expectedAdditionalIPs: nil, + }, + "ipv6 should be picked when there is no ipv4": { + ips: []string{"2001:db8:85a3::8a2e:370:7334"}, + expectedIP: "2001:db8:85a3::8a2e:370:7334", + expectedAdditionalIPs: nil, + }, + "the first ipv4 should be picked when there are multiple ipv4": { // unlikely to happen + ips: []string{"2001:db8:85a3::8a2e:370:7334", "192.168.17.43", "2001:db8:85a3::8a2e:370:7335", "192.168.17.45"}, + expectedIP: "192.168.17.43", + expectedAdditionalIPs: []string{"2001:db8:85a3::8a2e:370:7334", "2001:db8:85a3::8a2e:370:7335", "192.168.17.45"}, + }, + } { + t.Run(desc, func(t *testing.T) { + var ipConfigs []*cni.IPConfig + for _, ip := range test.ips { + ipConfigs = append(ipConfigs, &cni.IPConfig{ + IP: net.ParseIP(ip), + }) + } + 
ip, additionalIPs := selectPodIPs(context.Background(), ipConfigs, test.pref) + assert.Equal(t, test.expectedIP, ip) + assert.Equal(t, test.expectedAdditionalIPs, additionalIPs) + }) + } +} + +func TestHostAccessingSandbox(t *testing.T) { + privilegedContext := &runtime.PodSandboxConfig{ + Linux: &runtime.LinuxPodSandboxConfig{ + SecurityContext: &runtime.LinuxSandboxSecurityContext{ + Privileged: true, + }, + }, + } + nonPrivilegedContext := &runtime.PodSandboxConfig{ + Linux: &runtime.LinuxPodSandboxConfig{ + SecurityContext: &runtime.LinuxSandboxSecurityContext{ + Privileged: false, + }, + }, + } + hostNamespace := &runtime.PodSandboxConfig{ + Linux: &runtime.LinuxPodSandboxConfig{ + SecurityContext: &runtime.LinuxSandboxSecurityContext{ + Privileged: false, + NamespaceOptions: &runtime.NamespaceOption{ + Network: runtime.NamespaceMode_NODE, + Pid: runtime.NamespaceMode_NODE, + Ipc: runtime.NamespaceMode_NODE, + }, + }, + }, + } + tests := []struct { + name string + config *runtime.PodSandboxConfig + want bool + }{ + {"Security Context is nil", nil, false}, + {"Security Context is privileged", privilegedContext, false}, + {"Security Context is not privileged", nonPrivilegedContext, false}, + {"Security Context namespace host access", hostNamespace, true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := hostAccessingSandbox(tt.config); got != tt.want { + t.Errorf("hostAccessingSandbox() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestGetSandboxRuntime(t *testing.T) { + untrustedWorkloadRuntime := criconfig.Runtime{ + Type: "io.containerd.runtime.v1.linux", + Engine: "untrusted-workload-runtime", + Root: "", + } + + defaultRuntime := criconfig.Runtime{ + Type: "io.containerd.runtime.v1.linux", + Engine: "default-runtime", + Root: "", + } + + fooRuntime := criconfig.Runtime{ + Type: "io.containerd.runtime.v1.linux", + Engine: "foo-bar", + Root: "", + } + + for desc, test := range map[string]struct { + sandboxConfig 
*runtime.PodSandboxConfig + runtimeHandler string + runtimes map[string]criconfig.Runtime + expectErr bool + expectedRuntime criconfig.Runtime + }{ + "should return error if untrusted workload requires host access": { + sandboxConfig: &runtime.PodSandboxConfig{ + Linux: &runtime.LinuxPodSandboxConfig{ + SecurityContext: &runtime.LinuxSandboxSecurityContext{ + Privileged: false, + NamespaceOptions: &runtime.NamespaceOption{ + Network: runtime.NamespaceMode_NODE, + Pid: runtime.NamespaceMode_NODE, + Ipc: runtime.NamespaceMode_NODE, + }, + }, + }, + Annotations: map[string]string{ + annotations.UntrustedWorkload: "true", + }, + }, + runtimes: map[string]criconfig.Runtime{ + criconfig.RuntimeDefault: defaultRuntime, + criconfig.RuntimeUntrusted: untrustedWorkloadRuntime, + }, + expectErr: true, + }, + "should use untrusted workload runtime for untrusted workload": { + sandboxConfig: &runtime.PodSandboxConfig{ + Annotations: map[string]string{ + annotations.UntrustedWorkload: "true", + }, + }, + runtimes: map[string]criconfig.Runtime{ + criconfig.RuntimeDefault: defaultRuntime, + criconfig.RuntimeUntrusted: untrustedWorkloadRuntime, + }, + expectedRuntime: untrustedWorkloadRuntime, + }, + "should use default runtime for regular workload": { + sandboxConfig: &runtime.PodSandboxConfig{}, + runtimes: map[string]criconfig.Runtime{ + criconfig.RuntimeDefault: defaultRuntime, + }, + expectedRuntime: defaultRuntime, + }, + "should use default runtime for trusted workload": { + sandboxConfig: &runtime.PodSandboxConfig{ + Annotations: map[string]string{ + annotations.UntrustedWorkload: "false", + }, + }, + runtimes: map[string]criconfig.Runtime{ + criconfig.RuntimeDefault: defaultRuntime, + criconfig.RuntimeUntrusted: untrustedWorkloadRuntime, + }, + expectedRuntime: defaultRuntime, + }, + "should return error if untrusted workload runtime is required but not configured": { + sandboxConfig: &runtime.PodSandboxConfig{ + Annotations: map[string]string{ + 
annotations.UntrustedWorkload: "true", + }, + }, + runtimes: map[string]criconfig.Runtime{ + criconfig.RuntimeDefault: defaultRuntime, + }, + expectErr: true, + }, + "should use 'untrusted' runtime for untrusted workload": { + sandboxConfig: &runtime.PodSandboxConfig{ + Annotations: map[string]string{ + annotations.UntrustedWorkload: "true", + }, + }, + runtimes: map[string]criconfig.Runtime{ + criconfig.RuntimeDefault: defaultRuntime, + criconfig.RuntimeUntrusted: untrustedWorkloadRuntime, + }, + expectedRuntime: untrustedWorkloadRuntime, + }, + "should use 'untrusted' runtime for untrusted workload & handler": { + sandboxConfig: &runtime.PodSandboxConfig{ + Annotations: map[string]string{ + annotations.UntrustedWorkload: "true", + }, + }, + runtimeHandler: "untrusted", + runtimes: map[string]criconfig.Runtime{ + criconfig.RuntimeDefault: defaultRuntime, + criconfig.RuntimeUntrusted: untrustedWorkloadRuntime, + }, + expectedRuntime: untrustedWorkloadRuntime, + }, + "should return an error if untrusted annotation with conflicting handler": { + sandboxConfig: &runtime.PodSandboxConfig{ + Annotations: map[string]string{ + annotations.UntrustedWorkload: "true", + }, + }, + runtimeHandler: "foo", + runtimes: map[string]criconfig.Runtime{ + criconfig.RuntimeDefault: defaultRuntime, + criconfig.RuntimeUntrusted: untrustedWorkloadRuntime, + "foo": fooRuntime, + }, + expectErr: true, + }, + "should use correct runtime for a runtime handler": { + sandboxConfig: &runtime.PodSandboxConfig{}, + runtimeHandler: "foo", + runtimes: map[string]criconfig.Runtime{ + criconfig.RuntimeDefault: defaultRuntime, + criconfig.RuntimeUntrusted: untrustedWorkloadRuntime, + "foo": fooRuntime, + }, + expectedRuntime: fooRuntime, + }, + "should return error if runtime handler is required but not configured": { + sandboxConfig: &runtime.PodSandboxConfig{}, + runtimeHandler: "bar", + runtimes: map[string]criconfig.Runtime{ + criconfig.RuntimeDefault: defaultRuntime, + "foo": fooRuntime, + }, + 
expectErr: true, + }, + } { + t.Run(desc, func(t *testing.T) { + cri := newTestCRIService() + cri.config = criconfig.Config{ + PluginConfig: criconfig.DefaultConfig(), + } + cri.config.ContainerdConfig.DefaultRuntimeName = criconfig.RuntimeDefault + cri.config.ContainerdConfig.Runtimes = test.runtimes + r, err := cri.getSandboxRuntime(test.sandboxConfig, test.runtimeHandler) + assert.Equal(t, test.expectErr, err != nil) + assert.Equal(t, test.expectedRuntime, r) + }) + } +} diff --git a/pkg/cri/sbserver/sandbox_run_windows.go b/pkg/cri/sbserver/sandbox_run_windows.go new file mode 100644 index 000000000..db363c434 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_run_windows.go @@ -0,0 +1,113 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "fmt" + "strconv" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/oci" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/pkg/cri/annotations" + customopts "github.com/containerd/containerd/pkg/cri/opts" +) + +func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig, + imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (*runtimespec.Spec, error) { + // Creates a spec Generator with the default spec. 
+ specOpts := []oci.SpecOpts{ + oci.WithEnv(imageConfig.Env), + oci.WithHostname(config.GetHostname()), + } + if imageConfig.WorkingDir != "" { + specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) + } + + if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 { + // Pause image must have entrypoint or cmd. + return nil, fmt.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig) + } + specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...)) + + specOpts = append(specOpts, + // Clear the root location since hcsshim expects it. + // NOTE: readonly rootfs doesn't work on windows. + customopts.WithoutRoot, + customopts.WithWindowsNetworkNamespace(nsPath), + ) + + specOpts = append(specOpts, customopts.WithWindowsDefaultSandboxShares) + + // Start with the image config user and override below if RunAsUsername is not "". + username := imageConfig.User + + runAsUser := config.GetWindows().GetSecurityContext().GetRunAsUsername() + if runAsUser != "" { + username = runAsUser + } + + cs := config.GetWindows().GetSecurityContext().GetCredentialSpec() + if cs != "" { + specOpts = append(specOpts, customopts.WithWindowsCredentialSpec(cs)) + } + + // There really isn't a good Windows way to verify that the username is available in the + // image as early as here like there is for Linux. Later on in the stack hcsshim + // will handle the behavior of erroring out if the user isn't available in the image + // when trying to run the init process. 
+ specOpts = append(specOpts, oci.WithUser(username)) + + for pKey, pValue := range getPassthroughAnnotations(config.Annotations, + runtimePodAnnotations) { + specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) + } + + specOpts = append(specOpts, + customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox), + customopts.WithAnnotation(annotations.SandboxID, id), + customopts.WithAnnotation(annotations.SandboxNamespace, config.GetMetadata().GetNamespace()), + customopts.WithAnnotation(annotations.SandboxName, config.GetMetadata().GetName()), + customopts.WithAnnotation(annotations.SandboxLogDir, config.GetLogDirectory()), + customopts.WithAnnotation(annotations.WindowsHostProcess, strconv.FormatBool(config.GetWindows().GetSecurityContext().GetHostProcess())), + ) + + return c.runtimeSpec(id, "", specOpts...) +} + +// No sandbox container spec options for windows yet. +func (c *criService) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { + return nil, nil +} + +// No sandbox files needed for windows. +func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { + return nil +} + +// No sandbox files needed for windows. +func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { + return nil +} + +// No task options needed for windows. +func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts { + return nil +} diff --git a/pkg/cri/sbserver/sandbox_run_windows_test.go b/pkg/cri/sbserver/sandbox_run_windows_test.go new file mode 100644 index 000000000..6569f67e0 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_run_windows_test.go @@ -0,0 +1,108 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "testing" + + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/assert" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/pkg/cri/annotations" + "github.com/containerd/containerd/pkg/cri/opts" +) + +func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) { + config := &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: "test-name", + Uid: "test-uid", + Namespace: "test-ns", + Attempt: 1, + }, + Hostname: "test-hostname", + LogDirectory: "test-log-directory", + Labels: map[string]string{"a": "b"}, + Annotations: map[string]string{"c": "d"}, + Windows: &runtime.WindowsPodSandboxConfig{ + SecurityContext: &runtime.WindowsSandboxSecurityContext{ + RunAsUsername: "test-user", + CredentialSpec: "{\"test\": \"spec\"}", + HostProcess: false, + }, + }, + } + imageConfig := &imagespec.ImageConfig{ + Env: []string{"a=b", "c=d"}, + Entrypoint: []string{"/pause"}, + Cmd: []string{"forever"}, + WorkingDir: "/workspace", + User: "test-image-user", + } + specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) { + assert.Equal(t, "test-hostname", spec.Hostname) + assert.Nil(t, spec.Root) + assert.Contains(t, spec.Process.Env, "a=b", "c=d") + assert.Equal(t, []string{"/pause", "forever"}, spec.Process.Args) + assert.Equal(t, "/workspace", spec.Process.Cwd) + assert.EqualValues(t, 
*spec.Windows.Resources.CPU.Shares, opts.DefaultSandboxCPUshares) + + // Also checks if override of the image configs user is behaving. + t.Logf("Check username") + assert.Contains(t, spec.Process.User.Username, "test-user") + + t.Logf("Check credential spec") + assert.Contains(t, spec.Windows.CredentialSpec, "{\"test\": \"spec\"}") + + t.Logf("Check PodSandbox annotations") + assert.Contains(t, spec.Annotations, annotations.SandboxID) + assert.EqualValues(t, spec.Annotations[annotations.SandboxID], id) + + assert.Contains(t, spec.Annotations, annotations.ContainerType) + assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeSandbox) + + assert.Contains(t, spec.Annotations, annotations.SandboxNamespace) + assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-ns") + + assert.Contains(t, spec.Annotations, annotations.SandboxName) + assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-name") + + assert.Contains(t, spec.Annotations, annotations.SandboxLogDir) + assert.EqualValues(t, spec.Annotations[annotations.SandboxLogDir], "test-log-directory") + + assert.Contains(t, spec.Annotations, annotations.WindowsHostProcess) + assert.EqualValues(t, spec.Annotations[annotations.WindowsHostProcess], "false") + } + return config, imageConfig, specCheck +} + +func TestSandboxWindowsNetworkNamespace(t *testing.T) { + testID := "test-id" + nsPath := "test-cni" + c := newTestCRIService() + + config, imageConfig, specCheck := getRunPodSandboxTestData() + spec, err := c.sandboxContainerSpec(testID, config, imageConfig, nsPath, nil) + assert.NoError(t, err) + assert.NotNil(t, spec) + specCheck(t, testID, spec) + assert.NotNil(t, spec.Windows) + assert.NotNil(t, spec.Windows.Network) + assert.Equal(t, nsPath, spec.Windows.Network.NetworkNamespace) +} diff --git a/pkg/cri/sbserver/sandbox_stats.go b/pkg/cri/sbserver/sandbox_stats.go new file mode 100644 index 000000000..3e805e09f --- /dev/null +++ 
b/pkg/cri/sbserver/sandbox_stats.go @@ -0,0 +1,47 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +func (c *criService) PodSandboxStats( + ctx context.Context, + r *runtime.PodSandboxStatsRequest, +) (*runtime.PodSandboxStatsResponse, error) { + + sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId()) + if err != nil { + return nil, fmt.Errorf("an error occurred when trying to find sandbox %s: %w", r.GetPodSandboxId(), err) + } + + metrics, err := metricsForSandbox(sandbox) + if err != nil { //nolint:staticcheck // Ignore SA4023 as some platforms always return nil (unimplemented metrics) + return nil, fmt.Errorf("failed getting metrics for sandbox %s: %w", r.GetPodSandboxId(), err) + } + + podSandboxStats, err := c.podSandboxStats(ctx, sandbox, metrics) + if err != nil { //nolint:staticcheck // Ignore SA4023 as some platforms always return nil (unimplemented metrics) + return nil, fmt.Errorf("failed to decode pod sandbox metrics %s: %w", r.GetPodSandboxId(), err) + } + + return &runtime.PodSandboxStatsResponse{Stats: podSandboxStats}, nil +} diff --git a/pkg/cri/sbserver/sandbox_stats_linux.go b/pkg/cri/sbserver/sandbox_stats_linux.go new file mode 100644 index 000000000..f506cbde6 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_stats_linux.go @@ -0,0 +1,177 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + "time" + + "github.com/containernetworking/plugins/pkg/ns" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/cgroups" + cgroupsv2 "github.com/containerd/cgroups/v2" + + "github.com/vishvananda/netlink" + + "github.com/containerd/containerd/log" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" +) + +func (c *criService) podSandboxStats( + ctx context.Context, + sandbox sandboxstore.Sandbox, + stats interface{}, +) (*runtime.PodSandboxStats, error) { + meta := sandbox.Metadata + + if sandbox.Status.Get().State != sandboxstore.StateReady { + return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state", meta.ID) + } + + var podSandboxStats runtime.PodSandboxStats + podSandboxStats.Attributes = &runtime.PodSandboxAttributes{ + Id: meta.ID, + Metadata: meta.Config.GetMetadata(), + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + } + + podSandboxStats.Linux = &runtime.LinuxPodSandboxStats{} + + if stats != nil { + timestamp := time.Now() + + cpuStats, err := c.cpuContainerStats(meta.ID, true /* isSandbox */, stats, timestamp) + if err != nil { + return nil, fmt.Errorf("failed to obtain cpu stats: %w", err) + } + podSandboxStats.Linux.Cpu = cpuStats + + memoryStats, err := c.memoryContainerStats(meta.ID, stats, timestamp) + if err != nil { + return nil, 
fmt.Errorf("failed to obtain memory stats: %w", err) + } + podSandboxStats.Linux.Memory = memoryStats + + if sandbox.NetNSPath != "" { + rxBytes, rxErrors, txBytes, txErrors := getContainerNetIO(ctx, sandbox.NetNSPath) + podSandboxStats.Linux.Network = &runtime.NetworkUsage{ + DefaultInterface: &runtime.NetworkInterfaceUsage{ + Name: defaultIfName, + RxBytes: &runtime.UInt64Value{Value: rxBytes}, + RxErrors: &runtime.UInt64Value{Value: rxErrors}, + TxBytes: &runtime.UInt64Value{Value: txBytes}, + TxErrors: &runtime.UInt64Value{Value: txErrors}, + }, + } + } + + var pidCount uint64 + for _, cntr := range c.containerStore.List() { + if cntr.SandboxID != sandbox.ID { + continue + } + + state := cntr.Status.Get().State() + if state != runtime.ContainerState_CONTAINER_RUNNING { + continue + } + + task, err := cntr.Container.Task(ctx, nil) + if err != nil { + return nil, err + } + + processes, err := task.Pids(ctx) + if err != nil { + return nil, err + } + pidCount += uint64(len(processes)) + + } + podSandboxStats.Linux.Process = &runtime.ProcessUsage{ + Timestamp: timestamp.UnixNano(), + ProcessCount: &runtime.UInt64Value{Value: pidCount}, + } + + listContainerStatsRequest := &runtime.ListContainerStatsRequest{Filter: &runtime.ContainerStatsFilter{PodSandboxId: meta.ID}} + resp, err := c.ListContainerStats(ctx, listContainerStatsRequest) + if err != nil { + return nil, fmt.Errorf("failed to obtain container stats during podSandboxStats call: %w", err) + } + podSandboxStats.Linux.Containers = resp.GetStats() + } + + return &podSandboxStats, nil +} + +// https://github.com/cri-o/cri-o/blob/74a5cf8dffd305b311eb1c7f43a4781738c388c1/internal/oci/stats.go#L32 +func getContainerNetIO(ctx context.Context, netNsPath string) (rxBytes, rxErrors, txBytes, txErrors uint64) { + ns.WithNetNSPath(netNsPath, func(_ ns.NetNS) error { + link, err := netlink.LinkByName(defaultIfName) + if err != nil { + log.G(ctx).WithError(err).Errorf("unable to retrieve network namespace stats for 
netNsPath: %v, interface: %v", netNsPath, defaultIfName) + return err + } + attrs := link.Attrs() + if attrs != nil && attrs.Statistics != nil { + rxBytes = attrs.Statistics.RxBytes + rxErrors = attrs.Statistics.RxErrors + txBytes = attrs.Statistics.TxBytes + txErrors = attrs.Statistics.TxErrors + } + return nil + }) + + return rxBytes, rxErrors, txBytes, txErrors +} + +func metricsForSandbox(sandbox sandboxstore.Sandbox) (interface{}, error) { + cgroupPath := sandbox.Config.GetLinux().GetCgroupParent() + + if cgroupPath == "" { + return nil, fmt.Errorf("failed to get cgroup metrics for sandbox %v because cgroupPath is empty", sandbox.ID) + } + + var statsx interface{} + if cgroups.Mode() == cgroups.Unified { + cg, err := cgroupsv2.LoadManager("/sys/fs/cgroup", cgroupPath) + if err != nil { + return nil, fmt.Errorf("failed to load sandbox cgroup: %v: %w", cgroupPath, err) + } + stats, err := cg.Stat() + if err != nil { + return nil, fmt.Errorf("failed to get stats for cgroup: %v: %w", cgroupPath, err) + } + statsx = stats + + } else { + control, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(cgroupPath)) + if err != nil { + return nil, fmt.Errorf("failed to load sandbox cgroup %v: %w", cgroupPath, err) + } + stats, err := control.Stat(cgroups.IgnoreNotExist) + if err != nil { + return nil, fmt.Errorf("failed to get stats for cgroup %v: %w", cgroupPath, err) + } + statsx = stats + } + + return statsx, nil +} diff --git a/pkg/cri/sbserver/sandbox_stats_list.go b/pkg/cri/sbserver/sandbox_stats_list.go new file mode 100644 index 000000000..69d4336a3 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_stats_list.go @@ -0,0 +1,80 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// ListPodSandboxStats returns stats of all ready sandboxes. +func (c *criService) ListPodSandboxStats( + ctx context.Context, + r *runtime.ListPodSandboxStatsRequest, +) (*runtime.ListPodSandboxStatsResponse, error) { + sandboxes := c.sandboxesForListPodSandboxStatsRequest(r) + + podSandboxStats := new(runtime.ListPodSandboxStatsResponse) + for _, sandbox := range sandboxes { + metrics, err := metricsForSandbox(sandbox) + if err != nil { //nolint:staticcheck // Ignore SA4023 as some platforms always return nil (unimplemented metrics) + return nil, fmt.Errorf("failed to obtain metrics for sandbox %q: %w", sandbox.ID, err) + } + + sandboxStats, err := c.podSandboxStats(ctx, sandbox, metrics) + if err != nil { //nolint:staticcheck // Ignore SA4023 as some platforms always return nil (unimplemented metrics) + return nil, fmt.Errorf("failed to decode sandbox container metrics for sandbox %q: %w", sandbox.ID, err) + } + podSandboxStats.Stats = append(podSandboxStats.Stats, sandboxStats) + } + + return podSandboxStats, nil +} + +func (c *criService) sandboxesForListPodSandboxStatsRequest(r *runtime.ListPodSandboxStatsRequest) []sandboxstore.Sandbox { + sandboxesInStore := c.sandboxStore.List() + + if r.GetFilter() == nil { + return sandboxesInStore + } + + c.normalizePodSandboxStatsFilter(r.GetFilter()) + + var sandboxes []sandboxstore.Sandbox + for _, sandbox := range sandboxesInStore { + if 
r.GetFilter().GetId() != "" && sandbox.ID != r.GetFilter().GetId() { + continue + } + + if r.GetFilter().GetLabelSelector() != nil && + !matchLabelSelector(r.GetFilter().GetLabelSelector(), sandbox.Config.GetLabels()) { + continue + } + + // We can't obtain metrics for sandboxes that aren't in ready state + if sandbox.Status.Get().State != sandboxstore.StateReady { + continue + } + + sandboxes = append(sandboxes, sandbox) + } + + return sandboxes +} diff --git a/pkg/cri/sbserver/sandbox_stats_other.go b/pkg/cri/sbserver/sandbox_stats_other.go new file mode 100644 index 000000000..393bc784b --- /dev/null +++ b/pkg/cri/sbserver/sandbox_stats_other.go @@ -0,0 +1,38 @@ +//go:build !windows && !linux +// +build !windows,!linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "fmt" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/errdefs" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" +) + +func (c *criService) podSandboxStats(ctx context.Context, sandbox sandboxstore.Sandbox, stats interface{}) (*runtime.PodSandboxStats, error) { + return nil, fmt.Errorf("pod sandbox stats not implemented: %w", errdefs.ErrNotImplemented) +} + +func metricsForSandbox(sandbox sandboxstore.Sandbox) (interface{}, error) { + return nil, fmt.Errorf("metrics for sandbox not implemented: %w", errdefs.ErrNotImplemented) +} diff --git a/pkg/cri/sbserver/sandbox_stats_windows.go b/pkg/cri/sbserver/sandbox_stats_windows.go new file mode 100644 index 000000000..54b346e6c --- /dev/null +++ b/pkg/cri/sbserver/sandbox_stats_windows.go @@ -0,0 +1,35 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "fmt" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/errdefs" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" +) + +func (c *criService) podSandboxStats(ctx context.Context, sandbox sandboxstore.Sandbox, stats interface{}) (*runtime.PodSandboxStats, error) { + return nil, fmt.Errorf("pod sandbox stats not implemented on windows: %w", errdefs.ErrNotImplemented) +} + +func metricsForSandbox(sandbox sandboxstore.Sandbox) (interface{}, error) { + return nil, fmt.Errorf("metrics for sandbox not implemented on windows: %w", errdefs.ErrNotImplemented) +} diff --git a/pkg/cri/sbserver/sandbox_status.go b/pkg/cri/sbserver/sandbox_status.go new file mode 100644 index 000000000..3f21e8800 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_status.go @@ -0,0 +1,220 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "encoding/json" + "fmt" + goruntime "runtime" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/go-cni" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" +) + +// PodSandboxStatus returns the status of the PodSandbox. 
+func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandboxStatusRequest) (*runtime.PodSandboxStatusResponse, error) { + sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId()) + if err != nil { + return nil, fmt.Errorf("an error occurred when try to find sandbox: %w", err) + } + + ip, additionalIPs, err := c.getIPs(sandbox) + if err != nil { + return nil, fmt.Errorf("failed to get sandbox ip: %w", err) + } + status := toCRISandboxStatus(sandbox.Metadata, sandbox.Status.Get(), ip, additionalIPs) + if status.GetCreatedAt() == 0 { + // CRI doesn't allow CreatedAt == 0. + info, err := sandbox.Container.Info(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get CreatedAt for sandbox container in %q state: %w", status.State, err) + } + status.CreatedAt = info.CreatedAt.UnixNano() + } + if !r.GetVerbose() { + return &runtime.PodSandboxStatusResponse{Status: status}, nil + } + + // Generate verbose information. + info, err := toCRISandboxInfo(ctx, sandbox) + if err != nil { + return nil, fmt.Errorf("failed to get verbose sandbox container info: %w", err) + } + + return &runtime.PodSandboxStatusResponse{ + Status: status, + Info: info, + }, nil +} + +func (c *criService) getIPs(sandbox sandboxstore.Sandbox) (string, []string, error) { + config := sandbox.Config + + if goruntime.GOOS != "windows" && + config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE { + // For sandboxes using the node network we are not + // responsible for reporting the IP. 
+ return "", nil, nil + } + if goruntime.GOOS == "windows" && config.GetWindows().GetSecurityContext().GetHostProcess() { + return "", nil, nil + } + + if closed, err := sandbox.NetNS.Closed(); err != nil { + return "", nil, fmt.Errorf("check network namespace closed: %w", err) + } else if closed { + return "", nil, nil + } + + return sandbox.IP, sandbox.AdditionalIPs, nil +} + +// toCRISandboxStatus converts sandbox metadata into CRI pod sandbox status. +func toCRISandboxStatus(meta sandboxstore.Metadata, status sandboxstore.Status, ip string, additionalIPs []string) *runtime.PodSandboxStatus { + // Set sandbox state to NOTREADY by default. + state := runtime.PodSandboxState_SANDBOX_NOTREADY + if status.State == sandboxstore.StateReady { + state = runtime.PodSandboxState_SANDBOX_READY + } + nsOpts := meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions() + var ips []*runtime.PodIP + for _, additionalIP := range additionalIPs { + ips = append(ips, &runtime.PodIP{Ip: additionalIP}) + } + return &runtime.PodSandboxStatus{ + Id: meta.ID, + Metadata: meta.Config.GetMetadata(), + State: state, + CreatedAt: status.CreatedAt.UnixNano(), + Network: &runtime.PodSandboxNetworkStatus{ + Ip: ip, + AdditionalIps: ips, + }, + Linux: &runtime.LinuxPodSandboxStatus{ + Namespaces: &runtime.Namespace{ + Options: &runtime.NamespaceOption{ + Network: nsOpts.GetNetwork(), + Pid: nsOpts.GetPid(), + Ipc: nsOpts.GetIpc(), + }, + }, + }, + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + RuntimeHandler: meta.RuntimeHandler, + } +} + +// SandboxInfo is extra information for sandbox. 
+// TODO (mikebrow): discuss predefining constants structures for some or all of these field names in CRI +type SandboxInfo struct { + Pid uint32 `json:"pid"` + Status string `json:"processStatus"` + NetNSClosed bool `json:"netNamespaceClosed"` + Image string `json:"image"` + SnapshotKey string `json:"snapshotKey"` + Snapshotter string `json:"snapshotter"` + // Note: a new field `RuntimeHandler` has been added into the CRI PodSandboxStatus struct, and + // should be set. This `RuntimeHandler` field will be deprecated after containerd 1.3 (tracked + // in https://github.com/containerd/cri/issues/1064). + RuntimeHandler string `json:"runtimeHandler"` // see the Note above + RuntimeType string `json:"runtimeType"` + RuntimeOptions interface{} `json:"runtimeOptions"` + Config *runtime.PodSandboxConfig `json:"config"` + RuntimeSpec *runtimespec.Spec `json:"runtimeSpec"` + CNIResult *cni.Result `json:"cniResult"` +} + +// toCRISandboxInfo converts internal container object information to CRI sandbox status response info map. +func toCRISandboxInfo(ctx context.Context, sandbox sandboxstore.Sandbox) (map[string]string, error) { + container := sandbox.Container + task, err := container.Task(ctx, nil) + if err != nil && !errdefs.IsNotFound(err) { + return nil, fmt.Errorf("failed to get sandbox container task: %w", err) + } + + var processStatus containerd.ProcessStatus + if task != nil { + taskStatus, err := task.Status(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get task status: %w", err) + } + + processStatus = taskStatus.Status + } + + si := &SandboxInfo{ + Pid: sandbox.Status.Get().Pid, + RuntimeHandler: sandbox.RuntimeHandler, + Status: string(processStatus), + Config: sandbox.Config, + CNIResult: sandbox.CNIResult, + } + + if si.Status == "" { + // If processStatus is empty, it means that the task is deleted. Apply "deleted" + // status which does not exist in containerd. 
+ si.Status = "deleted" + } + + if sandbox.NetNS != nil { + // Add network closed information if sandbox is not using host network. + closed, err := sandbox.NetNS.Closed() + if err != nil { + return nil, fmt.Errorf("failed to check network namespace closed: %w", err) + } + si.NetNSClosed = closed + } + + spec, err := container.Spec(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get sandbox container runtime spec: %w", err) + } + si.RuntimeSpec = spec + + ctrInfo, err := container.Info(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get sandbox container info: %w", err) + } + // Do not use config.SandboxImage because the configuration might + // be changed during restart. It may not reflect the actual image + // used by the sandbox container. + si.Image = ctrInfo.Image + si.SnapshotKey = ctrInfo.SnapshotKey + si.Snapshotter = ctrInfo.Snapshotter + + runtimeOptions, err := getRuntimeOptions(ctrInfo) + if err != nil { + return nil, fmt.Errorf("failed to get runtime options: %w", err) + } + si.RuntimeType = ctrInfo.Runtime.Name + si.RuntimeOptions = runtimeOptions + + infoBytes, err := json.Marshal(si) + if err != nil { + return nil, fmt.Errorf("failed to marshal info %v: %w", si, err) + } + return map[string]string{ + "info": string(infoBytes), + }, nil +} diff --git a/pkg/cri/sbserver/sandbox_status_test.go b/pkg/cri/sbserver/sandbox_status_test.go new file mode 100644 index 000000000..b0423ddb7 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_status_test.go @@ -0,0 +1,117 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" +) + +func TestPodSandboxStatus(t *testing.T) { + const ( + id = "test-id" + ip = "10.10.10.10" + ) + additionalIPs := []string{"8.8.8.8", "2001:db8:85a3::8a2e:370:7334"} + createdAt := time.Now() + config := &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: "test-name", + Uid: "test-uid", + Namespace: "test-ns", + Attempt: 1, + }, + Linux: &runtime.LinuxPodSandboxConfig{ + SecurityContext: &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + Network: runtime.NamespaceMode_NODE, + Pid: runtime.NamespaceMode_CONTAINER, + Ipc: runtime.NamespaceMode_POD, + }, + }, + }, + Labels: map[string]string{"a": "b"}, + Annotations: map[string]string{"c": "d"}, + } + metadata := sandboxstore.Metadata{ + ID: id, + Name: "test-name", + Config: config, + RuntimeHandler: "test-runtime-handler", + } + + expected := &runtime.PodSandboxStatus{ + Id: id, + Metadata: config.GetMetadata(), + CreatedAt: createdAt.UnixNano(), + Network: &runtime.PodSandboxNetworkStatus{ + Ip: ip, + AdditionalIps: []*runtime.PodIP{ + { + Ip: additionalIPs[0], + }, + { + Ip: additionalIPs[1], + }, + }, + }, + Linux: &runtime.LinuxPodSandboxStatus{ + Namespaces: &runtime.Namespace{ + Options: &runtime.NamespaceOption{ + Network: runtime.NamespaceMode_NODE, + Pid: runtime.NamespaceMode_CONTAINER, + Ipc: runtime.NamespaceMode_POD, + }, + }, + }, + Labels: config.GetLabels(), + Annotations: config.GetAnnotations(), + RuntimeHandler: "test-runtime-handler", + } + for desc, test := range map[string]struct { + state sandboxstore.State + expectedState runtime.PodSandboxState + }{ + "sandbox state ready": { + state: sandboxstore.StateReady, + 
expectedState: runtime.PodSandboxState_SANDBOX_READY, + }, + "sandbox state not ready": { + state: sandboxstore.StateNotReady, + expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY, + }, + "sandbox state unknown": { + state: sandboxstore.StateUnknown, + expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY, + }, + } { + t.Run(desc, func(t *testing.T) { + status := sandboxstore.Status{ + CreatedAt: createdAt, + State: test.state, + } + expected.State = test.expectedState + got := toCRISandboxStatus(metadata, status, ip, additionalIPs) + assert.Equal(t, expected, got) + }) + } +} diff --git a/pkg/cri/sbserver/sandbox_stop.go b/pkg/cri/sbserver/sandbox_stop.go new file mode 100644 index 000000000..997d72eba --- /dev/null +++ b/pkg/cri/sbserver/sandbox_stop.go @@ -0,0 +1,202 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "errors" + "fmt" + "syscall" + "time" + + eventtypes "github.com/containerd/containerd/api/events" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/protobuf" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + ctrdutil "github.com/containerd/containerd/pkg/cri/util" +) + +// StopPodSandbox stops the sandbox. If there are any running containers in the +// sandbox, they should be forcibly terminated. 
+func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandboxRequest) (*runtime.StopPodSandboxResponse, error) { + sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId()) + if err != nil { + return nil, fmt.Errorf("an error occurred when try to find sandbox %q: %w", + r.GetPodSandboxId(), err) + } + + if err := c.stopPodSandbox(ctx, sandbox); err != nil { + return nil, err + } + + return &runtime.StopPodSandboxResponse{}, nil +} + +func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sandbox) error { + // Use the full sandbox id. + id := sandbox.ID + + // Stop all containers inside the sandbox. This terminates the container forcibly, + // and container may still be created, so production should not rely on this behavior. + // TODO(random-liu): Introduce a state in sandbox to avoid future container creation. + stop := time.Now() + containers := c.containerStore.List() + for _, container := range containers { + if container.SandboxID != id { + continue + } + // Forcibly stop the container. Do not use `StopContainer`, because it introduces a race + // if a container is removed after list. + if err := c.stopContainer(ctx, container, 0); err != nil { + return fmt.Errorf("failed to stop container %q: %w", container.ID, err) + } + } + + if err := c.cleanupSandboxFiles(id, sandbox.Config); err != nil { + return fmt.Errorf("failed to cleanup sandbox files: %w", err) + } + + // Only stop sandbox container when it's running or unknown. + state := sandbox.Status.Get().State + if state == sandboxstore.StateReady || state == sandboxstore.StateUnknown { + if err := c.stopSandboxContainer(ctx, sandbox); err != nil { + return fmt.Errorf("failed to stop sandbox container %q in %q state: %w", id, state, err) + } + } + sandboxRuntimeStopTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(stop) + + // Teardown network for sandbox. + if sandbox.NetNS != nil { + netStop := time.Now() + // Use empty netns path if netns is not available. 
This is defined in: + // https://github.com/containernetworking/cni/blob/v0.7.0-alpha1/SPEC.md + if closed, err := sandbox.NetNS.Closed(); err != nil { + return fmt.Errorf("failed to check network namespace closed: %w", err) + } else if closed { + sandbox.NetNSPath = "" + } + if err := c.teardownPodNetwork(ctx, sandbox); err != nil { + return fmt.Errorf("failed to destroy network for sandbox %q: %w", id, err) + } + if err := sandbox.NetNS.Remove(); err != nil { + return fmt.Errorf("failed to remove network namespace for sandbox %q: %w", id, err) + } + sandboxDeleteNetwork.UpdateSince(netStop) + } + + log.G(ctx).Infof("TearDown network for sandbox %q successfully", id) + + return nil +} + +// stopSandboxContainer kills the sandbox container. +// `task.Delete` is not called here because it will be called when +// the event monitor handles the `TaskExit` event. +func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxstore.Sandbox) error { + id := sandbox.ID + container := sandbox.Container + state := sandbox.Status.Get().State + task, err := container.Task(ctx, nil) + if err != nil { + if !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to get sandbox container: %w", err) + } + // Don't return for unknown state, some cleanup needs to be done. + if state == sandboxstore.StateUnknown { + return cleanupUnknownSandbox(ctx, id, sandbox) + } + return nil + } + + // Handle unknown state. + // The cleanup logic is the same with container unknown state. + if state == sandboxstore.StateUnknown { + // Start an exit handler for containers in unknown state. 
+ waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext()) + defer waitCancel() + exitCh, err := task.Wait(waitCtx) + if err != nil { + if !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to wait for task: %w", err) + } + return cleanupUnknownSandbox(ctx, id, sandbox) + } + + exitCtx, exitCancel := context.WithCancel(context.Background()) + stopCh := c.eventMonitor.startSandboxExitMonitor(exitCtx, id, task.Pid(), exitCh) + defer func() { + exitCancel() + // This ensures that exit monitor is stopped before + // `Wait` is cancelled, so no exit event is generated + // because of the `Wait` cancellation. + <-stopCh + }() + } + + // Kill the sandbox container. + if err = task.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) { + return fmt.Errorf("failed to kill sandbox container: %w", err) + } + + return c.waitSandboxStop(ctx, sandbox) +} + +// waitSandboxStop waits for sandbox to be stopped until context is cancelled or +// the context deadline is exceeded. +func (c *criService) waitSandboxStop(ctx context.Context, sandbox sandboxstore.Sandbox) error { + select { + case <-ctx.Done(): + return fmt.Errorf("wait sandbox container %q: %w", sandbox.ID, ctx.Err()) + case <-sandbox.Stopped(): + return nil + } +} + +// teardownPodNetwork removes the network from the pod +func (c *criService) teardownPodNetwork(ctx context.Context, sandbox sandboxstore.Sandbox) error { + netPlugin := c.getNetworkPlugin(sandbox.RuntimeHandler) + if netPlugin == nil { + return errors.New("cni config not initialized") + } + + var ( + id = sandbox.ID + path = sandbox.NetNSPath + config = sandbox.Config + ) + opts, err := cniNamespaceOpts(id, config) + if err != nil { + return fmt.Errorf("get cni namespace options: %w", err) + } + + return netPlugin.Remove(ctx, id, path, opts...) +} + +// cleanupUnknownSandbox cleanup stopped sandbox in unknown state. 
+func cleanupUnknownSandbox(ctx context.Context, id string, sandbox sandboxstore.Sandbox) error { + // Reuse handleSandboxExit to do the cleanup. + return handleSandboxExit(ctx, &eventtypes.TaskExit{ + ContainerID: id, + ID: id, + Pid: 0, + ExitStatus: unknownExitCode, + ExitedAt: protobuf.ToTimestamp(time.Now()), + }, sandbox) +} diff --git a/pkg/cri/sbserver/sandbox_stop_test.go b/pkg/cri/sbserver/sandbox_stop_test.go new file mode 100644 index 000000000..b575a4da7 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_stop_test.go @@ -0,0 +1,75 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" +) + +func TestWaitSandboxStop(t *testing.T) { + id := "test-id" + for desc, test := range map[string]struct { + state sandboxstore.State + cancel bool + timeout time.Duration + expectErr bool + }{ + "should return error if timeout exceeds": { + state: sandboxstore.StateReady, + timeout: 200 * time.Millisecond, + expectErr: true, + }, + "should return error if context is cancelled": { + state: sandboxstore.StateReady, + timeout: time.Hour, + cancel: true, + expectErr: true, + }, + "should not return error if sandbox is stopped before timeout": { + state: sandboxstore.StateNotReady, + timeout: time.Hour, + expectErr: false, + }, + } { + t.Run(desc, func(t *testing.T) { + c := newTestCRIService() + sandbox := sandboxstore.NewSandbox( + sandboxstore.Metadata{ID: id}, + sandboxstore.Status{State: test.state}, + ) + ctx := context.Background() + if test.cancel { + cancelledCtx, cancel := context.WithCancel(ctx) + cancel() + ctx = cancelledCtx + } + if test.timeout > 0 { + timeoutCtx, cancel := context.WithTimeout(ctx, test.timeout) + defer cancel() + ctx = timeoutCtx + } + err := c.waitSandboxStop(ctx, sandbox) + assert.Equal(t, test.expectErr, err != nil, desc) + }) + } +} diff --git a/pkg/cri/sbserver/service.go b/pkg/cri/sbserver/service.go new file mode 100644 index 000000000..96502cb7c --- /dev/null +++ b/pkg/cri/sbserver/service.go @@ -0,0 +1,369 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "sync" + "time" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/oci" + "github.com/containerd/containerd/pkg/cri/streaming" + "github.com/containerd/containerd/pkg/kmutex" + "github.com/containerd/containerd/plugin" + "github.com/containerd/go-cni" + "github.com/sirupsen/logrus" + "google.golang.org/grpc" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + runtime_alpha "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + + "github.com/containerd/containerd/pkg/cri/store/label" + + "github.com/containerd/containerd/pkg/atomic" + criconfig "github.com/containerd/containerd/pkg/cri/config" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + imagestore "github.com/containerd/containerd/pkg/cri/store/image" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot" + ctrdutil "github.com/containerd/containerd/pkg/cri/util" + osinterface "github.com/containerd/containerd/pkg/os" + "github.com/containerd/containerd/pkg/registrar" +) + +// defaultNetworkPlugin is used for the default CNI configuration +const defaultNetworkPlugin = "default" + +// grpcServices are all the grpc services provided by cri containerd. 
+type grpcServices interface { + runtime.RuntimeServiceServer + runtime.ImageServiceServer +} + +type grpcAlphaServices interface { + runtime_alpha.RuntimeServiceServer + runtime_alpha.ImageServiceServer +} + +// CRIService is the interface implement CRI remote service server. +type CRIService interface { + Run() error + // io.Closer is used by containerd to gracefully stop cri service. + io.Closer + Register(*grpc.Server) error + grpcServices +} + +// criService implements CRIService. +type criService struct { + // config contains all configurations. + config criconfig.Config + // imageFSPath is the path to image filesystem. + imageFSPath string + // os is an interface for all required os operations. + os osinterface.OS + // sandboxStore stores all resources associated with sandboxes. + sandboxStore *sandboxstore.Store + // sandboxNameIndex stores all sandbox names and make sure each name + // is unique. + sandboxNameIndex *registrar.Registrar + // containerStore stores all resources associated with containers. + containerStore *containerstore.Store + // containerNameIndex stores all container names and make sure each + // name is unique. + containerNameIndex *registrar.Registrar + // imageStore stores all resources associated with images. + imageStore *imagestore.Store + // snapshotStore stores information of all snapshots. + snapshotStore *snapshotstore.Store + // netPlugin is used to setup and teardown network when run/stop pod sandbox. + netPlugin map[string]cni.CNI + // client is an instance of the containerd client + client *containerd.Client + // streamServer is the streaming server serves container streaming request. + streamServer streaming.Server + // eventMonitor is the monitor monitors containerd events. + eventMonitor *eventMonitor + // initialized indicates whether the server is initialized. All GRPC services + // should return error before the server is initialized. 
+ initialized atomic.Bool + // cniNetConfMonitor is used to reload cni network conf if there is + // any valid fs change events from cni network conf dir. + cniNetConfMonitor map[string]*cniNetConfSyncer + // baseOCISpecs contains cached OCI specs loaded via `Runtime.BaseRuntimeSpec` + baseOCISpecs map[string]*oci.Spec + // allCaps is the list of the capabilities. + // When nil, parsed from CapEff of /proc/self/status. + allCaps []string // nolint + // unpackDuplicationSuppressor is used to make sure that there is only + // one in-flight fetch request or unpack handler for a given descriptor's + // or chain ID. + unpackDuplicationSuppressor kmutex.KeyedLocker +} + +// NewCRIService returns a new instance of CRIService +func NewCRIService(config criconfig.Config, client *containerd.Client) (CRIService, error) { + var err error + labels := label.NewStore() + c := &criService{ + config: config, + client: client, + os: osinterface.RealOS{}, + sandboxStore: sandboxstore.NewStore(labels), + containerStore: containerstore.NewStore(labels), + imageStore: imagestore.NewStore(client), + snapshotStore: snapshotstore.NewStore(), + sandboxNameIndex: registrar.NewRegistrar(), + containerNameIndex: registrar.NewRegistrar(), + initialized: atomic.NewBool(false), + netPlugin: make(map[string]cni.CNI), + unpackDuplicationSuppressor: kmutex.New(), + } + + if client.SnapshotService(c.config.ContainerdConfig.Snapshotter) == nil { + return nil, fmt.Errorf("failed to find snapshotter %q", c.config.ContainerdConfig.Snapshotter) + } + + c.imageFSPath = imageFSPath(config.ContainerdRootDir, config.ContainerdConfig.Snapshotter) + logrus.Infof("Get image filesystem path %q", c.imageFSPath) + + if err := c.initPlatform(); err != nil { + return nil, fmt.Errorf("initialize platform: %w", err) + } + + // prepare streaming server + c.streamServer, err = newStreamServer(c, config.StreamServerAddress, config.StreamServerPort, config.StreamIdleTimeout) + if err != nil { + return nil, 
fmt.Errorf("failed to create stream server: %w", err) + } + + c.eventMonitor = newEventMonitor(c) + + c.cniNetConfMonitor = make(map[string]*cniNetConfSyncer) + for name, i := range c.netPlugin { + path := c.config.NetworkPluginConfDir + if name != defaultNetworkPlugin { + if rc, ok := c.config.Runtimes[name]; ok { + path = rc.NetworkPluginConfDir + } + } + if path != "" { + m, err := newCNINetConfSyncer(path, i, c.cniLoadOptions()) + if err != nil { + return nil, fmt.Errorf("failed to create cni conf monitor for %s: %w", name, err) + } + c.cniNetConfMonitor[name] = m + } + } + + // Preload base OCI specs + c.baseOCISpecs, err = loadBaseOCISpecs(&config) + if err != nil { + return nil, err + } + + return c, nil +} + +// Register registers all required services onto a specific grpc server. +// This is used by containerd cri plugin. +func (c *criService) Register(s *grpc.Server) error { + return c.register(s) +} + +// RegisterTCP register all required services onto a GRPC server on TCP. +// This is used by containerd CRI plugin. +func (c *criService) RegisterTCP(s *grpc.Server) error { + if !c.config.DisableTCPService { + return c.register(s) + } + return nil +} + +// Run starts the CRI service. +func (c *criService) Run() error { + logrus.Info("Start subscribing containerd event") + c.eventMonitor.subscribe(c.client) + + logrus.Infof("Start recovering state") + if err := c.recover(ctrdutil.NamespacedContext()); err != nil { + return fmt.Errorf("failed to recover state: %w", err) + } + + // Start event handler. + logrus.Info("Start event monitor") + eventMonitorErrCh := c.eventMonitor.start() + + // Start snapshot stats syncer, it doesn't need to be stopped. 
+ logrus.Info("Start snapshots syncer") + snapshotsSyncer := newSnapshotsSyncer( + c.snapshotStore, + c.client.SnapshotService(c.config.ContainerdConfig.Snapshotter), + time.Duration(c.config.StatsCollectPeriod)*time.Second, + ) + snapshotsSyncer.start() + + // Start CNI network conf syncers + cniNetConfMonitorErrCh := make(chan error, len(c.cniNetConfMonitor)) + var netSyncGroup sync.WaitGroup + for name, h := range c.cniNetConfMonitor { + netSyncGroup.Add(1) + logrus.Infof("Start cni network conf syncer for %s", name) + go func(h *cniNetConfSyncer) { + cniNetConfMonitorErrCh <- h.syncLoop() + netSyncGroup.Done() + }(h) + } + go func() { + netSyncGroup.Wait() + close(cniNetConfMonitorErrCh) + }() + + // Start streaming server. + logrus.Info("Start streaming server") + streamServerErrCh := make(chan error) + go func() { + defer close(streamServerErrCh) + if err := c.streamServer.Start(true); err != nil && err != http.ErrServerClosed { + logrus.WithError(err).Error("Failed to start streaming server") + streamServerErrCh <- err + } + }() + + // Set the server as initialized. GRPC services could start serving traffic. + c.initialized.Set() + + var eventMonitorErr, streamServerErr, cniNetConfMonitorErr error + // Stop the whole CRI service if any of the critical service exits. + select { + case eventMonitorErr = <-eventMonitorErrCh: + case streamServerErr = <-streamServerErrCh: + case cniNetConfMonitorErr = <-cniNetConfMonitorErrCh: + } + if err := c.Close(); err != nil { + return fmt.Errorf("failed to stop cri service: %w", err) + } + // If the error is set above, err from channel must be nil here, because + // the channel is supposed to be closed. Or else, we wait and set it. + if err := <-eventMonitorErrCh; err != nil { + eventMonitorErr = err + } + logrus.Info("Event monitor stopped") + // There is a race condition with http.Server.Serve. + // When `Close` is called at the same time with `Serve`, `Close` + // may finish first, and `Serve` may still block. 
+ // See https://github.com/golang/go/issues/20239. + // Here we set a 2 second timeout for the stream server wait, + // if it timeout, an error log is generated. + // TODO(random-liu): Get rid of this after https://github.com/golang/go/issues/20239 + // is fixed. + const streamServerStopTimeout = 2 * time.Second + select { + case err := <-streamServerErrCh: + if err != nil { + streamServerErr = err + } + logrus.Info("Stream server stopped") + case <-time.After(streamServerStopTimeout): + logrus.Errorf("Stream server is not stopped in %q", streamServerStopTimeout) + } + if eventMonitorErr != nil { + return fmt.Errorf("event monitor error: %w", eventMonitorErr) + } + if streamServerErr != nil { + return fmt.Errorf("stream server error: %w", streamServerErr) + } + if cniNetConfMonitorErr != nil { + return fmt.Errorf("cni network conf monitor error: %w", cniNetConfMonitorErr) + } + return nil +} + +// Close stops the CRI service. +// TODO(random-liu): Make close synchronous. +func (c *criService) Close() error { + logrus.Info("Stop CRI service") + for name, h := range c.cniNetConfMonitor { + if err := h.stop(); err != nil { + logrus.WithError(err).Errorf("failed to stop cni network conf monitor for %s", name) + } + } + c.eventMonitor.stop() + if err := c.streamServer.Stop(); err != nil { + return fmt.Errorf("failed to stop stream server: %w", err) + } + return nil +} + +func (c *criService) register(s *grpc.Server) error { + instrumented := newInstrumentedService(c) + runtime.RegisterRuntimeServiceServer(s, instrumented) + runtime.RegisterImageServiceServer(s, instrumented) + instrumentedAlpha := newInstrumentedAlphaService(c) + runtime_alpha.RegisterRuntimeServiceServer(s, instrumentedAlpha) + runtime_alpha.RegisterImageServiceServer(s, instrumentedAlpha) + return nil +} + +// imageFSPath returns containerd image filesystem path. +// Note that if containerd changes directory layout, we also needs to change this. 
+func imageFSPath(rootDir, snapshotter string) string { + return filepath.Join(rootDir, fmt.Sprintf("%s.%s", plugin.SnapshotPlugin, snapshotter)) +} + +func loadOCISpec(filename string) (*oci.Spec, error) { + file, err := os.Open(filename) + if err != nil { + return nil, fmt.Errorf("failed to open base OCI spec: %s: %w", filename, err) + } + defer file.Close() + + spec := oci.Spec{} + if err := json.NewDecoder(file).Decode(&spec); err != nil { + return nil, fmt.Errorf("failed to parse base OCI spec file: %w", err) + } + + return &spec, nil +} + +func loadBaseOCISpecs(config *criconfig.Config) (map[string]*oci.Spec, error) { + specs := map[string]*oci.Spec{} + for _, cfg := range config.Runtimes { + if cfg.BaseRuntimeSpec == "" { + continue + } + + // Don't load same file twice + if _, ok := specs[cfg.BaseRuntimeSpec]; ok { + continue + } + + spec, err := loadOCISpec(cfg.BaseRuntimeSpec) + if err != nil { + return nil, fmt.Errorf("failed to load base OCI spec from file: %s: %w", cfg.BaseRuntimeSpec, err) + } + + specs[cfg.BaseRuntimeSpec] = spec + } + + return specs, nil +} diff --git a/pkg/cri/sbserver/service_linux.go b/pkg/cri/sbserver/service_linux.go new file mode 100644 index 000000000..5c6c32b32 --- /dev/null +++ b/pkg/cri/sbserver/service_linux.go @@ -0,0 +1,96 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package sbserver + +import ( + "fmt" + + "github.com/containerd/containerd/pkg/cap" + "github.com/containerd/containerd/pkg/userns" + "github.com/containerd/go-cni" + "github.com/opencontainers/selinux/go-selinux" + "github.com/sirupsen/logrus" +) + +// networkAttachCount is the minimum number of networks the PodSandbox +// attaches to +const networkAttachCount = 2 + +// initPlatform handles linux specific initialization for the CRI service. +func (c *criService) initPlatform() (err error) { + if userns.RunningInUserNS() { + if !(c.config.DisableCgroup && !c.apparmorEnabled() && c.config.RestrictOOMScoreAdj) { + logrus.Warn("Running containerd in a user namespace typically requires disable_cgroup, disable_apparmor, restrict_oom_score_adj set to be true") + } + } + + if c.config.EnableSelinux { + if !selinux.GetEnabled() { + logrus.Warn("Selinux is not supported") + } + if r := c.config.SelinuxCategoryRange; r > 0 { + selinux.CategoryRange = uint32(r) + } + } else { + selinux.SetDisabled() + } + + pluginDirs := map[string]string{ + defaultNetworkPlugin: c.config.NetworkPluginConfDir, + } + for name, conf := range c.config.Runtimes { + if conf.NetworkPluginConfDir != "" { + pluginDirs[name] = conf.NetworkPluginConfDir + } + } + + c.netPlugin = make(map[string]cni.CNI) + for name, dir := range pluginDirs { + max := c.config.NetworkPluginMaxConfNum + if name != defaultNetworkPlugin { + if m := c.config.Runtimes[name].NetworkPluginMaxConfNum; m != 0 { + max = m + } + } + // Pod needs to attach to at least loopback network and a non host network, + // hence networkAttachCount is 2. If there are more network configs the + // pod will be attached to all the networks but we will only use the ip + // of the default network interface as the pod IP. 
+ i, err := cni.New(cni.WithMinNetworkCount(networkAttachCount), + cni.WithPluginConfDir(dir), + cni.WithPluginMaxConfNum(max), + cni.WithPluginDir([]string{c.config.NetworkPluginBinDir})) + if err != nil { + return fmt.Errorf("failed to initialize cni: %w", err) + } + c.netPlugin[name] = i + } + + if c.allCaps == nil { + c.allCaps, err = cap.Current() + if err != nil { + return fmt.Errorf("failed to get caps: %w", err) + } + } + + return nil +} + +// cniLoadOptions returns cni load options for the linux. +func (c *criService) cniLoadOptions() []cni.Opt { + return []cni.Opt{cni.WithLoNetwork, cni.WithDefaultConf} +} diff --git a/pkg/cri/sbserver/service_other.go b/pkg/cri/sbserver/service_other.go new file mode 100644 index 000000000..41820e331 --- /dev/null +++ b/pkg/cri/sbserver/service_other.go @@ -0,0 +1,34 @@ +//go:build !windows && !linux +// +build !windows,!linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "github.com/containerd/go-cni" +) + +// initPlatform handles linux specific initialization for the CRI service. +func (c *criService) initPlatform() error { + return nil +} + +// cniLoadOptions returns cni load options for the linux. 
+func (c *criService) cniLoadOptions() []cni.Opt { + return []cni.Opt{} +} diff --git a/pkg/cri/sbserver/service_test.go b/pkg/cri/sbserver/service_test.go new file mode 100644 index 000000000..70093b1d0 --- /dev/null +++ b/pkg/cri/sbserver/service_test.go @@ -0,0 +1,88 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "encoding/json" + "os" + "testing" + + "github.com/containerd/containerd/oci" + "github.com/containerd/go-cni" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + criconfig "github.com/containerd/containerd/pkg/cri/config" + servertesting "github.com/containerd/containerd/pkg/cri/server/testing" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + imagestore "github.com/containerd/containerd/pkg/cri/store/image" + "github.com/containerd/containerd/pkg/cri/store/label" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot" + ostesting "github.com/containerd/containerd/pkg/os/testing" + "github.com/containerd/containerd/pkg/registrar" +) + +// newTestCRIService creates a fake criService for test. 
+func newTestCRIService() *criService { + labels := label.NewStore() + return &criService{ + config: testConfig, + imageFSPath: testImageFSPath, + os: ostesting.NewFakeOS(), + sandboxStore: sandboxstore.NewStore(labels), + imageStore: imagestore.NewStore(nil), + snapshotStore: snapshotstore.NewStore(), + sandboxNameIndex: registrar.NewRegistrar(), + containerStore: containerstore.NewStore(labels), + containerNameIndex: registrar.NewRegistrar(), + netPlugin: map[string]cni.CNI{ + defaultNetworkPlugin: servertesting.NewFakeCNIPlugin(), + }, + } +} + +func TestLoadBaseOCISpec(t *testing.T) { + spec := oci.Spec{Version: "1.0.2", Hostname: "default"} + + file, err := os.CreateTemp("", "spec-test-") + require.NoError(t, err) + + defer func() { + assert.NoError(t, file.Close()) + assert.NoError(t, os.RemoveAll(file.Name())) + }() + + err = json.NewEncoder(file).Encode(&spec) + assert.NoError(t, err) + + config := criconfig.Config{} + config.Runtimes = map[string]criconfig.Runtime{ + "runc": {BaseRuntimeSpec: file.Name()}, + } + + specs, err := loadBaseOCISpecs(&config) + assert.NoError(t, err) + + assert.Len(t, specs, 1) + + out, ok := specs[file.Name()] + assert.True(t, ok, "expected spec with file name %q", file.Name()) + + assert.Equal(t, "1.0.2", out.Version) + assert.Equal(t, "default", out.Hostname) +} diff --git a/pkg/cri/sbserver/service_windows.go b/pkg/cri/sbserver/service_windows.go new file mode 100644 index 000000000..f46ee2f5c --- /dev/null +++ b/pkg/cri/sbserver/service_windows.go @@ -0,0 +1,69 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "fmt" + + "github.com/containerd/go-cni" +) + +// windowsNetworkAttachCount is the minimum number of networks the PodSandbox +// attaches to +const windowsNetworkAttachCount = 1 + +// initPlatform handles linux specific initialization for the CRI service. +func (c *criService) initPlatform() error { + pluginDirs := map[string]string{ + defaultNetworkPlugin: c.config.NetworkPluginConfDir, + } + for name, conf := range c.config.Runtimes { + if conf.NetworkPluginConfDir != "" { + pluginDirs[name] = conf.NetworkPluginConfDir + } + } + + c.netPlugin = make(map[string]cni.CNI) + for name, dir := range pluginDirs { + max := c.config.NetworkPluginMaxConfNum + if name != defaultNetworkPlugin { + if m := c.config.Runtimes[name].NetworkPluginMaxConfNum; m != 0 { + max = m + } + } + // For windows, the loopback network is added as default. + // There is no need to explicitly add one hence networkAttachCount is 1. + // If there are more network configs the pod will be attached to all the + // networks but we will only use the ip of the default network interface + // as the pod IP. + i, err := cni.New(cni.WithMinNetworkCount(windowsNetworkAttachCount), + cni.WithPluginConfDir(dir), + cni.WithPluginMaxConfNum(max), + cni.WithPluginDir([]string{c.config.NetworkPluginBinDir})) + if err != nil { + return fmt.Errorf("failed to initialize cni: %w", err) + } + c.netPlugin[name] = i + } + + return nil +} + +// cniLoadOptions returns cni load options for the windows. 
+func (c *criService) cniLoadOptions() []cni.Opt { + return []cni.Opt{cni.WithDefaultConf} +} diff --git a/pkg/cri/sbserver/snapshots.go b/pkg/cri/sbserver/snapshots.go new file mode 100644 index 000000000..e8cb943da --- /dev/null +++ b/pkg/cri/sbserver/snapshots.go @@ -0,0 +1,120 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "fmt" + "time" + + "github.com/containerd/containerd/errdefs" + snapshot "github.com/containerd/containerd/snapshots" + "github.com/sirupsen/logrus" + + snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot" + ctrdutil "github.com/containerd/containerd/pkg/cri/util" +) + +// snapshotsSyncer syncs snapshot stats periodically. imagefs info and container stats +// should both use cached result here. +// TODO(random-liu): Benchmark with high workload. We may need a statsSyncer instead if +// benchmark result shows that container cpu/memory stats also need to be cached. +type snapshotsSyncer struct { + store *snapshotstore.Store + snapshotter snapshot.Snapshotter + syncPeriod time.Duration +} + +// newSnapshotsSyncer creates a snapshot syncer. +func newSnapshotsSyncer(store *snapshotstore.Store, snapshotter snapshot.Snapshotter, + period time.Duration) *snapshotsSyncer { + return &snapshotsSyncer{ + store: store, + snapshotter: snapshotter, + syncPeriod: period, + } +} + +// start starts the snapshots syncer. 
No stop function is needed because +// the syncer doesn't update any persistent states, it's fine to let it +// exit with the process. +func (s *snapshotsSyncer) start() { + tick := time.NewTicker(s.syncPeriod) + go func() { + defer tick.Stop() + // TODO(random-liu): This is expensive. We should do benchmark to + // check the resource usage and optimize this. + for { + if err := s.sync(); err != nil { + logrus.WithError(err).Error("Failed to sync snapshot stats") + } + <-tick.C + } + }() +} + +// sync updates all snapshots stats. +func (s *snapshotsSyncer) sync() error { + ctx := ctrdutil.NamespacedContext() + start := time.Now().UnixNano() + var snapshots []snapshot.Info + // Do not call `Usage` directly in collect function, because + // `Usage` takes time, we don't want `Walk` to hold read lock + // of snapshot metadata store for too long time. + // TODO(random-liu): Set timeout for the following 2 contexts. + if err := s.snapshotter.Walk(ctx, func(ctx context.Context, info snapshot.Info) error { + snapshots = append(snapshots, info) + return nil + }); err != nil { + return fmt.Errorf("walk all snapshots failed: %w", err) + } + for _, info := range snapshots { + sn, err := s.store.Get(info.Name) + if err == nil { + // Only update timestamp for non-active snapshot. + if sn.Kind == info.Kind && sn.Kind != snapshot.KindActive { + sn.Timestamp = time.Now().UnixNano() + s.store.Add(sn) + continue + } + } + // Get newest stats if the snapshot is new or active. 
+ sn = snapshotstore.Snapshot{ + Key: info.Name, + Kind: info.Kind, + Timestamp: time.Now().UnixNano(), + } + usage, err := s.snapshotter.Usage(ctx, info.Name) + if err != nil { + if !errdefs.IsNotFound(err) { + logrus.WithError(err).Errorf("Failed to get usage for snapshot %q", info.Name) + } + continue + } + sn.Size = uint64(usage.Size) + sn.Inodes = uint64(usage.Inodes) + s.store.Add(sn) + } + for _, sn := range s.store.List() { + if sn.Timestamp >= start { + continue + } + // Delete the snapshot stats if it's not updated this time. + s.store.Delete(sn.Key) + } + return nil +} diff --git a/pkg/cri/sbserver/status.go b/pkg/cri/sbserver/status.go new file mode 100644 index 000000000..f8a857b8e --- /dev/null +++ b/pkg/cri/sbserver/status.go @@ -0,0 +1,98 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "encoding/json" + "fmt" + goruntime "runtime" + + "github.com/containerd/containerd/log" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// networkNotReadyReason is the reason reported when network is not ready. +const networkNotReadyReason = "NetworkPluginNotReady" + +// Status returns the status of the runtime. +func (c *criService) Status(ctx context.Context, r *runtime.StatusRequest) (*runtime.StatusResponse, error) { + // As a containerd plugin, if CRI plugin is serving request, + // containerd must be ready. 
+ runtimeCondition := &runtime.RuntimeCondition{ + Type: runtime.RuntimeReady, + Status: true, + } + networkCondition := &runtime.RuntimeCondition{ + Type: runtime.NetworkReady, + Status: true, + } + netPlugin := c.netPlugin[defaultNetworkPlugin] + // Check the status of the cni initialization + if netPlugin != nil { + if err := netPlugin.Status(); err != nil { + networkCondition.Status = false + networkCondition.Reason = networkNotReadyReason + networkCondition.Message = fmt.Sprintf("Network plugin returns error: %v", err) + } + } + + resp := &runtime.StatusResponse{ + Status: &runtime.RuntimeStatus{Conditions: []*runtime.RuntimeCondition{ + runtimeCondition, + networkCondition, + }}, + } + if r.Verbose { + configByt, err := json.Marshal(c.config) + if err != nil { + return nil, err + } + resp.Info = make(map[string]string) + resp.Info["config"] = string(configByt) + versionByt, err := json.Marshal(goruntime.Version()) + if err != nil { + return nil, err + } + resp.Info["golang"] = string(versionByt) + + if netPlugin != nil { + cniConfig, err := json.Marshal(netPlugin.GetConfig()) + if err != nil { + log.G(ctx).WithError(err).Errorf("Failed to marshal CNI config %v", err) + } + resp.Info["cniconfig"] = string(cniConfig) + } + + defaultStatus := "OK" + for name, h := range c.cniNetConfMonitor { + s := "OK" + if h == nil { + continue + } + if lerr := h.lastStatus(); lerr != nil { + s = lerr.Error() + } + resp.Info[fmt.Sprintf("lastCNILoadStatus.%s", name)] = s + if name == defaultNetworkPlugin { + defaultStatus = s + } + } + resp.Info["lastCNILoadStatus"] = defaultStatus + } + return resp, nil +} diff --git a/pkg/cri/sbserver/streaming.go b/pkg/cri/sbserver/streaming.go new file mode 100644 index 000000000..e2ba8fa14 --- /dev/null +++ b/pkg/cri/sbserver/streaming.go @@ -0,0 +1,240 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "context" + "crypto/tls" + "errors" + "fmt" + "io" + "math" + "net" + "os" + "time" + + k8snet "k8s.io/apimachinery/pkg/util/net" + "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/client-go/tools/remotecommand" + k8scert "k8s.io/client-go/util/cert" + "k8s.io/utils/exec" + + "github.com/containerd/containerd/pkg/cri/streaming" + ctrdutil "github.com/containerd/containerd/pkg/cri/util" +) + +// streamListenerMode selects how the stream server listener is secured. +type streamListenerMode int + +const ( + x509KeyPairTLS streamListenerMode = iota + selfSignTLS + withoutTLS +) + +// getStreamListenerMode derives the listener mode from the CRI config. With +// EnableTLSStreaming set it returns x509KeyPairTLS when both the cert and key +// files are given and selfSignTLS when neither is given; with it unset it +// returns withoutTLS when neither file is given. Every other combination +// yields -1 and an error. +func getStreamListenerMode(c *criService) (streamListenerMode, error) { + if c.config.EnableTLSStreaming { + if c.config.X509KeyPairStreaming.TLSCertFile != "" && c.config.X509KeyPairStreaming.TLSKeyFile != "" { + return x509KeyPairTLS, nil + } + if c.config.X509KeyPairStreaming.TLSCertFile != "" && c.config.X509KeyPairStreaming.TLSKeyFile == "" { + return -1, errors.New("must set X509KeyPairStreaming.TLSKeyFile") + } + if c.config.X509KeyPairStreaming.TLSCertFile == "" && c.config.X509KeyPairStreaming.TLSKeyFile != "" { + return -1, errors.New("must set X509KeyPairStreaming.TLSCertFile") + } + return selfSignTLS, nil + } + if c.config.X509KeyPairStreaming.TLSCertFile != "" { + return -1, errors.New("X509KeyPairStreaming.TLSCertFile is set but EnableTLSStreaming is not set") + } + if c.config.X509KeyPairStreaming.TLSKeyFile != "" { + return -1, errors.New("X509KeyPairStreaming.TLSKeyFile is set but EnableTLSStreaming is not set") + } + return withoutTLS, nil +} + +// newStreamServer creates the streaming server bound to addr:port. An empty +// addr is resolved via k8snet.ResolveBindAddress; a non-empty +// streamIdleTimeout is parsed with time.ParseDuration and overrides the +// default idle timeout. TLS is configured according to getStreamListenerMode. +func newStreamServer(c *criService, addr, port, streamIdleTimeout
string) (streaming.Server, error) { + if addr == "" { + a, err := k8snet.ResolveBindAddress(nil) + if err != nil { + return nil, fmt.Errorf("failed to get stream server address: %w", err) + } + addr = a.String() + } + config := streaming.DefaultConfig + if streamIdleTimeout != "" { + var err error + config.StreamIdleTimeout, err = time.ParseDuration(streamIdleTimeout) + if err != nil { + return nil, fmt.Errorf("invalid stream idle timeout: %w", err) + } + } + config.Addr = net.JoinHostPort(addr, port) + run := newStreamRuntime(c) + tlsMode, err := getStreamListenerMode(c) + if err != nil { + return nil, fmt.Errorf("invalid stream server configuration: %w", err) + } + switch tlsMode { + case x509KeyPairTLS: + tlsCert, err := tls.LoadX509KeyPair(c.config.X509KeyPairStreaming.TLSCertFile, c.config.X509KeyPairStreaming.TLSKeyFile) + if err != nil { + return nil, fmt.Errorf("failed to load x509 key pair for stream server: %w", err) + } + config.TLSConfig = &tls.Config{ + Certificates: []tls.Certificate{tlsCert}, + } + return streaming.NewServer(config, run) + case selfSignTLS: + tlsCert, err := newTLSCert() + if err != nil { + return nil, fmt.Errorf("failed to generate tls certificate for stream server: %w", err) + } + config.TLSConfig = &tls.Config{ + Certificates: []tls.Certificate{tlsCert}, + // NOTE(review): crypto/tls documents InsecureSkipVerify as a client-side setting; confirm it is intended on this server config. + InsecureSkipVerify: true, + } + return streaming.NewServer(config, run) + case withoutTLS: + return streaming.NewServer(config, run) + default: + return nil, errors.New("invalid configuration for the stream listener") + } +} + +// streamRuntime adapts a criService to the streaming.Runtime interface. +type streamRuntime struct { + c *criService +} + +// newStreamRuntime returns a streaming.Runtime backed by c. +func newStreamRuntime(c *criService) streaming.Runtime { + return &streamRuntime{c: c} +} + +// Exec executes a command inside the container. exec.ExitError is returned if the command +// returns non-zero exit code.
+func (s *streamRuntime) Exec(containerID string, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, + tty bool, resize <-chan remotecommand.TerminalSize) error { + exitCode, err := s.c.execInContainer(ctrdutil.NamespacedContext(), containerID, execOptions{ + cmd: cmd, + stdin: stdin, + stdout: stdout, + stderr: stderr, + tty: tty, + resize: resize, + }) + if err != nil { + return fmt.Errorf("failed to exec in container: %w", err) + } + if *exitCode == 0 { + return nil + } + return &exec.CodeExitError{ + Err: fmt.Errorf("error executing command %v, exit code %d", cmd, *exitCode), + Code: int(*exitCode), + } +} + +// Attach connects the given streams to the container by delegating to +// criService.attachContainer with a namespaced context. +func (s *streamRuntime) Attach(containerID string, in io.Reader, out, err io.WriteCloser, tty bool, + resize <-chan remotecommand.TerminalSize) error { + return s.c.attachContainer(ctrdutil.NamespacedContext(), containerID, in, out, err, tty, resize) +} + +// PortForward forwards stream to the given port of the pod sandbox. Ports outside +// the range 1..math.MaxUint16 are rejected before criService.portForward is called. +func (s *streamRuntime) PortForward(podSandboxID string, port int32, stream io.ReadWriteCloser) error { + if port <= 0 || port > math.MaxUint16 { + return fmt.Errorf("invalid port %d", port) + } + ctx := ctrdutil.NamespacedContext() + return s.c.portForward(ctx, podSandboxID, port, stream) +} + +// handleResizing spawns a goroutine that processes the resize channel, calling resizeFunc for each +// remotecommand.TerminalSize received from the channel. +// A nil channel is a no-op. Sizes with a height or width below 1 are skipped, and the goroutine +// exits when ctx is done or the channel is closed. +func handleResizing(ctx context.Context, resize <-chan remotecommand.TerminalSize, resizeFunc func(size remotecommand.TerminalSize)) { + if resize == nil { + return + } + + go func() { + defer runtime.HandleCrash() + + for { + select { + case <-ctx.Done(): + return + case size, ok := <-resize: + if !ok { + return + } + if size.Height < 1 || size.Width < 1 { + continue + } + resizeFunc(size) + } + } + }() +} + +// newTLSCert returns a self CA signed tls.certificate. +// TODO (mikebrow): replace / rewrite this function to support using CA +// signing of the certificate.
Requires a security plan for kubernetes regarding +// CRI connections / streaming, etc. For example, kubernetes could configure or +// require a CA service and pass a configuration down through CRI. +func newTLSCert() (tls.Certificate, error) { + // fail returns the zero tls.Certificate together with err. + fail := func(err error) (tls.Certificate, error) { return tls.Certificate{}, err } + + hostName, err := os.Hostname() + if err != nil { + return fail(fmt.Errorf("failed to get hostname: %w", err)) + } + + addrs, err := net.InterfaceAddrs() + if err != nil { + return fail(fmt.Errorf("failed to get host IP addresses: %w", err)) + } + + // Collect the IP of every host interface address. Each one is passed to + // GenerateSelfSignedCertKey both as an alternate IP and, in string form, as an alternate DNS name. + var alternateIPs []net.IP + var alternateDNS []string + for _, addr := range addrs { + var ip net.IP + + switch v := addr.(type) { + case *net.IPNet: + ip = v.IP + case *net.IPAddr: + ip = v.IP + default: + continue + } + + alternateIPs = append(alternateIPs, ip) + alternateDNS = append(alternateDNS, ip.String()) + } + + // Generate a self signed certificate key (CA is self) + certPem, keyPem, err := k8scert.GenerateSelfSignedCertKey(hostName, alternateIPs, alternateDNS) + if err != nil { + return fail(fmt.Errorf("certificate key could not be created: %w", err)) + } + + // Load the tls certificate + tlsCert, err := tls.X509KeyPair(certPem, keyPem) + if err != nil { + return fail(fmt.Errorf("certificate could not be loaded: %w", err)) + } + + return tlsCert, nil +} diff --git a/pkg/cri/sbserver/streaming_test.go b/pkg/cri/sbserver/streaming_test.go new file mode 100644 index 000000000..7d43cdb18 --- /dev/null +++ b/pkg/cri/sbserver/streaming_test.go @@ -0,0 +1,153 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "testing" + + "github.com/containerd/containerd/pkg/cri/config" + "github.com/stretchr/testify/assert" +) + +// TestValidateStreamServer checks that getStreamListenerMode maps each combination of +// EnableTLSStreaming and the X509 key pair settings to the expected listener mode or to an error. +func TestValidateStreamServer(t *testing.T) { + for desc, test := range map[string]struct { + *criService + tlsMode streamListenerMode + expectErr bool + }{ + "should pass with default withoutTLS": { + criService: &criService{ + config: config.Config{ + PluginConfig: config.DefaultConfig(), + }, + }, + tlsMode: withoutTLS, + expectErr: false, + }, + "should pass with x509KeyPairTLS": { + criService: &criService{ + config: config.Config{ + PluginConfig: config.PluginConfig{ + EnableTLSStreaming: true, + X509KeyPairStreaming: config.X509KeyPairStreaming{ + TLSKeyFile: "non-empty", + TLSCertFile: "non-empty", + }, + }, + }, + }, + tlsMode: x509KeyPairTLS, + expectErr: false, + }, + "should pass with selfSign": { + criService: &criService{ + config: config.Config{ + PluginConfig: config.PluginConfig{ + EnableTLSStreaming: true, + }, + }, + }, + tlsMode: selfSignTLS, + expectErr: false, + }, + "should return error with X509 keypair but not EnableTLSStreaming": { + criService: &criService{ + config: config.Config{ + PluginConfig: config.PluginConfig{ + EnableTLSStreaming: false, + X509KeyPairStreaming: config.X509KeyPairStreaming{ + TLSKeyFile: "non-empty", + TLSCertFile: "non-empty", + }, + }, + }, + }, + tlsMode: -1, + expectErr: true, + }, + "should return error with X509 TLSCertFile empty": { + criService: &criService{ + config: config.Config{ + PluginConfig: config.PluginConfig{ + EnableTLSStreaming: true,
+ X509KeyPairStreaming: config.X509KeyPairStreaming{ + TLSKeyFile: "non-empty", + TLSCertFile: "", + }, + }, + }, + }, + tlsMode: -1, + expectErr: true, + }, + "should return error with X509 TLSKeyFile empty": { + criService: &criService{ + config: config.Config{ + PluginConfig: config.PluginConfig{ + EnableTLSStreaming: true, + X509KeyPairStreaming: config.X509KeyPairStreaming{ + TLSKeyFile: "", + TLSCertFile: "non-empty", + }, + }, + }, + }, + tlsMode: -1, + expectErr: true, + }, + "should return error without EnableTLSStreaming and only TLSCertFile set": { + criService: &criService{ + config: config.Config{ + PluginConfig: config.PluginConfig{ + EnableTLSStreaming: false, + X509KeyPairStreaming: config.X509KeyPairStreaming{ + TLSKeyFile: "", + TLSCertFile: "non-empty", + }, + }, + }, + }, + tlsMode: -1, + expectErr: true, + }, + "should return error without EnableTLSStreaming and only TLSKeyFile set": { + criService: &criService{ + config: config.Config{ + PluginConfig: config.PluginConfig{ + EnableTLSStreaming: false, + X509KeyPairStreaming: config.X509KeyPairStreaming{ + TLSKeyFile: "non-empty", + TLSCertFile: "", + }, + }, + }, + }, + tlsMode: -1, + expectErr: true, + }, + } { + t.Run(desc, func(t *testing.T) { + tlsMode, err := getStreamListenerMode(test.criService) + if test.expectErr { + // Only the error is asserted for failing cases; the expected tlsMode is not compared. + assert.Error(t, err) + return + } + assert.NoError(t, err) + assert.Equal(t, test.tlsMode, tlsMode) + }) + } +} diff --git a/pkg/cri/sbserver/test_config.go b/pkg/cri/sbserver/test_config.go new file mode 100644 index 000000000..06e1f329b --- /dev/null +++ b/pkg/cri/sbserver/test_config.go @@ -0,0 +1,38 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import criconfig "github.com/containerd/containerd/pkg/cri/config" + +const ( + testRootDir = "/test/root" + testStateDir = "/test/state" + // Use an image id as the test sandbox image to avoid image name resolution. + // TODO(random-liu): Change this to image name after we have complete image + // management unit test framework. + testSandboxImage = "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113798" + testImageFSPath = "/test/image/fs/path" +) + +// testConfig is a CRI config built from the test constants above: test root and state +// directories, the test sandbox image, and TolerateMissingHugetlbController enabled. +var testConfig = criconfig.Config{ + RootDir: testRootDir, + StateDir: testStateDir, + PluginConfig: criconfig.PluginConfig{ + SandboxImage: testSandboxImage, + TolerateMissingHugetlbController: true, + }, +} diff --git a/pkg/cri/sbserver/update_runtime_config.go b/pkg/cri/sbserver/update_runtime_config.go new file mode 100644 index 000000000..db21b0785 --- /dev/null +++ b/pkg/cri/sbserver/update_runtime_config.go @@ -0,0 +1,133 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/ + +package sbserver + +import ( + "context" + "fmt" + "net" + "os" + "path/filepath" + "strings" + "text/template" + + "github.com/containerd/containerd/log" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// cniConfigTemplate contains the values containerd will overwrite +// in the cni config template. +type cniConfigTemplate struct { + // PodCIDR is the cidr for pods on the node. + PodCIDR string + // PodCIDRRanges is the cidr ranges for pods on the node. + PodCIDRRanges []string + // Routes is a list of routes configured. + Routes []string +} + +const ( + // cniConfigFileName is the name of cni config file generated by containerd. + cniConfigFileName = "10-containerd-net.conflist" + // zeroCIDRv6 is the null route for IPv6. + zeroCIDRv6 = "::/0" + // zeroCIDRv4 is the null route for IPv4. + zeroCIDRv4 = "0.0.0.0/0" +) + +// UpdateRuntimeConfig updates the runtime config. Currently only handles podCIDR updates. +// The pod CIDR of the request is a comma separated list; an unparsable entry is reported as an error. +// Nothing is written when the request carries no pod CIDR, when no cni config template is +// configured, when the default network plugin is missing, or when the plugin reports a ready +// status or manages to load an existing config. Otherwise the template is rendered into +// cniConfigFileName under NetworkPluginConfDir, replacing any previous content of that file. +func (c *criService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateRuntimeConfigRequest) (*runtime.UpdateRuntimeConfigResponse, error) { + podCIDRs := r.GetRuntimeConfig().GetNetworkConfig().GetPodCidr() + if podCIDRs == "" { + return &runtime.UpdateRuntimeConfigResponse{}, nil + } + cidrs := strings.Split(podCIDRs, ",") + for i := range cidrs { + cidrs[i] = strings.TrimSpace(cidrs[i]) + } + routes, err := getRoutes(cidrs) + if err != nil { + return nil, fmt.Errorf("get routes: %w", err) + } + + confTemplate := c.config.NetworkPluginConfTemplate + if confTemplate == "" { + log.G(ctx).Info("No cni config template is specified, wait for other system components to drop the config.") + return &runtime.UpdateRuntimeConfigResponse{}, nil + } + netPlugin := c.netPlugin[defaultNetworkPlugin] + if netPlugin == nil { + // There is no plugin to query or load, so there is nothing to generate a config for. + log.G(ctx).Infof("Default network plugin is not initialized, skip generating cni config from template %q", confTemplate) + return &runtime.UpdateRuntimeConfigResponse{}, nil + } + if err := netPlugin.Status(); err == nil {
+ log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) + return &runtime.UpdateRuntimeConfigResponse{}, nil + } else if err := netPlugin.Load(c.cniLoadOptions()...); err == nil { + log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate) + return &runtime.UpdateRuntimeConfigResponse{}, nil + } + log.G(ctx).Infof("Generating cni config from template %q", confTemplate) + // generate cni config file from the template with updated pod cidr. + t, err := template.ParseFiles(confTemplate) + if err != nil { + return nil, fmt.Errorf("failed to parse cni config template %q: %w", confTemplate, err) + } + if err := os.MkdirAll(c.config.NetworkPluginConfDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create cni config directory: %q: %w", c.config.NetworkPluginConfDir, err) + } + confFile := filepath.Join(c.config.NetworkPluginConfDir, cniConfigFileName) + // O_TRUNC drops any previous content so that a shorter rendering cannot leave stale bytes behind. + f, err := os.OpenFile(confFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return nil, fmt.Errorf("failed to open cni config file %q: %w", confFile, err) + } + defer f.Close() + if err := t.Execute(f, cniConfigTemplate{ + PodCIDR: cidrs[0], + PodCIDRRanges: cidrs, + Routes: routes, + }); err != nil { + return nil, fmt.Errorf("failed to generate cni config file %q: %w", confFile, err) + } + return &runtime.UpdateRuntimeConfigResponse{}, nil +} + +// getRoutes generates required routes for the passed in cidrs.
+func getRoutes(cidrs []string) ([]string, error) { + var ( + routes []string + hasV4, hasV6 bool + ) + // Record which IP families appear among the cidrs; a parse failure aborts with the raw net.ParseCIDR error. + for _, c := range cidrs { + _, cidr, err := net.ParseCIDR(c) + if err != nil { + return nil, err + } + if cidr.IP.To4() != nil { + hasV4 = true + } else { + hasV6 = true + } + } + // Emit at most one route per family, IPv4 first, regardless of how many cidrs were given. + if hasV4 { + routes = append(routes, zeroCIDRv4) + } + if hasV6 { + routes = append(routes, zeroCIDRv6) + } + return routes, nil +} diff --git a/pkg/cri/sbserver/update_runtime_config_test.go b/pkg/cri/sbserver/update_runtime_config_test.go new file mode 100644 index 000000000..6bbb64433 --- /dev/null +++ b/pkg/cri/sbserver/update_runtime_config_test.go @@ -0,0 +1,137 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/ + +package sbserver + +import ( + "context" + "errors" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + criconfig "github.com/containerd/containerd/pkg/cri/config" + servertesting "github.com/containerd/containerd/pkg/cri/server/testing" +) + +// TestUpdateRuntimeConfig checks in which cases UpdateRuntimeConfig renders the cni config +// template into the conf dir, and that the generated file matches the expected content. +func TestUpdateRuntimeConfig(t *testing.T) { + const ( + testTemplate = ` +{ + "name": "test-pod-network", + "cniVersion": "1.0.0", + "plugins": [ + { + "type": "ptp", + "mtu": 1460, + "ipam": { + "type": "host-local", + "subnet": "{{.PodCIDR}}", + "ranges": [{{range $i, $range := .PodCIDRRanges}}{{if $i}}, {{end}}[{"subnet": "{{$range}}"}]{{end}}], + "routes": [{{range $i, $route := .Routes}}{{if $i}}, {{end}}{"dst": "{{$route}}"}{{end}}] + } + }, + ] +}` + testCIDR = "10.0.0.0/24, 2001:4860:4860::/64" + expected = ` +{ + "name": "test-pod-network", + "cniVersion": "1.0.0", + "plugins": [ + { + "type": "ptp", + "mtu": 1460, + "ipam": { + "type": "host-local", + "subnet": "10.0.0.0/24", + "ranges": [[{"subnet": "10.0.0.0/24"}], [{"subnet": "2001:4860:4860::/64"}]], + "routes": [{"dst": "0.0.0.0/0"}, {"dst": "::/0"}] + } + }, + ] +}` + ) + + for name, test := range map[string]struct { + noTemplate bool + emptyCIDR bool + networkReady bool + expectCNIConfig bool + }{ + "should not generate cni config if cidr is empty": { + emptyCIDR: true, + expectCNIConfig: false, + }, + "should not generate cni config if template file is not specified": { + noTemplate: true, + expectCNIConfig: false, + }, + "should not generate cni config if network is ready": { + networkReady: true, + expectCNIConfig: false, + }, + "should generate cni config if template is specified and cidr is provided": { + expectCNIConfig: true, + }, + } { + t.Run(name, func(t *testing.T) { + testDir := t.TempDir() + templateName := filepath.Join(testDir, "template") + err := os.WriteFile(templateName, []byte(testTemplate), 0666)
+ require.NoError(t, err) + confDir := filepath.Join(testDir, "net.d") + confName := filepath.Join(confDir, cniConfigFileName) + + c := newTestCRIService() + c.config.CniConfig = criconfig.CniConfig{ + NetworkPluginConfDir: confDir, + NetworkPluginConfTemplate: templateName, + } + req := &runtime.UpdateRuntimeConfigRequest{ + RuntimeConfig: &runtime.RuntimeConfig{ + NetworkConfig: &runtime.NetworkConfig{ + PodCidr: testCIDR, + }, + }, + } + if test.noTemplate { + c.config.CniConfig.NetworkPluginConfTemplate = "" + } + if test.emptyCIDR { + req.RuntimeConfig.NetworkConfig.PodCidr = "" + } + // Simulate a network that is not ready: StatusErr and LoadErr are set on the fake plugin so that, + // presumably, its Status and Load calls fail and config generation is attempted. + if !test.networkReady { + c.netPlugin[defaultNetworkPlugin].(*servertesting.FakeCNIPlugin).StatusErr = errors.New("random error") + c.netPlugin[defaultNetworkPlugin].(*servertesting.FakeCNIPlugin).LoadErr = errors.New("random error") + } + _, err = c.UpdateRuntimeConfig(context.Background(), req) + assert.NoError(t, err) + if !test.expectCNIConfig { + _, err := os.Stat(confName) + assert.Error(t, err) + } else { + got, err := os.ReadFile(confName) + assert.NoError(t, err) + assert.Equal(t, expected, string(got)) + } + }) + } +} diff --git a/pkg/cri/sbserver/version.go b/pkg/cri/sbserver/version.go new file mode 100644 index 000000000..20a754e72 --- /dev/null +++ b/pkg/cri/sbserver/version.go @@ -0,0 +1,54 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/ + +package sbserver + +import ( + "context" + + "github.com/containerd/containerd/version" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + runtime_alpha "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + + "github.com/containerd/containerd/pkg/cri/constants" +) + +const ( + // containerName is the value reported as RuntimeName in version responses. + containerName = "containerd" + // kubeAPIVersion is the api version of kubernetes. + // TODO(random-liu): Change this to actual CRI version. + kubeAPIVersion = "0.1.0" +) + +// Version returns the runtime name, runtime version and runtime API version. +func (c *criService) Version(ctx context.Context, r *runtime.VersionRequest) (*runtime.VersionResponse, error) { + return &runtime.VersionResponse{ + Version: kubeAPIVersion, + RuntimeName: containerName, + RuntimeVersion: version.Version, + RuntimeApiVersion: constants.CRIVersion, + }, nil +} + +// AlphaVersion returns the runtime name, runtime version and runtime API version +// using the v1alpha2 CRI types; the API version reported is constants.CRIVersionAlpha. +func (c *criService) AlphaVersion(ctx context.Context, r *runtime_alpha.VersionRequest) (*runtime_alpha.VersionResponse, error) { + return &runtime_alpha.VersionResponse{ + Version: kubeAPIVersion, + RuntimeName: containerName, + RuntimeVersion: version.Version, + RuntimeApiVersion: constants.CRIVersionAlpha, + }, nil +}