Move CRI from pkg/ to internal/

Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
This commit is contained in:
Maksym Pavlenko
2024-02-02 09:45:44 -08:00
parent db1e16da34
commit bbac058cf3
215 changed files with 254 additions and 254 deletions

View File

@@ -0,0 +1,45 @@
//go:build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"fmt"
"github.com/containerd/containerd/v2/pkg/blockio"
"github.com/containerd/log"
)
// blockIOClassFromAnnotations examines container and pod annotations of a
// container and returns its effective blockio class.
func (c *criService) blockIOClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) {
cls, err := blockio.ContainerClassFromAnnotations(containerName, containerAnnotations, podAnnotations)
if err != nil {
return "", err
}
if cls != "" && !blockio.IsEnabled() {
if c.config.ContainerdConfig.IgnoreBlockIONotEnabledErrors {
cls = ""
log.L.Debugf("continuing create container %s, ignoring blockio not enabled (%v)", containerName, err)
} else {
return "", fmt.Errorf("blockio disabled, refusing to set blockio class of container %q to %q", containerName, cls)
}
}
return cls, nil
}

View File

@@ -0,0 +1,23 @@
//go:build !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
func (c *criService) blockIOClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) {
return "", nil
}

View File

@@ -0,0 +1,133 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"fmt"
"os"
"path/filepath"
"sync"
"github.com/containerd/go-cni"
"github.com/containerd/log"
"github.com/fsnotify/fsnotify"
)
// cniNetConfSyncer is used to reload cni network conf triggered by fs change
// events.
type cniNetConfSyncer struct {
// only used for lastSyncStatus
sync.RWMutex
lastSyncStatus error
watcher *fsnotify.Watcher
confDir string
netPlugin cni.CNI
loadOpts []cni.Opt
}
// newCNINetConfSyncer creates cni network conf syncer.
func newCNINetConfSyncer(confDir string, netPlugin cni.CNI, loadOpts []cni.Opt) (*cniNetConfSyncer, error) {
watcher, err := fsnotify.NewWatcher()
if err != nil {
return nil, fmt.Errorf("failed to create fsnotify watcher: %w", err)
}
// /etc/cni has to be readable for non-root users (0755), because /etc/cni/tuning/allowlist.conf is used for rootless mode too.
// This file was introduced in CNI plugins 1.2.0 (https://github.com/containernetworking/plugins/pull/693), and its path is hard-coded.
confDirParent := filepath.Dir(confDir)
if err := os.MkdirAll(confDirParent, 0755); err != nil {
return nil, fmt.Errorf("failed to create the parent of the cni conf dir=%s: %w", confDirParent, err)
}
if err := os.MkdirAll(confDir, 0700); err != nil {
return nil, fmt.Errorf("failed to create cni conf dir=%s for watch: %w", confDir, err)
}
if err := watcher.Add(confDir); err != nil {
return nil, fmt.Errorf("failed to watch cni conf dir %s: %w", confDir, err)
}
syncer := &cniNetConfSyncer{
watcher: watcher,
confDir: confDir,
netPlugin: netPlugin,
loadOpts: loadOpts,
}
if err := syncer.netPlugin.Load(syncer.loadOpts...); err != nil {
log.L.WithError(err).Error("failed to load cni during init, please check CRI plugin status before setting up network for pods")
syncer.updateLastStatus(err)
}
return syncer, nil
}
// syncLoop monitors any fs change events from cni conf dir and tries to reload
// cni configuration.
func (syncer *cniNetConfSyncer) syncLoop() error {
for {
select {
case event, ok := <-syncer.watcher.Events:
if !ok {
log.L.Debugf("cni watcher channel is closed")
return nil
}
// Only reload config when receiving write/rename/remove
// events
//
// TODO(fuweid): Might only reload target cni config
// files to prevent no-ops.
if event.Has(fsnotify.Chmod) || event.Has(fsnotify.Create) {
log.L.Debugf("ignore event from cni conf dir: %s", event)
continue
}
log.L.Debugf("receiving change event from cni conf dir: %s", event)
lerr := syncer.netPlugin.Load(syncer.loadOpts...)
if lerr != nil {
log.L.WithError(lerr).
Errorf("failed to reload cni configuration after receiving fs change event(%s)", event)
}
syncer.updateLastStatus(lerr)
case err := <-syncer.watcher.Errors:
if err != nil {
log.L.WithError(err).Error("failed to continue sync cni conf change")
return err
}
}
}
}
// lastStatus retrieves last sync status.
func (syncer *cniNetConfSyncer) lastStatus() error {
syncer.RLock()
defer syncer.RUnlock()
return syncer.lastSyncStatus
}
// updateLastStatus will be called after every single cni load.
func (syncer *cniNetConfSyncer) updateLastStatus(err error) {
syncer.Lock()
defer syncer.Unlock()
syncer.lastSyncStatus = err
}
// stop stops watcher in the syncLoop.
func (syncer *cniNetConfSyncer) stop() error {
return syncer.watcher.Close()
}

View File

@@ -0,0 +1,84 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
"io"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/log"
"k8s.io/client-go/tools/remotecommand"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
cio "github.com/containerd/containerd/v2/internal/cri/io"
)
// Attach prepares a streaming endpoint to attach to a running container, and returns the address.
func (c *criService) Attach(ctx context.Context, r *runtime.AttachRequest) (*runtime.AttachResponse, error) {
cntr, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, fmt.Errorf("failed to find container in store: %w", err)
}
state := cntr.Status.Get().State()
if state != runtime.ContainerState_CONTAINER_RUNNING {
return nil, fmt.Errorf("container is in %s state", criContainerStateToString(state))
}
return c.streamServer.GetAttach(r)
}
func (c *criService) attachContainer(ctx context.Context, id string, stdin io.Reader, stdout, stderr io.WriteCloser,
tty bool, resize <-chan remotecommand.TerminalSize) error {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
// Get container from our container store.
cntr, err := c.containerStore.Get(id)
if err != nil {
return fmt.Errorf("failed to find container %q in store: %w", id, err)
}
id = cntr.ID
state := cntr.Status.Get().State()
if state != runtime.ContainerState_CONTAINER_RUNNING {
return fmt.Errorf("container is in %s state", criContainerStateToString(state))
}
task, err := cntr.Container.Task(ctx, nil)
if err != nil {
return fmt.Errorf("failed to load task: %w", err)
}
handleResizing(ctx, resize, func(size remotecommand.TerminalSize) {
if err := task.Resize(ctx, uint32(size.Width), uint32(size.Height)); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to resize task %q console", id)
}
})
opts := cio.AttachOptions{
Stdin: stdin,
Stdout: stdout,
Stderr: stderr,
Tty: tty,
StdinOnce: cntr.Config.StdinOnce,
CloseStdin: func() error {
return task.CloseIO(ctx, containerd.WithStdinCloser)
},
}
// TODO(random-liu): Figure out whether we need to support historical output.
cntr.IO.Attach(opts)
return nil
}

View File

@@ -0,0 +1,29 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (res *runtime.CheckpointContainerResponse, err error) {
return nil, status.Errorf(codes.Unimplemented, "method CheckpointContainer not implemented")
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,270 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"strconv"
"strings"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/contrib/apparmor"
"github.com/containerd/containerd/v2/contrib/seccomp"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/pkg/oci"
customopts "github.com/containerd/containerd/v2/internal/cri/opts"
)
const (
// profileNamePrefix is the prefix for loading profiles on a localhost. Eg. AppArmor localhost/profileName.
profileNamePrefix = "localhost/" // TODO (mikebrow): get localhost/ & runtime/default from CRI kubernetes/kubernetes#51747
// runtimeDefault indicates that we should use or create a runtime default profile.
runtimeDefault = "runtime/default"
// dockerDefault indicates that we should use or create a docker default profile.
dockerDefault = "docker/default"
// appArmorDefaultProfileName is name to use when creating a default apparmor profile.
appArmorDefaultProfileName = "cri-containerd.apparmor.d"
// unconfinedProfile is a string indicating one should run a pod/containerd without a security profile
unconfinedProfile = "unconfined"
// seccompDefaultProfile is the default seccomp profile.
seccompDefaultProfile = dockerDefault
)
func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
var (
specOpts []oci.SpecOpts
err error
)
securityContext := config.GetLinux().GetSecurityContext()
userstr := "0" // runtime default
if securityContext.GetRunAsUsername() != "" {
userstr = securityContext.GetRunAsUsername()
} else if securityContext.GetRunAsUser() != nil {
userstr = strconv.FormatInt(securityContext.GetRunAsUser().GetValue(), 10)
} else if imageConfig.User != "" {
userstr, _, _ = strings.Cut(imageConfig.User, ":")
}
specOpts = append(specOpts, customopts.WithAdditionalGIDs(userstr),
customopts.WithSupplementalGroups(securityContext.GetSupplementalGroups()))
asp := securityContext.GetApparmor()
if asp == nil {
asp, err = generateApparmorSecurityProfile(securityContext.GetApparmorProfile()) //nolint:staticcheck // Deprecated but we don't want to remove yet
if err != nil {
return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err)
}
}
apparmorSpecOpts, err := generateApparmorSpecOpts(
asp,
securityContext.GetPrivileged(),
c.apparmorEnabled())
if err != nil {
return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err)
}
if apparmorSpecOpts != nil {
specOpts = append(specOpts, apparmorSpecOpts)
}
ssp := securityContext.GetSeccomp()
if ssp == nil {
ssp, err = generateSeccompSecurityProfile(
securityContext.GetSeccompProfilePath(), //nolint:staticcheck // Deprecated but we don't want to remove yet
c.config.UnsetSeccompProfile)
if err != nil {
return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err)
}
}
seccompSpecOpts, err := c.generateSeccompSpecOpts(
ssp,
securityContext.GetPrivileged(),
c.seccompEnabled())
if err != nil {
return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err)
}
if seccompSpecOpts != nil {
specOpts = append(specOpts, seccompSpecOpts)
}
if c.config.EnableCDI {
specOpts = append(specOpts, customopts.WithCDI(config.Annotations, config.CDIDevices))
}
return specOpts, nil
}
func generateSeccompSecurityProfile(profilePath string, unsetProfilePath string) (*runtime.SecurityProfile, error) {
if profilePath != "" {
return generateSecurityProfile(profilePath)
}
if unsetProfilePath != "" {
return generateSecurityProfile(unsetProfilePath)
}
return nil, nil
}
func generateApparmorSecurityProfile(profilePath string) (*runtime.SecurityProfile, error) {
if profilePath != "" {
return generateSecurityProfile(profilePath)
}
return nil, nil
}
func generateSecurityProfile(profilePath string) (*runtime.SecurityProfile, error) {
switch profilePath {
case runtimeDefault, dockerDefault, "":
return &runtime.SecurityProfile{
ProfileType: runtime.SecurityProfile_RuntimeDefault,
}, nil
case unconfinedProfile:
return &runtime.SecurityProfile{
ProfileType: runtime.SecurityProfile_Unconfined,
}, nil
default:
// Require and Trim default profile name prefix
if !strings.HasPrefix(profilePath, profileNamePrefix) {
return nil, fmt.Errorf("invalid profile %q", profilePath)
}
return &runtime.SecurityProfile{
ProfileType: runtime.SecurityProfile_Localhost,
LocalhostRef: strings.TrimPrefix(profilePath, profileNamePrefix),
}, nil
}
}
// generateSeccompSpecOpts generates containerd SpecOpts for seccomp.
func (c *criService) generateSeccompSpecOpts(sp *runtime.SecurityProfile, privileged, seccompEnabled bool) (oci.SpecOpts, error) {
if privileged {
// Do not set seccomp profile when container is privileged
return nil, nil
}
if !seccompEnabled {
if sp != nil {
if sp.ProfileType != runtime.SecurityProfile_Unconfined {
return nil, errors.New("seccomp is not supported")
}
}
return nil, nil
}
if sp == nil {
return nil, nil
}
if sp.ProfileType != runtime.SecurityProfile_Localhost && sp.LocalhostRef != "" {
return nil, errors.New("seccomp config invalid LocalhostRef must only be set if ProfileType is Localhost")
}
switch sp.ProfileType {
case runtime.SecurityProfile_Unconfined:
// Do not set seccomp profile.
return nil, nil
case runtime.SecurityProfile_RuntimeDefault:
return seccomp.WithDefaultProfile(), nil
case runtime.SecurityProfile_Localhost:
// trimming the localhost/ prefix just in case even though it should not
// be necessary with the new SecurityProfile struct
return seccomp.WithProfile(strings.TrimPrefix(sp.LocalhostRef, profileNamePrefix)), nil
default:
return nil, errors.New("seccomp unknown ProfileType")
}
}
// generateApparmorSpecOpts generates containerd SpecOpts for apparmor.
func generateApparmorSpecOpts(sp *runtime.SecurityProfile, privileged, apparmorEnabled bool) (oci.SpecOpts, error) {
if !apparmorEnabled {
// Should fail loudly if user try to specify apparmor profile
// but we don't support it.
if sp != nil {
if sp.ProfileType != runtime.SecurityProfile_Unconfined {
return nil, errors.New("apparmor is not supported")
}
}
return nil, nil
}
if sp == nil {
// Based on kubernetes#51746, default apparmor profile should be applied
// for when apparmor is not specified.
sp, _ = generateSecurityProfile("")
}
if sp.ProfileType != runtime.SecurityProfile_Localhost && sp.LocalhostRef != "" {
return nil, errors.New("apparmor config invalid LocalhostRef must only be set if ProfileType is Localhost")
}
switch sp.ProfileType {
case runtime.SecurityProfile_Unconfined:
// Do not set apparmor profile.
return nil, nil
case runtime.SecurityProfile_RuntimeDefault:
if privileged {
// Do not set apparmor profile when container is privileged
return nil, nil
}
// TODO (mikebrow): delete created apparmor default profile
return apparmor.WithDefaultProfile(appArmorDefaultProfileName), nil
case runtime.SecurityProfile_Localhost:
// trimming the localhost/ prefix just in case even through it should not
// be necessary with the new SecurityProfile struct
appArmorProfile := strings.TrimPrefix(sp.LocalhostRef, profileNamePrefix)
if profileExists, err := appArmorProfileExists(appArmorProfile); !profileExists {
if err != nil {
return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err)
}
return nil, fmt.Errorf("apparmor profile not found %s", appArmorProfile)
}
return apparmor.WithProfile(appArmorProfile), nil
default:
return nil, errors.New("apparmor unknown ProfileType")
}
}
// appArmorProfileExists scans apparmor/profiles for the requested profile
func appArmorProfileExists(profile string) (bool, error) {
if profile == "" {
return false, errors.New("nil apparmor profile is not supported")
}
profiles, err := os.Open("/sys/kernel/security/apparmor/profiles")
if err != nil {
return false, err
}
defer profiles.Close()
rbuff := bufio.NewReader(profiles)
for {
line, err := rbuff.ReadString('\n')
switch err {
case nil:
if strings.HasPrefix(line, profile+" (") {
return true, nil
}
case io.EOF:
return false, nil
default:
return false, err
}
}
}
// snapshotterOpts returns any Linux specific snapshotter options for the rootfs snapshot
func snapshotterOpts(config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions()
return snapshotterRemapOpts(nsOpts)
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,36 @@
//go:build !windows && !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/pkg/oci"
)
func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
return []oci.SpecOpts{}, nil
}
// snapshotterOpts returns snapshotter options for the rootfs snapshot
func snapshotterOpts(config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
return []snapshots.Opt{}, nil
}

View File

@@ -0,0 +1,115 @@
//go:build !windows && !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/internal/cri/annotations"
)
// checkMount is defined by all tests but not used here
var _ = checkMount
func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig,
*imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) {
config := &runtime.ContainerConfig{
Metadata: &runtime.ContainerMetadata{
Name: "test-name",
Attempt: 1,
},
Image: &runtime.ImageSpec{
Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799",
},
Command: []string{"test", "command"},
Args: []string{"test", "args"},
WorkingDir: "test-cwd",
Envs: []*runtime.KeyValue{
{Key: "k1", Value: "v1"},
{Key: "k2", Value: "v2"},
{Key: "k3", Value: "v3=v3bis"},
{Key: "k4", Value: "v4=v4bis=foop"},
},
Labels: map[string]string{"a": "b"},
Annotations: map[string]string{"ca-c": "ca-d"},
Mounts: []*runtime.Mount{
// everything default
{
ContainerPath: "container-path-1",
HostPath: "host-path-1",
},
// readOnly
{
ContainerPath: "container-path-2",
HostPath: "host-path-2",
Readonly: true,
},
},
}
sandboxConfig := &runtime.PodSandboxConfig{
Metadata: &runtime.PodSandboxMetadata{
Name: "test-sandbox-name",
Uid: "test-sandbox-uid",
Namespace: "test-sandbox-ns",
Attempt: 2,
},
Annotations: map[string]string{"c": "d"},
}
imageConfig := &imagespec.ImageConfig{
Env: []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"},
Entrypoint: []string{"/entrypoint"},
Cmd: []string{"cmd"},
WorkingDir: "/workspace",
}
specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) {
assert.Equal(t, []string{"test", "command", "test", "args"}, spec.Process.Args)
assert.Equal(t, "test-cwd", spec.Process.Cwd)
assert.Contains(t, spec.Process.Env, "k1=v1", "k2=v2", "k3=v3=v3bis", "ik4=iv4=iv4bis=boop")
assert.Contains(t, spec.Process.Env, "ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "k4=v4=v4bis=foop")
t.Logf("Check bind mount")
checkMount(t, spec.Mounts, "host-path-1", "container-path-1", "bind", []string{"rw"}, nil)
checkMount(t, spec.Mounts, "host-path-2", "container-path-2", "bind", []string{"ro"}, nil)
t.Logf("Check PodSandbox annotations")
assert.Contains(t, spec.Annotations, annotations.SandboxID)
assert.EqualValues(t, spec.Annotations[annotations.SandboxID], sandboxID)
assert.Contains(t, spec.Annotations, annotations.ContainerType)
assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeContainer)
assert.Contains(t, spec.Annotations, annotations.SandboxNamespace)
assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-sandbox-ns")
assert.Contains(t, spec.Annotations, annotations.SandboxUID)
assert.EqualValues(t, spec.Annotations[annotations.SandboxUID], "test-sandbox-uid")
assert.Contains(t, spec.Annotations, annotations.SandboxName)
assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-sandbox-name")
assert.Contains(t, spec.Annotations, annotations.ImageName)
assert.EqualValues(t, spec.Annotations[annotations.ImageName], testImageName)
}
return config, sandboxConfig, imageConfig, specCheck
}

View File

@@ -0,0 +1,782 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"os"
"path/filepath"
goruntime "runtime"
"testing"
ostesting "github.com/containerd/containerd/v2/pkg/os/testing"
"github.com/containerd/platforms"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/internal/cri/config"
"github.com/containerd/containerd/v2/internal/cri/constants"
"github.com/containerd/containerd/v2/internal/cri/opts"
"github.com/containerd/containerd/v2/pkg/oci"
)
var currentPlatform = platforms.DefaultSpec()
func checkMount(t *testing.T, mounts []runtimespec.Mount, src, dest, typ string,
contains, notcontains []string) {
found := false
for _, m := range mounts {
if m.Source == src && m.Destination == dest {
assert.Equal(t, m.Type, typ)
for _, c := range contains {
assert.Contains(t, m.Options, c)
}
for _, n := range notcontains {
assert.NotContains(t, m.Options, n)
}
found = true
break
}
}
assert.True(t, found, "mount from %q to %q not found", src, dest)
}
const testImageName = "container-image-name"
func TestGeneralContainerSpec(t *testing.T) {
testID := "test-id"
testPid := uint32(1234)
containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
ociRuntime := config.Runtime{}
c := newTestCRIService()
testSandboxID := "sandbox-id"
testContainerName := "container-name"
spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
require.NoError(t, err)
specCheck(t, testID, testSandboxID, testPid, spec)
}
func TestPodAnnotationPassthroughContainerSpec(t *testing.T) {
switch goruntime.GOOS {
case "darwin":
t.Skip("not implemented on Darwin")
case "freebsd":
t.Skip("not implemented on FreeBSD")
}
testID := "test-id"
testSandboxID := "sandbox-id"
testContainerName := "container-name"
testPid := uint32(1234)
for _, test := range []struct {
desc string
podAnnotations []string
configChange func(*runtime.PodSandboxConfig)
specCheck func(*testing.T, *runtimespec.Spec)
}{
{
desc: "a passthrough annotation should be passed as an OCI annotation",
podAnnotations: []string{"c"},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
assert.Equal(t, spec.Annotations["c"], "d")
},
},
{
desc: "a non-passthrough annotation should not be passed as an OCI annotation",
configChange: func(c *runtime.PodSandboxConfig) {
c.Annotations["d"] = "e"
},
podAnnotations: []string{"c"},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
assert.Equal(t, spec.Annotations["c"], "d")
_, ok := spec.Annotations["d"]
assert.False(t, ok)
},
},
{
desc: "passthrough annotations should support wildcard match",
configChange: func(c *runtime.PodSandboxConfig) {
c.Annotations["t.f"] = "j"
c.Annotations["z.g"] = "o"
c.Annotations["z"] = "o"
c.Annotations["y.ca"] = "b"
c.Annotations["y"] = "b"
},
podAnnotations: []string{"t*", "z.*", "y.c*"},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
t.Logf("%+v", spec.Annotations)
assert.Equal(t, spec.Annotations["t.f"], "j")
assert.Equal(t, spec.Annotations["z.g"], "o")
assert.Equal(t, spec.Annotations["y.ca"], "b")
_, ok := spec.Annotations["y"]
assert.False(t, ok)
_, ok = spec.Annotations["z"]
assert.False(t, ok)
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
c := newTestCRIService()
containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
if test.configChange != nil {
test.configChange(sandboxConfig)
}
ociRuntime := config.Runtime{
PodAnnotations: test.podAnnotations,
}
spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName,
containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
assert.NoError(t, err)
assert.NotNil(t, spec)
specCheck(t, testID, testSandboxID, testPid, spec)
if test.specCheck != nil {
test.specCheck(t, spec)
}
})
}
}
func TestContainerSpecCommand(t *testing.T) {
for _, test := range []struct {
desc string
criEntrypoint []string
criArgs []string
imageEntrypoint []string
imageArgs []string
expected []string
expectErr bool
}{
{
desc: "should use cri entrypoint if it's specified",
criEntrypoint: []string{"a", "b"},
imageEntrypoint: []string{"c", "d"},
imageArgs: []string{"e", "f"},
expected: []string{"a", "b"},
},
{
desc: "should use cri entrypoint if it's specified even if it's empty",
criEntrypoint: []string{},
criArgs: []string{"a", "b"},
imageEntrypoint: []string{"c", "d"},
imageArgs: []string{"e", "f"},
expected: []string{"a", "b"},
},
{
desc: "should use cri entrypoint and args if they are specified",
criEntrypoint: []string{"a", "b"},
criArgs: []string{"c", "d"},
imageEntrypoint: []string{"e", "f"},
imageArgs: []string{"g", "h"},
expected: []string{"a", "b", "c", "d"},
},
{
desc: "should use image entrypoint if cri entrypoint is not specified",
criArgs: []string{"a", "b"},
imageEntrypoint: []string{"c", "d"},
imageArgs: []string{"e", "f"},
expected: []string{"c", "d", "a", "b"},
},
{
desc: "should use image args if both cri entrypoint and args are not specified",
imageEntrypoint: []string{"c", "d"},
imageArgs: []string{"e", "f"},
expected: []string{"c", "d", "e", "f"},
},
{
desc: "should return error if both entrypoint and args are empty",
expectErr: true,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
config, _, imageConfig, _ := getCreateContainerTestData()
config.Command = test.criEntrypoint
config.Args = test.criArgs
imageConfig.Entrypoint = test.imageEntrypoint
imageConfig.Cmd = test.imageArgs
var spec runtimespec.Spec
err := opts.WithProcessArgs(config, imageConfig)(context.Background(), nil, nil, &spec)
if test.expectErr {
assert.Error(t, err)
return
}
assert.NoError(t, err)
assert.Equal(t, test.expected, spec.Process.Args, test.desc)
})
}
}
func TestVolumeMounts(t *testing.T) {
testContainerRootDir := "test-container-root"
idmap := []*runtime.IDMapping{
{
ContainerId: 0,
HostId: 100,
Length: 1,
},
}
for _, test := range []struct {
desc string
platform platforms.Platform
criMounts []*runtime.Mount
usernsEnabled bool
imageVolumes map[string]struct{}
expectedMountDest []string
expectedMappings []*runtime.IDMapping
}{
{
desc: "should setup rw mount for image volumes",
imageVolumes: map[string]struct{}{
"/test-volume-1": {},
"/test-volume-2": {},
},
expectedMountDest: []string{
"/test-volume-1",
"/test-volume-2",
},
},
{
desc: "should skip image volumes if already mounted by CRI",
criMounts: []*runtime.Mount{
{
ContainerPath: "/test-volume-1",
HostPath: "/test-hostpath-1",
},
},
imageVolumes: map[string]struct{}{
"/test-volume-1": {},
"/test-volume-2": {},
},
expectedMountDest: []string{
"/test-volume-2",
},
},
{
desc: "should compare and return cleanpath",
criMounts: []*runtime.Mount{
{
ContainerPath: "/test-volume-1",
HostPath: "/test-hostpath-1",
},
},
imageVolumes: map[string]struct{}{
"/test-volume-1/": {},
"/test-volume-2/": {},
},
expectedMountDest: []string{
"/test-volume-2/",
},
},
{
desc: "should make relative paths absolute on Linux",
platform: platforms.Platform{OS: "linux"},
imageVolumes: map[string]struct{}{
"./test-volume-1": {},
"C:/test-volume-2": {},
"../../test-volume-3": {},
"/abs/test-volume-4": {},
},
expectedMountDest: []string{
"/test-volume-1",
"/C:/test-volume-2",
"/test-volume-3",
"/abs/test-volume-4",
},
},
{
desc: "should include mappings for image volumes on Linux",
platform: platforms.Platform{OS: "linux"},
usernsEnabled: true,
imageVolumes: map[string]struct{}{
"/test-volume-1/": {},
"/test-volume-2/": {},
},
expectedMountDest: []string{
"/test-volume-2/",
"/test-volume-2/",
},
expectedMappings: idmap,
},
{
desc: "should NOT include mappings for image volumes on Linux if !userns",
platform: platforms.Platform{OS: "linux"},
usernsEnabled: false,
imageVolumes: map[string]struct{}{
"/test-volume-1/": {},
"/test-volume-2/": {},
},
expectedMountDest: []string{
"/test-volume-2/",
"/test-volume-2/",
},
},
{
desc: "should convert rel imageVolume paths to abs paths and add userns mappings",
platform: platforms.Platform{OS: "linux"},
usernsEnabled: true,
imageVolumes: map[string]struct{}{
"test-volume-1/": {},
"C:/test-volume-2/": {},
"../../test-volume-3/": {},
},
expectedMountDest: []string{
"/test-volume-1",
"/C:/test-volume-2",
"/test-volume-3",
},
expectedMappings: idmap,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
config := &imagespec.ImageConfig{
Volumes: test.imageVolumes,
}
containerConfig := &runtime.ContainerConfig{Mounts: test.criMounts}
if test.usernsEnabled {
containerConfig.Linux = &runtime.LinuxContainerConfig{
SecurityContext: &runtime.LinuxContainerSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
Uids: idmap,
Gids: idmap,
},
},
},
}
}
c := newTestCRIService()
got := c.volumeMounts(test.platform, testContainerRootDir, containerConfig, config)
assert.Len(t, got, len(test.expectedMountDest))
for _, dest := range test.expectedMountDest {
found := false
for _, m := range got {
if m.ContainerPath != dest {
continue
}
found = true
assert.Equal(t,
filepath.Dir(m.HostPath),
filepath.Join(testContainerRootDir, "volumes"))
if test.expectedMappings != nil {
assert.Equal(t, test.expectedMappings, m.UidMappings)
assert.Equal(t, test.expectedMappings, m.GidMappings)
}
break
}
assert.True(t, found)
}
})
}
}
func TestContainerAnnotationPassthroughContainerSpec(t *testing.T) {
switch goruntime.GOOS {
case "darwin":
t.Skip("not implemented on Darwin")
case "freebsd":
t.Skip("not implemented on FreeBSD")
}
testID := "test-id"
testSandboxID := "sandbox-id"
testContainerName := "container-name"
testPid := uint32(1234)
for _, test := range []struct {
desc string
podAnnotations []string
containerAnnotations []string
podConfigChange func(*runtime.PodSandboxConfig)
configChange func(*runtime.ContainerConfig)
specCheck func(*testing.T, *runtimespec.Spec)
}{
{
desc: "passthrough annotations from pod and container should be passed as an OCI annotation",
podConfigChange: func(p *runtime.PodSandboxConfig) {
p.Annotations["pod.annotation.1"] = "1"
p.Annotations["pod.annotation.2"] = "2"
p.Annotations["pod.annotation.3"] = "3"
},
configChange: func(c *runtime.ContainerConfig) {
c.Annotations["container.annotation.1"] = "1"
c.Annotations["container.annotation.2"] = "2"
c.Annotations["container.annotation.3"] = "3"
},
podAnnotations: []string{"pod.annotation.1"},
containerAnnotations: []string{"container.annotation.1"},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
assert.Equal(t, "1", spec.Annotations["container.annotation.1"])
_, ok := spec.Annotations["container.annotation.2"]
assert.False(t, ok)
_, ok = spec.Annotations["container.annotation.3"]
assert.False(t, ok)
assert.Equal(t, "1", spec.Annotations["pod.annotation.1"])
_, ok = spec.Annotations["pod.annotation.2"]
assert.False(t, ok)
_, ok = spec.Annotations["pod.annotation.3"]
assert.False(t, ok)
},
},
{
desc: "passthrough annotations from pod and container should support wildcard",
podConfigChange: func(p *runtime.PodSandboxConfig) {
p.Annotations["pod.annotation.1"] = "1"
p.Annotations["pod.annotation.2"] = "2"
p.Annotations["pod.annotation.3"] = "3"
},
configChange: func(c *runtime.ContainerConfig) {
c.Annotations["container.annotation.1"] = "1"
c.Annotations["container.annotation.2"] = "2"
c.Annotations["container.annotation.3"] = "3"
},
podAnnotations: []string{"pod.annotation.*"},
containerAnnotations: []string{"container.annotation.*"},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
assert.Equal(t, "1", spec.Annotations["container.annotation.1"])
assert.Equal(t, "2", spec.Annotations["container.annotation.2"])
assert.Equal(t, "3", spec.Annotations["container.annotation.3"])
assert.Equal(t, "1", spec.Annotations["pod.annotation.1"])
assert.Equal(t, "2", spec.Annotations["pod.annotation.2"])
assert.Equal(t, "3", spec.Annotations["pod.annotation.3"])
},
},
{
desc: "annotations should not pass through if no passthrough annotations are configured",
podConfigChange: func(p *runtime.PodSandboxConfig) {
p.Annotations["pod.annotation.1"] = "1"
p.Annotations["pod.annotation.2"] = "2"
p.Annotations["pod.annotation.3"] = "3"
},
configChange: func(c *runtime.ContainerConfig) {
c.Annotations["container.annotation.1"] = "1"
c.Annotations["container.annotation.2"] = "2"
c.Annotations["container.annotation.3"] = "3"
},
podAnnotations: []string{},
containerAnnotations: []string{},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
_, ok := spec.Annotations["container.annotation.1"]
assert.False(t, ok)
_, ok = spec.Annotations["container.annotation.2"]
assert.False(t, ok)
_, ok = spec.Annotations["container.annotation.3"]
assert.False(t, ok)
_, ok = spec.Annotations["pod.annotation.1"]
assert.False(t, ok)
_, ok = spec.Annotations["pod.annotation.2"]
assert.False(t, ok)
_, ok = spec.Annotations["pod.annotation.3"]
assert.False(t, ok)
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
c := newTestCRIService()
containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
if test.configChange != nil {
test.configChange(containerConfig)
}
if test.podConfigChange != nil {
test.podConfigChange(sandboxConfig)
}
ociRuntime := config.Runtime{
PodAnnotations: test.podAnnotations,
ContainerAnnotations: test.containerAnnotations,
}
spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName,
containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
assert.NoError(t, err)
assert.NotNil(t, spec)
specCheck(t, testID, testSandboxID, testPid, spec)
if test.specCheck != nil {
test.specCheck(t, spec)
}
})
}
}
func TestBaseRuntimeSpec(t *testing.T) {
c := newTestCRIService(withRuntimeService(&fakeRuntimeService{
ocispecs: map[string]*oci.Spec{
"/etc/containerd/cri-base.json": {
Version: "1.0.2",
Hostname: "old",
},
},
}))
out, err := c.runtimeSpec(
"id1",
platforms.DefaultSpec(),
"/etc/containerd/cri-base.json",
oci.WithHostname("new-host"),
oci.WithDomainname("new-domain"),
)
assert.NoError(t, err)
assert.Equal(t, "1.0.2", out.Version)
assert.Equal(t, "new-host", out.Hostname)
assert.Equal(t, "new-domain", out.Domainname)
// Make sure original base spec not changed
spec, err := c.LoadOCISpec("/etc/containerd/cri-base.json")
assert.NoError(t, err)
assert.NotEqual(t, out, spec)
assert.Equal(t, spec.Hostname, "old")
assert.Equal(t, filepath.Join("/", constants.K8sContainerdNamespace, "id1"), out.Linux.CgroupsPath)
}
func TestLinuxContainerMounts(t *testing.T) {
const testSandboxID = "test-id"
idmap := []*runtime.IDMapping{
{
ContainerId: 0,
HostId: 100,
Length: 1,
},
}
for _, test := range []struct {
desc string
statFn func(string) (os.FileInfo, error)
criMounts []*runtime.Mount
securityContext *runtime.LinuxContainerSecurityContext
expectedMounts []*runtime.Mount
}{
{
desc: "should setup ro mount when rootfs is read-only",
securityContext: &runtime.LinuxContainerSecurityContext{
ReadonlyRootfs: true,
},
expectedMounts: []*runtime.Mount{
{
ContainerPath: "/etc/hostname",
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"),
Readonly: true,
SelinuxRelabel: true,
},
{
ContainerPath: "/etc/hosts",
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"),
Readonly: true,
SelinuxRelabel: true,
},
{
ContainerPath: resolvConfPath,
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"),
Readonly: true,
SelinuxRelabel: true,
},
{
ContainerPath: "/dev/shm",
HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"),
Readonly: false,
SelinuxRelabel: true,
},
},
},
{
desc: "should setup rw mount when rootfs is read-write",
securityContext: &runtime.LinuxContainerSecurityContext{},
expectedMounts: []*runtime.Mount{
{
ContainerPath: "/etc/hostname",
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"),
Readonly: false,
SelinuxRelabel: true,
},
{
ContainerPath: "/etc/hosts",
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"),
Readonly: false,
SelinuxRelabel: true,
},
{
ContainerPath: resolvConfPath,
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"),
Readonly: false,
SelinuxRelabel: true,
},
{
ContainerPath: "/dev/shm",
HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"),
Readonly: false,
SelinuxRelabel: true,
},
},
},
{
desc: "should setup uidMappings/gidMappings when userns is used",
securityContext: &runtime.LinuxContainerSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
Uids: idmap,
Gids: idmap,
},
},
},
expectedMounts: []*runtime.Mount{
{
ContainerPath: "/etc/hostname",
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"),
Readonly: false,
SelinuxRelabel: true,
UidMappings: idmap,
GidMappings: idmap,
},
{
ContainerPath: "/etc/hosts",
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"),
Readonly: false,
SelinuxRelabel: true,
UidMappings: idmap,
GidMappings: idmap,
},
{
ContainerPath: resolvConfPath,
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"),
Readonly: false,
SelinuxRelabel: true,
UidMappings: idmap,
GidMappings: idmap,
},
{
ContainerPath: "/dev/shm",
HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"),
Readonly: false,
SelinuxRelabel: true,
},
},
},
{
desc: "should use host /dev/shm when host ipc is set",
securityContext: &runtime.LinuxContainerSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{Ipc: runtime.NamespaceMode_NODE},
},
expectedMounts: []*runtime.Mount{
{
ContainerPath: "/etc/hostname",
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"),
Readonly: false,
SelinuxRelabel: true,
},
{
ContainerPath: "/etc/hosts",
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"),
Readonly: false,
SelinuxRelabel: true,
},
{
ContainerPath: resolvConfPath,
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"),
Readonly: false,
SelinuxRelabel: true,
},
{
ContainerPath: "/dev/shm",
HostPath: "/dev/shm",
Readonly: false,
},
},
},
{
desc: "should skip container mounts if already mounted by CRI",
criMounts: []*runtime.Mount{
{
ContainerPath: "/etc/hostname",
HostPath: "/test-etc-hostname",
},
{
ContainerPath: "/etc/hosts",
HostPath: "/test-etc-host",
},
{
ContainerPath: resolvConfPath,
HostPath: "test-resolv-conf",
},
{
ContainerPath: "/dev/shm",
HostPath: "test-dev-shm",
},
},
securityContext: &runtime.LinuxContainerSecurityContext{},
expectedMounts: nil,
},
{
desc: "should skip hostname mount if the old sandbox doesn't have hostname file",
statFn: func(path string) (os.FileInfo, error) {
assert.Equal(t, filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), path)
return nil, errors.New("random error")
},
securityContext: &runtime.LinuxContainerSecurityContext{},
expectedMounts: []*runtime.Mount{
{
ContainerPath: "/etc/hosts",
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"),
Readonly: false,
SelinuxRelabel: true,
},
{
ContainerPath: resolvConfPath,
HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"),
Readonly: false,
SelinuxRelabel: true,
},
{
ContainerPath: "/dev/shm",
HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"),
Readonly: false,
SelinuxRelabel: true,
},
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
config := &runtime.ContainerConfig{
Metadata: &runtime.ContainerMetadata{
Name: "test-name",
Attempt: 1,
},
Mounts: test.criMounts,
Linux: &runtime.LinuxContainerConfig{
SecurityContext: test.securityContext,
},
}
c := newTestCRIService()
c.os.(*ostesting.FakeOS).StatFn = test.statFn
mounts := c.linuxContainerMounts(testSandboxID, config)
assert.Equal(t, test.expectedMounts, mounts, test.desc)
})
}
}

View File

@@ -0,0 +1,48 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"strconv"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/pkg/oci"
)
// No extra spec options needed for windows.
func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
return nil, nil
}
// snapshotterOpts returns any Windows specific snapshotter options for the r/w layer
func snapshotterOpts(config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
var opts []snapshots.Opt
// TODO: Only set for windows and cimfs snapshotter
rootfsSize := config.GetWindows().GetResources().GetRootfsSizeInBytes()
if rootfsSize != 0 {
labels := map[string]string{
"containerd.io/snapshot/windows/rootfs.sizebytes": strconv.FormatInt(rootfsSize, 10),
}
opts = append(opts, snapshots.WithLabels(labels))
}
return opts, nil
}

View File

@@ -0,0 +1,363 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
"github.com/stretchr/testify/require"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/internal/cri/annotations"
"github.com/containerd/containerd/v2/internal/cri/config"
)
func getSandboxConfig() *runtime.PodSandboxConfig {
return &runtime.PodSandboxConfig{
Metadata: &runtime.PodSandboxMetadata{
Name: "test-sandbox-name",
Uid: "test-sandbox-uid",
Namespace: "test-sandbox-ns",
Attempt: 2,
},
Windows: &runtime.WindowsPodSandboxConfig{},
Hostname: "test-hostname",
Annotations: map[string]string{"c": "d"},
}
}
func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig,
*imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) {
config := &runtime.ContainerConfig{
Metadata: &runtime.ContainerMetadata{
Name: "test-name",
Attempt: 1,
},
Image: &runtime.ImageSpec{
Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799",
},
Command: []string{"test", "command"},
Args: []string{"test", "args"},
WorkingDir: "test-cwd",
Envs: []*runtime.KeyValue{
{Key: "k1", Value: "v1"},
{Key: "k2", Value: "v2"},
{Key: "k3", Value: "v3=v3bis"},
{Key: "k4", Value: "v4=v4bis=foop"},
},
Mounts: []*runtime.Mount{
// everything default
{
ContainerPath: "container-path-1",
HostPath: "host-path-1",
},
// readOnly
{
ContainerPath: "container-path-2",
HostPath: "host-path-2",
Readonly: true,
},
},
Labels: map[string]string{"a": "b"},
Annotations: map[string]string{"c": "d"},
Windows: &runtime.WindowsContainerConfig{
Resources: &runtime.WindowsContainerResources{
CpuShares: 100,
CpuCount: 200,
CpuMaximum: 300,
MemoryLimitInBytes: 400,
},
SecurityContext: &runtime.WindowsContainerSecurityContext{
RunAsUsername: "test-user",
CredentialSpec: "{\"test\": \"spec\"}",
HostProcess: false,
},
},
}
sandboxConfig := getSandboxConfig()
imageConfig := &imagespec.ImageConfig{
Env: []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"},
Entrypoint: []string{"/entrypoint"},
Cmd: []string{"cmd"},
WorkingDir: "/workspace",
User: "ContainerUser",
}
specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) {
assert.Nil(t, spec.Root)
assert.Equal(t, "test-hostname", spec.Hostname)
assert.Equal(t, []string{"test", "command", "test", "args"}, spec.Process.Args)
assert.Equal(t, "test-cwd", spec.Process.Cwd)
assert.Contains(t, spec.Process.Env, "k1=v1", "k2=v2", "k3=v3=v3bis", "ik4=iv4=iv4bis=boop")
assert.Contains(t, spec.Process.Env, "ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "k4=v4=v4bis=foop")
t.Logf("Check bind mount")
checkMount(t, spec.Mounts, "host-path-1", "container-path-1", "", []string{"rw"}, nil)
checkMount(t, spec.Mounts, "host-path-2", "container-path-2", "", []string{"ro"}, nil)
t.Logf("Check resource limits")
assert.EqualValues(t, *spec.Windows.Resources.CPU.Shares, 100)
assert.EqualValues(t, *spec.Windows.Resources.CPU.Count, 200)
assert.EqualValues(t, *spec.Windows.Resources.CPU.Maximum, 300)
assert.EqualValues(t, *spec.Windows.Resources.CPU.Maximum, 300)
assert.EqualValues(t, *spec.Windows.Resources.Memory.Limit, 400)
// Also checks if override of the image configs user is behaving.
t.Logf("Check username")
assert.Contains(t, spec.Process.User.Username, "test-user")
t.Logf("Check credential spec")
assert.Contains(t, spec.Windows.CredentialSpec, "{\"test\": \"spec\"}")
t.Logf("Check PodSandbox annotations")
assert.Contains(t, spec.Annotations, annotations.SandboxID)
assert.EqualValues(t, spec.Annotations[annotations.SandboxID], sandboxID)
assert.Contains(t, spec.Annotations, annotations.ContainerType)
assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeContainer)
assert.Contains(t, spec.Annotations, annotations.SandboxNamespace)
assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-sandbox-ns")
assert.Contains(t, spec.Annotations, annotations.SandboxUID)
assert.EqualValues(t, spec.Annotations[annotations.SandboxUID], "test-sandbox-uid")
assert.Contains(t, spec.Annotations, annotations.SandboxName)
assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-sandbox-name")
assert.Contains(t, spec.Annotations, annotations.WindowsHostProcess)
assert.EqualValues(t, spec.Annotations[annotations.WindowsHostProcess], "false")
}
return config, sandboxConfig, imageConfig, specCheck
}
func TestContainerWindowsNetworkNamespace(t *testing.T) {
testID := "test-id"
testSandboxID := "sandbox-id"
testContainerName := "container-name"
testPid := uint32(1234)
nsPath := "test-cni"
c := newTestCRIService()
containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
assert.NoError(t, err)
assert.NotNil(t, spec)
specCheck(t, testID, testSandboxID, testPid, spec)
assert.NotNil(t, spec.Windows)
assert.NotNil(t, spec.Windows.Network)
assert.Equal(t, nsPath, spec.Windows.Network.NetworkNamespace)
}
func TestMountCleanPath(t *testing.T) {
testID := "test-id"
testSandboxID := "sandbox-id"
testContainerName := "container-name"
testPid := uint32(1234)
nsPath := "test-cni"
c := newTestCRIService()
containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
containerConfig.Mounts = append(containerConfig.Mounts, &runtime.Mount{
ContainerPath: "c:/test/container-path",
HostPath: "c:/test/host-path",
})
spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
assert.NoError(t, err)
assert.NotNil(t, spec)
specCheck(t, testID, testSandboxID, testPid, spec)
checkMount(t, spec.Mounts, "c:\\test\\host-path", "c:\\test\\container-path", "", []string{"rw"}, nil)
}
func TestMountNamedPipe(t *testing.T) {
testID := "test-id"
testSandboxID := "sandbox-id"
testContainerName := "container-name"
testPid := uint32(1234)
nsPath := "test-cni"
c := newTestCRIService()
containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
containerConfig.Mounts = append(containerConfig.Mounts, &runtime.Mount{
ContainerPath: `\\.\pipe\foo`,
HostPath: `\\.\pipe\foo`,
})
spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
assert.NoError(t, err)
assert.NotNil(t, spec)
specCheck(t, testID, testSandboxID, testPid, spec)
checkMount(t, spec.Mounts, `\\.\pipe\foo`, `\\.\pipe\foo`, "", []string{"rw"}, nil)
}
func TestHostProcessRequirements(t *testing.T) {
testID := "test-id"
testSandboxID := "sandbox-id"
testContainerName := "container-name"
testPid := uint32(1234)
containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
ociRuntime := config.Runtime{}
c := newTestCRIService()
for _, test := range []struct {
desc string
containerHostProcess bool
sandboxHostProcess bool
expectError bool
}{
{
desc: "hostprocess container in non-hostprocess sandbox should fail",
containerHostProcess: true,
sandboxHostProcess: false,
expectError: true,
},
{
desc: "hostprocess container in hostprocess sandbox should be fine",
containerHostProcess: true,
sandboxHostProcess: true,
expectError: false,
},
{
desc: "non-hostprocess container in hostprocess sandbox should fail",
containerHostProcess: false,
sandboxHostProcess: true,
expectError: true,
},
{
desc: "non-hostprocess container in non-hostprocess sandbox should be fine",
containerHostProcess: false,
sandboxHostProcess: false,
expectError: false,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
containerConfig.Windows.SecurityContext.HostProcess = test.containerHostProcess
sandboxConfig.Windows.SecurityContext = &runtime.WindowsSandboxSecurityContext{
HostProcess: test.sandboxHostProcess,
}
_, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
if test.expectError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
})
}
}
func TestEntrypointAndCmdForArgsEscaped(t *testing.T) {
testID := "test-id"
testSandboxID := "sandbox-id"
testContainerName := "container-name"
testPid := uint32(1234)
nsPath := "test-ns"
c := newTestCRIService()
for name, test := range map[string]struct {
imgEntrypoint []string
imgCmd []string
command []string
args []string
expectedArgs []string
expectedCommandLine string
ArgsEscaped bool
}{
// override image entrypoint and cmd in shell form with container args and verify expected runtime spec
"TestShellFormImgEntrypointCmdWithCtrArgs": {
imgEntrypoint: []string{`"C:\My Folder\MyProcess.exe" -arg1 "test value"`},
imgCmd: []string{`cmd -args "hello world"`},
command: nil,
args: []string{`cmd -args "additional args"`},
expectedArgs: nil,
expectedCommandLine: `"C:\My Folder\MyProcess.exe" -arg1 "test value" "cmd -args \"additional args\""`,
ArgsEscaped: true,
},
// check image entrypoint and cmd in shell form without overriding with container command and args and verify expected runtime spec
"TestShellFormImgEntrypointCmdWithoutCtrArgs": {
imgEntrypoint: []string{`"C:\My Folder\MyProcess.exe" -arg1 "test value"`},
imgCmd: []string{`cmd -args "hello world"`},
command: nil,
args: nil,
expectedArgs: nil,
expectedCommandLine: `"C:\My Folder\MyProcess.exe" -arg1 "test value" "cmd -args \"hello world\""`,
ArgsEscaped: true,
},
// override image entrypoint and cmd by container command and args in shell form and verify expected runtime spec
"TestShellFormImgEntrypointCmdWithCtrEntrypointAndArgs": {
imgEntrypoint: []string{`"C:\My Folder\MyProcess.exe" -arg1 "test value"`},
imgCmd: []string{`cmd -args "hello world"`},
command: []string{`C:\My Folder\MyProcess.exe`, "-arg1", "additional test value"},
args: []string{"cmd", "-args", "additional args"},
expectedArgs: nil,
expectedCommandLine: `"C:\My Folder\MyProcess.exe" -arg1 "additional test value" cmd -args "additional args"`,
ArgsEscaped: true,
},
// override image cmd by container args in exec form and verify expected runtime spec
"TestExecFormImgEntrypointCmdWithCtrArgs": {
imgEntrypoint: []string{`C:\My Folder\MyProcess.exe`, "-arg1", "test value"},
imgCmd: []string{"cmd", "-args", "hello world"},
command: nil,
args: []string{"additional", "args"},
expectedArgs: []string{`C:\My Folder\MyProcess.exe`, "-arg1", "test value", "additional", "args"},
expectedCommandLine: "",
ArgsEscaped: false,
},
// check image entrypoint and cmd in exec form without overriding with container command and args and verify expected runtime spec
"TestExecFormImgEntrypointCmdWithoutCtrArgs": {
imgEntrypoint: []string{`C:\My Folder\MyProcess.exe`, "-arg1", "test value"},
imgCmd: []string{"cmd", "-args", "hello world"},
command: nil,
args: nil,
expectedArgs: []string{`C:\My Folder\MyProcess.exe`, "-arg1", "test value", "cmd", "-args", "hello world"},
expectedCommandLine: "",
ArgsEscaped: false,
},
} {
t.Run(name, func(t *testing.T) {
imageConfig := &imagespec.ImageConfig{
Entrypoint: test.imgEntrypoint,
Cmd: test.imgCmd,
ArgsEscaped: test.ArgsEscaped,
}
sandboxConfig := getSandboxConfig()
containerConfig := &runtime.ContainerConfig{
Metadata: &runtime.ContainerMetadata{
Name: "test-name",
Attempt: 1,
},
Image: &runtime.ImageSpec{
Image: testImageName,
},
Command: test.command,
Args: test.args,
Windows: &runtime.WindowsContainerConfig{},
}
runtimeSpec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
assert.NoError(t, err)
assert.NotNil(t, runtimeSpec)
// check the runtime spec for expected commandline and args
actualCommandLine := runtimeSpec.Process.CommandLine
actualArgs := runtimeSpec.Process.Args
require.Equal(t, actualArgs, test.expectedArgs)
require.Equal(t, actualCommandLine, test.expectedCommandLine)
})
}
}

View File

@@ -0,0 +1,33 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func (c *criService) GetContainerEvents(r *runtime.GetEventsRequest, s runtime.RuntimeService_GetContainerEventsServer) error {
eventC, closer := c.containerEventsQ.Subscribe()
defer closer.Close()
for event := range eventC {
if err := s.Send(&event); err != nil {
return err
}
}
return nil
}

View File

@@ -0,0 +1,37 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// Exec prepares a streaming endpoint to execute a command in the container, and returns the address.
func (c *criService) Exec(ctx context.Context, r *runtime.ExecRequest) (*runtime.ExecResponse, error) {
cntr, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, fmt.Errorf("failed to find container %q in store: %w", r.GetContainerId(), err)
}
state := cntr.Status.Get().State()
if state != runtime.ContainerState_CONTAINER_RUNNING {
return nil, fmt.Errorf("container is in %s state", criContainerStateToString(state))
}
return c.streamServer.GetExec(r)
}

View File

@@ -0,0 +1,310 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"bytes"
"context"
"fmt"
"io"
"syscall"
"time"
containerd "github.com/containerd/containerd/v2/client"
containerdio "github.com/containerd/containerd/v2/pkg/cio"
"github.com/containerd/containerd/v2/pkg/oci"
"github.com/containerd/errdefs"
"github.com/containerd/log"
"k8s.io/client-go/tools/remotecommand"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
cio "github.com/containerd/containerd/v2/internal/cri/io"
"github.com/containerd/containerd/v2/internal/cri/util"
cioutil "github.com/containerd/containerd/v2/pkg/ioutil"
)
type cappedWriter struct {
w io.WriteCloser
remain int
}
func (cw *cappedWriter) Write(p []byte) (int, error) {
if cw.remain <= 0 {
return len(p), nil
}
end := cw.remain
if end > len(p) {
end = len(p)
}
written, err := cw.w.Write(p[0:end])
cw.remain -= written
if err != nil {
return written, err
}
return len(p), nil
}
func (cw *cappedWriter) Close() error {
return cw.w.Close()
}
func (cw *cappedWriter) isFull() bool {
return cw.remain <= 0
}
// ExecSync executes a command in the container, and returns the stdout output.
// If command exits with a non-zero exit code, an error is returned.
func (c *criService) ExecSync(ctx context.Context, r *runtime.ExecSyncRequest) (*runtime.ExecSyncResponse, error) {
const maxStreamSize = 1024 * 1024 * 16
var stdout, stderr bytes.Buffer
// cappedWriter truncates the output. In that case, the size of
// the ExecSyncResponse will hit the CRI plugin's gRPC response limit.
// Thus the callers outside of the containerd process (e.g. Kubelet) never see
// the truncated output.
cout := &cappedWriter{w: cioutil.NewNopWriteCloser(&stdout), remain: maxStreamSize}
cerr := &cappedWriter{w: cioutil.NewNopWriteCloser(&stderr), remain: maxStreamSize}
exitCode, err := c.execInContainer(ctx, r.GetContainerId(), execOptions{
cmd: r.GetCmd(),
stdout: cout,
stderr: cerr,
timeout: time.Duration(r.GetTimeout()) * time.Second,
})
if err != nil {
return nil, fmt.Errorf("failed to exec in container: %w", err)
}
return &runtime.ExecSyncResponse{
Stdout: stdout.Bytes(),
Stderr: stderr.Bytes(),
ExitCode: int32(*exitCode),
}, nil
}
// execOptions specifies how to execute command in container.
type execOptions struct {
cmd []string
stdin io.Reader
stdout io.WriteCloser
stderr io.WriteCloser
tty bool
resize <-chan remotecommand.TerminalSize
timeout time.Duration
}
func (c *criService) execInternal(ctx context.Context, container containerd.Container, id string, opts execOptions) (*uint32, error) {
// Cancel the context before returning to ensure goroutines are stopped.
// This is important, because if `Start` returns error, `Wait` will hang
// forever unless we cancel the context.
ctx, cancel := context.WithCancel(ctx)
defer cancel()
var drainExecSyncIOTimeout time.Duration
var err error
if c.config.DrainExecSyncIOTimeout != "" {
drainExecSyncIOTimeout, err = time.ParseDuration(c.config.DrainExecSyncIOTimeout)
if err != nil {
return nil, fmt.Errorf("failed to parse drain_exec_sync_io_timeout %q: %w",
c.config.DrainExecSyncIOTimeout, err)
}
}
spec, err := container.Spec(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get container spec: %w", err)
}
task, err := container.Task(ctx, nil)
if err != nil {
return nil, fmt.Errorf("failed to load task: %w", err)
}
pspec := spec.Process
pspec.Terminal = opts.tty
if opts.tty {
if err := oci.WithEnv([]string{"TERM=xterm"})(ctx, nil, nil, spec); err != nil {
return nil, fmt.Errorf("add TERM env var to spec: %w", err)
}
}
pspec.Args = opts.cmd
// CommandLine may already be set on the container's spec, but we want to only use Args here.
pspec.CommandLine = ""
if opts.stdout == nil {
opts.stdout = cio.NewDiscardLogger()
}
if opts.stderr == nil {
opts.stderr = cio.NewDiscardLogger()
}
execID := util.GenerateID()
log.G(ctx).Debugf("Generated exec id %q for container %q", execID, id)
volatileRootDir := c.getVolatileContainerRootDir(id)
var execIO *cio.ExecIO
process, err := task.Exec(ctx, execID, pspec,
func(id string) (containerdio.IO, error) {
var err error
execIO, err = cio.NewExecIO(id, volatileRootDir, opts.tty, opts.stdin != nil)
return execIO, err
},
)
if err != nil {
return nil, fmt.Errorf("failed to create exec %q: %w", execID, err)
}
defer func() {
deferCtx, deferCancel := util.DeferContext()
defer deferCancel()
if _, err := process.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
log.G(ctx).WithError(err).Errorf("Failed to delete exec process %q for container %q", execID, id)
}
}()
exitCh, err := process.Wait(ctx)
if err != nil {
return nil, fmt.Errorf("failed to wait for process %q: %w", execID, err)
}
if err := process.Start(ctx); err != nil {
return nil, fmt.Errorf("failed to start exec %q: %w", execID, err)
}
handleResizing(ctx, opts.resize, func(size remotecommand.TerminalSize) {
if err := process.Resize(ctx, uint32(size.Width), uint32(size.Height)); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to resize process %q console for container %q", execID, id)
}
})
attachDone := execIO.Attach(cio.AttachOptions{
Stdin: opts.stdin,
Stdout: opts.stdout,
Stderr: opts.stderr,
Tty: opts.tty,
StdinOnce: true,
CloseStdin: func() error {
return process.CloseIO(ctx, containerd.WithStdinCloser)
},
})
execCtx := ctx
if opts.timeout > 0 {
var execCtxCancel context.CancelFunc
execCtx, execCtxCancel = context.WithTimeout(ctx, opts.timeout)
defer execCtxCancel()
}
select {
case <-execCtx.Done():
// Ignore the not found error because the process may exit itself before killing.
if err := process.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) {
return nil, fmt.Errorf("failed to kill exec %q: %w", execID, err)
}
// Wait for the process to be killed.
exitRes := <-exitCh
log.G(ctx).Debugf("Timeout received while waiting for exec process kill %q code %d and error %v",
execID, exitRes.ExitCode(), exitRes.Error())
if err := drainExecSyncIO(ctx, process, drainExecSyncIOTimeout, attachDone); err != nil {
log.G(ctx).WithError(err).Warnf("failed to drain exec process %q io", execID)
}
return nil, fmt.Errorf("timeout %v exceeded: %w", opts.timeout, execCtx.Err())
case exitRes := <-exitCh:
code, _, err := exitRes.Result()
log.G(ctx).Debugf("Exec process %q exits with exit code %d and error %v", execID, code, err)
if err != nil {
return nil, fmt.Errorf("failed while waiting for exec %q: %w", execID, err)
}
if err := drainExecSyncIO(ctx, process, drainExecSyncIOTimeout, attachDone); err != nil {
return nil, fmt.Errorf("failed to drain exec process %q io: %w", execID, err)
}
return &code, nil
}
}
// execInContainer executes a command inside the container synchronously, and
// redirects stdio stream properly.
// This function only returns when the exec process exits, this means that:
// 1) As long as the exec process is running, the goroutine in the cri plugin
// will be running and wait for the exit code;
// 2) `kubectl exec -it` will hang until the exec process exits, even after io
// is detached. This is different from dockershim, which leaves the exec process
// running in background after io is detached.
// https://github.com/kubernetes/kubernetes/blob/v1.15.0/pkg/kubelet/dockershim/exec.go#L127
// For example, if the `kubectl exec -it` process is killed, IO will be closed. In
// this case, the CRI plugin will still have a goroutine waiting for the exec process
// to exit and log the exit code, but dockershim won't.
func (c *criService) execInContainer(ctx context.Context, id string, opts execOptions) (*uint32, error) {
// Get container from our container store.
cntr, err := c.containerStore.Get(id)
if err != nil {
return nil, fmt.Errorf("failed to find container %q in store: %w", id, err)
}
id = cntr.ID
state := cntr.Status.Get().State()
if state != runtime.ContainerState_CONTAINER_RUNNING {
return nil, fmt.Errorf("container is in %s state", criContainerStateToString(state))
}
return c.execInternal(ctx, cntr.Container, id, opts)
}
// drainExecSyncIO drains process IO with timeout after exec init process exits.
//
// By default, the child processes spawned by exec process will inherit standard
// io file descriptors. The shim server creates a pipe as data channel. Both
// exec process and its children write data into the write end of the pipe.
// And the shim server will read data from the pipe. If the write end is still
// open, the shim server will continue to wait for data from pipe.
//
// If the exec command is like `bash -c "sleep 365d &"`, the exec process
// is bash and quit after create `sleep 365d`. But the `sleep 365d` will hold
// the write end of the pipe for a year! It doesn't make senses that CRI plugin
// should wait for it.
func drainExecSyncIO(ctx context.Context, execProcess containerd.Process, drainExecIOTimeout time.Duration, attachDone <-chan struct{}) error {
var timerCh <-chan time.Time
if drainExecIOTimeout != 0 {
timer := time.NewTimer(drainExecIOTimeout)
defer timer.Stop()
timerCh = timer.C
}
select {
case <-timerCh:
case <-attachDone:
log.G(ctx).Tracef("Stream pipe for exec process %q done", execProcess.ID())
return nil
}
log.G(ctx).Debugf("Exec process %q exits but the io is still held by other processes. Trying to delete exec process to release io", execProcess.ID())
_, err := execProcess.Delete(ctx, containerd.WithProcessKill)
if err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to release exec io by deleting exec process %q: %w",
execProcess.ID(), err)
}
}
return fmt.Errorf("failed to drain exec process %q io in %s because io is still held by other processes",
execProcess.ID(), drainExecIOTimeout)
}

View File

@@ -0,0 +1,150 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"bytes"
"context"
"os"
"syscall"
"testing"
"time"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/pkg/cio"
cioutil "github.com/containerd/containerd/v2/pkg/ioutil"
"github.com/stretchr/testify/assert"
)
func TestCWWrite(t *testing.T) {
var buf bytes.Buffer
cw := &cappedWriter{w: cioutil.NewNopWriteCloser(&buf), remain: 10}
n, err := cw.Write([]byte("hello"))
assert.NoError(t, err)
assert.Equal(t, 5, n)
n, err = cw.Write([]byte("helloworld"))
assert.NoError(t, err, "no errors even it hits the cap")
assert.Equal(t, 10, n, "no indication of partial write")
assert.True(t, cw.isFull())
assert.Equal(t, []byte("hellohello"), buf.Bytes(), "the underlying writer is capped")
_, err = cw.Write([]byte("world"))
assert.NoError(t, err)
assert.True(t, cw.isFull())
assert.Equal(t, []byte("hellohello"), buf.Bytes(), "the underlying writer is capped")
}
func TestCWClose(t *testing.T) {
var buf bytes.Buffer
cw := &cappedWriter{w: cioutil.NewNopWriteCloser(&buf), remain: 5}
err := cw.Close()
assert.NoError(t, err)
}
func TestDrainExecSyncIO(t *testing.T) {
ctx := context.TODO()
t.Run("NoTimeout", func(t *testing.T) {
ep := &fakeExecProcess{
id: t.Name(),
pid: uint32(os.Getpid()),
}
attachDoneCh := make(chan struct{})
time.AfterFunc(2*time.Second, func() { close(attachDoneCh) })
assert.NoError(t, drainExecSyncIO(ctx, ep, 0, attachDoneCh))
assert.Equal(t, 0, len(ep.actionEvents))
})
t.Run("With3Seconds", func(t *testing.T) {
ep := &fakeExecProcess{
id: t.Name(),
pid: uint32(os.Getpid()),
}
attachDoneCh := make(chan struct{})
time.AfterFunc(100*time.Second, func() { close(attachDoneCh) })
assert.Error(t, drainExecSyncIO(ctx, ep, 3*time.Second, attachDoneCh))
assert.Equal(t, []string{"Delete"}, ep.actionEvents)
})
}
type fakeExecProcess struct {
id string
pid uint32
actionEvents []string
}
// ID of the process
func (p *fakeExecProcess) ID() string {
return p.id
}
// Pid is the system specific process id
func (p *fakeExecProcess) Pid() uint32 {
return p.pid
}
// Start starts the process executing the user's defined binary
func (p *fakeExecProcess) Start(context.Context) error {
p.actionEvents = append(p.actionEvents, "Start")
return nil
}
// Delete removes the process and any resources allocated returning the exit status
func (p *fakeExecProcess) Delete(context.Context, ...containerd.ProcessDeleteOpts) (*containerd.ExitStatus, error) {
p.actionEvents = append(p.actionEvents, "Delete")
return nil, nil
}
// Kill sends the provided signal to the process
func (p *fakeExecProcess) Kill(context.Context, syscall.Signal, ...containerd.KillOpts) error {
p.actionEvents = append(p.actionEvents, "Kill")
return nil
}
// Wait asynchronously waits for the process to exit, and sends the exit code to the returned channel
func (p *fakeExecProcess) Wait(context.Context) (<-chan containerd.ExitStatus, error) {
p.actionEvents = append(p.actionEvents, "Wait")
return nil, nil
}
// CloseIO allows various pipes to be closed on the process
func (p *fakeExecProcess) CloseIO(context.Context, ...containerd.IOCloserOpts) error {
p.actionEvents = append(p.actionEvents, "CloseIO")
return nil
}
// Resize changes the width and height of the process's terminal
func (p *fakeExecProcess) Resize(ctx context.Context, w, h uint32) error {
p.actionEvents = append(p.actionEvents, "Resize")
return nil
}
// IO returns the io set for the process
func (p *fakeExecProcess) IO() cio.IO {
p.actionEvents = append(p.actionEvents, "IO")
return nil
}
// Status returns the executing status of the process
func (p *fakeExecProcess) Status(context.Context) (containerd.Status, error) {
p.actionEvents = append(p.actionEvents, "Status")
return containerd.Status{}, nil
}

View File

@@ -0,0 +1,116 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"time"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
)
// ListContainers lists all containers matching the filter.
func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (*runtime.ListContainersResponse, error) {
start := time.Now()
// List all containers from store.
containersInStore := c.containerStore.List()
var containers []*runtime.Container
for _, container := range containersInStore {
containers = append(containers, toCRIContainer(container))
}
containers = c.filterCRIContainers(containers, r.GetFilter())
containerListTimer.UpdateSince(start)
return &runtime.ListContainersResponse{Containers: containers}, nil
}
// toCRIContainer converts internal container object into CRI container.
func toCRIContainer(container containerstore.Container) *runtime.Container {
status := container.Status.Get()
return &runtime.Container{
Id: container.ID,
PodSandboxId: container.SandboxID,
Metadata: container.Config.GetMetadata(),
Image: container.Config.GetImage(),
ImageRef: container.ImageRef,
State: status.State(),
CreatedAt: status.CreatedAt,
Labels: container.Config.GetLabels(),
Annotations: container.Config.GetAnnotations(),
}
}
func (c *criService) normalizeContainerFilter(filter *runtime.ContainerFilter) {
if cntr, err := c.containerStore.Get(filter.GetId()); err == nil {
filter.Id = cntr.ID
}
if sb, err := c.sandboxStore.Get(filter.GetPodSandboxId()); err == nil {
filter.PodSandboxId = sb.ID
}
}
// filterCRIContainers filters CRIContainers.
func (c *criService) filterCRIContainers(containers []*runtime.Container, filter *runtime.ContainerFilter) []*runtime.Container {
if filter == nil {
return containers
}
// The containerd cri plugin supports short ids so long as there is only one
// match. So we do a lookup against the store here if a pod id has been
// included in the filter.
sb := filter.GetPodSandboxId()
if sb != "" {
sandbox, err := c.sandboxStore.Get(sb)
if err == nil {
sb = sandbox.ID
}
}
c.normalizeContainerFilter(filter)
filtered := []*runtime.Container{}
for _, cntr := range containers {
if filter.GetId() != "" && filter.GetId() != cntr.Id {
continue
}
if sb != "" && sb != cntr.PodSandboxId {
continue
}
if filter.GetState() != nil && filter.GetState().GetState() != cntr.State {
continue
}
if filter.GetLabelSelector() != nil {
match := true
for k, v := range filter.GetLabelSelector() {
got, ok := cntr.Labels[k]
if !ok || got != v {
match = false
break
}
}
if !match {
continue
}
}
filtered = append(filtered, cntr)
}
return filtered
}

View File

@@ -0,0 +1,366 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
)
func TestToCRIContainer(t *testing.T) {
config := &runtime.ContainerConfig{
Metadata: &runtime.ContainerMetadata{
Name: "test-name",
Attempt: 1,
},
Image: &runtime.ImageSpec{Image: "test-image"},
Labels: map[string]string{"a": "b"},
Annotations: map[string]string{"c": "d"},
}
createdAt := time.Now().UnixNano()
container, err := containerstore.NewContainer(
containerstore.Metadata{
ID: "test-id",
Name: "test-name",
SandboxID: "test-sandbox-id",
Config: config,
ImageRef: "test-image-ref",
},
containerstore.WithFakeStatus(
containerstore.Status{
Pid: 1234,
CreatedAt: createdAt,
StartedAt: time.Now().UnixNano(),
FinishedAt: time.Now().UnixNano(),
ExitCode: 1,
Reason: "test-reason",
Message: "test-message",
},
),
)
assert.NoError(t, err)
expect := &runtime.Container{
Id: "test-id",
PodSandboxId: "test-sandbox-id",
Metadata: config.GetMetadata(),
Image: config.GetImage(),
ImageRef: "test-image-ref",
State: runtime.ContainerState_CONTAINER_EXITED,
CreatedAt: createdAt,
Labels: config.GetLabels(),
Annotations: config.GetAnnotations(),
}
c := toCRIContainer(container)
assert.Equal(t, expect, c)
}
func TestFilterContainers(t *testing.T) {
c := newTestCRIService()
testContainers := []*runtime.Container{
{
Id: "1",
PodSandboxId: "s-1",
Metadata: &runtime.ContainerMetadata{Name: "name-1", Attempt: 1},
State: runtime.ContainerState_CONTAINER_RUNNING,
},
{
Id: "2",
PodSandboxId: "s-2",
Metadata: &runtime.ContainerMetadata{Name: "name-2", Attempt: 2},
State: runtime.ContainerState_CONTAINER_EXITED,
Labels: map[string]string{"a": "b"},
},
{
Id: "3",
PodSandboxId: "s-2",
Metadata: &runtime.ContainerMetadata{Name: "name-2", Attempt: 3},
State: runtime.ContainerState_CONTAINER_CREATED,
Labels: map[string]string{"c": "d"},
},
}
for _, test := range []struct {
desc string
filter *runtime.ContainerFilter
expect []*runtime.Container
}{
{
desc: "no filter",
expect: testContainers,
},
{
desc: "id filter",
filter: &runtime.ContainerFilter{Id: "2"},
expect: []*runtime.Container{testContainers[1]},
},
{
desc: "state filter",
filter: &runtime.ContainerFilter{
State: &runtime.ContainerStateValue{
State: runtime.ContainerState_CONTAINER_EXITED,
},
},
expect: []*runtime.Container{testContainers[1]},
},
{
desc: "label filter",
filter: &runtime.ContainerFilter{
LabelSelector: map[string]string{"a": "b"},
},
expect: []*runtime.Container{testContainers[1]},
},
{
desc: "sandbox id filter",
filter: &runtime.ContainerFilter{PodSandboxId: "s-2"},
expect: []*runtime.Container{testContainers[1], testContainers[2]},
},
{
desc: "mixed filter not matched",
filter: &runtime.ContainerFilter{
Id: "1",
PodSandboxId: "s-2",
LabelSelector: map[string]string{"a": "b"},
},
expect: []*runtime.Container{},
},
{
desc: "mixed filter matched",
filter: &runtime.ContainerFilter{
PodSandboxId: "s-2",
State: &runtime.ContainerStateValue{
State: runtime.ContainerState_CONTAINER_CREATED,
},
LabelSelector: map[string]string{"c": "d"},
},
expect: []*runtime.Container{testContainers[2]},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
filtered := c.filterCRIContainers(testContainers, test.filter)
assert.Equal(t, test.expect, filtered, test.desc)
})
}
}
// containerForTest is a helper type for test.
type containerForTest struct {
metadata containerstore.Metadata
status containerstore.Status
}
func (c containerForTest) toContainer() (containerstore.Container, error) {
return containerstore.NewContainer(
c.metadata,
containerstore.WithFakeStatus(c.status),
)
}
func TestListContainers(t *testing.T) {
c := newTestCRIService()
sandboxesInStore := []sandboxstore.Sandbox{
sandboxstore.NewSandbox(
sandboxstore.Metadata{
ID: "s-1abcdef1234",
Name: "sandboxname-1",
Config: &runtime.PodSandboxConfig{Metadata: &runtime.PodSandboxMetadata{Name: "podname-1"}},
},
sandboxstore.Status{
State: sandboxstore.StateReady,
},
),
sandboxstore.NewSandbox(
sandboxstore.Metadata{
ID: "s-2abcdef1234",
Name: "sandboxname-2",
Config: &runtime.PodSandboxConfig{Metadata: &runtime.PodSandboxMetadata{Name: "podname-2"}},
},
sandboxstore.Status{
State: sandboxstore.StateNotReady,
},
),
}
createdAt := time.Now().UnixNano()
startedAt := time.Now().UnixNano()
finishedAt := time.Now().UnixNano()
containersInStore := []containerForTest{
{
metadata: containerstore.Metadata{
ID: "c-1container",
Name: "name-1",
SandboxID: "s-1abcdef1234",
Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-1"}},
},
status: containerstore.Status{CreatedAt: createdAt},
},
{
metadata: containerstore.Metadata{
ID: "c-2container",
Name: "name-2",
SandboxID: "s-1abcdef1234",
Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-2"}},
},
status: containerstore.Status{
CreatedAt: createdAt,
StartedAt: startedAt,
},
},
{
metadata: containerstore.Metadata{
ID: "c-3container",
Name: "name-3",
SandboxID: "s-1abcdef1234",
Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-3"}},
},
status: containerstore.Status{
CreatedAt: createdAt,
StartedAt: startedAt,
FinishedAt: finishedAt,
},
},
{
metadata: containerstore.Metadata{
ID: "c-4container",
Name: "name-4",
SandboxID: "s-2abcdef1234",
Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-4"}},
},
status: containerstore.Status{
CreatedAt: createdAt,
},
},
}
expectedContainers := []*runtime.Container{
{
Id: "c-1container",
PodSandboxId: "s-1abcdef1234",
Metadata: &runtime.ContainerMetadata{Name: "name-1"},
State: runtime.ContainerState_CONTAINER_CREATED,
CreatedAt: createdAt,
},
{
Id: "c-2container",
PodSandboxId: "s-1abcdef1234",
Metadata: &runtime.ContainerMetadata{Name: "name-2"},
State: runtime.ContainerState_CONTAINER_RUNNING,
CreatedAt: createdAt,
},
{
Id: "c-3container",
PodSandboxId: "s-1abcdef1234",
Metadata: &runtime.ContainerMetadata{Name: "name-3"},
State: runtime.ContainerState_CONTAINER_EXITED,
CreatedAt: createdAt,
},
{
Id: "c-4container",
PodSandboxId: "s-2abcdef1234",
Metadata: &runtime.ContainerMetadata{Name: "name-4"},
State: runtime.ContainerState_CONTAINER_CREATED,
CreatedAt: createdAt,
},
}
// Inject test sandbox metadata
for _, sb := range sandboxesInStore {
assert.NoError(t, c.sandboxStore.Add(sb))
}
// Inject test container metadata
for _, cntr := range containersInStore {
container, err := cntr.toContainer()
assert.NoError(t, err)
assert.NoError(t, c.containerStore.Add(container))
}
for _, testdata := range []struct {
desc string
filter *runtime.ContainerFilter
expect []*runtime.Container
}{
{
desc: "test without filter",
filter: &runtime.ContainerFilter{},
expect: expectedContainers,
},
{
desc: "test filter by sandboxid",
filter: &runtime.ContainerFilter{
PodSandboxId: "s-1abcdef1234",
},
expect: expectedContainers[:3],
},
{
desc: "test filter by truncated sandboxid",
filter: &runtime.ContainerFilter{
PodSandboxId: "s-1",
},
expect: expectedContainers[:3],
},
{
desc: "test filter by containerid",
filter: &runtime.ContainerFilter{
Id: "c-1container",
},
expect: expectedContainers[:1],
},
{
desc: "test filter by truncated containerid",
filter: &runtime.ContainerFilter{
Id: "c-1",
},
expect: expectedContainers[:1],
},
{
desc: "test filter by containerid and sandboxid",
filter: &runtime.ContainerFilter{
Id: "c-1container",
PodSandboxId: "s-1abcdef1234",
},
expect: expectedContainers[:1],
},
{
desc: "test filter by truncated containerid and truncated sandboxid",
filter: &runtime.ContainerFilter{
Id: "c-1",
PodSandboxId: "s-1",
},
expect: expectedContainers[:1],
},
} {
testdata := testdata
t.Run(testdata.desc, func(t *testing.T) {
resp, err := c.ListContainers(context.Background(), &runtime.ListContainersRequest{Filter: testdata.filter})
assert.NoError(t, err)
require.NotNil(t, resp)
containers := resp.GetContainers()
assert.Len(t, containers, len(testdata.expect))
for _, cntr := range testdata.expect {
assert.Contains(t, containers, cntr)
}
})
}
}

View File

@@ -0,0 +1,52 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"fmt"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// ReopenContainerLog asks the cri plugin to reopen the stdout/stderr log file for the container.
// This is often called after the log file has been rotated.
func (c *criService) ReopenContainerLog(ctx context.Context, r *runtime.ReopenContainerLogRequest) (*runtime.ReopenContainerLogResponse, error) {
container, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err)
}
if container.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
return nil, errors.New("container is not running")
}
// Create new container logger and replace the existing ones.
stdoutWC, stderrWC, err := c.createContainerLoggers(container.LogPath, container.Config.GetTty())
if err != nil {
return nil, err
}
oldStdoutWC, oldStderrWC := container.IO.AddOutput("log", stdoutWC, stderrWC)
if oldStdoutWC != nil {
oldStdoutWC.Close()
}
if oldStderrWC != nil {
oldStderrWC.Close()
}
return &runtime.ReopenContainerLogResponse{}, nil
}

View File

@@ -0,0 +1,164 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"fmt"
"time"
containerd "github.com/containerd/containerd/v2/client"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
"github.com/containerd/errdefs"
"github.com/containerd/log"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// RemoveContainer removes the container.
func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) {
start := time.Now()
ctrID := r.GetContainerId()
container, err := c.containerStore.Get(ctrID)
if err != nil {
if !errdefs.IsNotFound(err) {
return nil, fmt.Errorf("an error occurred when try to find container %q: %w", ctrID, err)
}
// Do not return error if container metadata doesn't exist.
log.G(ctx).Tracef("RemoveContainer called for container %q that does not exist", ctrID)
return &runtime.RemoveContainerResponse{}, nil
}
id := container.ID
i, err := container.Container.Info(ctx)
if err != nil {
if !errdefs.IsNotFound(err) {
return nil, fmt.Errorf("get container info: %w", err)
}
// Since containerd doesn't see the container and criservice's content store does,
// we should try to recover from this state by removing entry for this container
// from the container store as well and return successfully.
log.G(ctx).WithError(err).Warn("get container info failed")
c.containerStore.Delete(ctrID)
c.containerNameIndex.ReleaseByKey(ctrID)
return &runtime.RemoveContainerResponse{}, nil
}
// Forcibly stop the containers if they are in running or unknown state
state := container.Status.Get().State()
if state == runtime.ContainerState_CONTAINER_RUNNING ||
state == runtime.ContainerState_CONTAINER_UNKNOWN {
log.L.Infof("Forcibly stopping container %q", id)
if err := c.stopContainer(ctx, container, 0); err != nil {
return nil, fmt.Errorf("failed to forcibly stop container %q: %w", id, err)
}
}
// Set removing state to prevent other start/remove operations against this container
// while it's being removed.
if err := setContainerRemoving(container); err != nil {
return nil, fmt.Errorf("failed to set removing state for container %q: %w", id, err)
}
defer func() {
if retErr != nil {
// Reset removing if remove failed.
if err := resetContainerRemoving(container); err != nil {
log.G(ctx).WithError(err).Errorf("failed to reset removing state for container %q", id)
}
}
}()
sandbox, err := c.sandboxStore.Get(container.SandboxID)
if err != nil {
err = c.nri.RemoveContainer(ctx, nil, &container)
} else {
err = c.nri.RemoveContainer(ctx, &sandbox, &container)
}
if err != nil {
log.G(ctx).WithError(err).Error("NRI failed to remove container")
}
// NOTE(random-liu): Docker set container to "Dead" state when start removing the
// container so as to avoid start/restart the container again. However, for current
// kubelet implementation, we'll never start a container once we decide to remove it,
// so we don't need the "Dead" state for now.
// Delete containerd container.
if err := container.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil {
if !errdefs.IsNotFound(err) {
return nil, fmt.Errorf("failed to delete containerd container %q: %w", id, err)
}
log.G(ctx).Tracef("Remove called for containerd container %q that does not exist", id)
}
// Delete container checkpoint.
if err := container.Delete(); err != nil {
return nil, fmt.Errorf("failed to delete container checkpoint for %q: %w", id, err)
}
containerRootDir := c.getContainerRootDir(id)
if err := ensureRemoveAll(ctx, containerRootDir); err != nil {
return nil, fmt.Errorf("failed to remove container root directory %q: %w",
containerRootDir, err)
}
volatileContainerRootDir := c.getVolatileContainerRootDir(id)
if err := ensureRemoveAll(ctx, volatileContainerRootDir); err != nil {
return nil, fmt.Errorf("failed to remove volatile container root directory %q: %w",
volatileContainerRootDir, err)
}
c.containerStore.Delete(id)
c.containerNameIndex.ReleaseByKey(id)
c.generateAndSendContainerEvent(ctx, id, container.SandboxID, runtime.ContainerEventType_CONTAINER_DELETED_EVENT)
containerRemoveTimer.WithValues(i.Runtime.Name).UpdateSince(start)
return &runtime.RemoveContainerResponse{}, nil
}
// setContainerRemoving sets the container into removing state. In removing state, the
// container will not be started or removed again.
func setContainerRemoving(container containerstore.Container) error {
return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
// Do not remove container if it's still running or unknown.
if status.State() == runtime.ContainerState_CONTAINER_RUNNING {
return status, errors.New("container is still running, to stop first")
}
if status.State() == runtime.ContainerState_CONTAINER_UNKNOWN {
return status, errors.New("container state is unknown, to stop first")
}
if status.Starting {
return status, errors.New("container is in starting state, can't be removed")
}
if status.Removing {
return status, errors.New("container is already in removing state")
}
status.Removing = true
return status, nil
})
}
// resetContainerRemoving resets the container removing state on remove failure. So
// that we could remove the container again.
func resetContainerRemoving(container containerstore.Container) error {
return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
status.Removing = false
return status, nil
})
}

View File

@@ -0,0 +1,92 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
)
// TestSetContainerRemoving tests setContainerRemoving sets removing
// state correctly.
func TestSetContainerRemoving(t *testing.T) {
testID := "test-id"
for _, test := range []struct {
desc string
status containerstore.Status
expectErr bool
}{
{
desc: "should return error when container is in running state",
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
StartedAt: time.Now().UnixNano(),
},
expectErr: true,
},
{
desc: "should return error when container is in starting state",
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
Starting: true,
},
expectErr: true,
},
{
desc: "should return error when container is in removing state",
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
StartedAt: time.Now().UnixNano(),
FinishedAt: time.Now().UnixNano(),
Removing: true,
},
expectErr: true,
},
{
desc: "should not return error when container is not running and removing",
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
StartedAt: time.Now().UnixNano(),
FinishedAt: time.Now().UnixNano(),
},
expectErr: false,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
container, err := containerstore.NewContainer(
containerstore.Metadata{ID: testID},
containerstore.WithFakeStatus(test.status),
)
assert.NoError(t, err)
err = setContainerRemoving(container)
if test.expectErr {
assert.Error(t, err)
assert.Equal(t, test.status, container.Status.Get(), "metadata should not be updated")
} else {
assert.NoError(t, err)
assert.True(t, container.Status.Get().Removing, "removing should be set")
assert.NoError(t, resetContainerRemoving(container))
assert.False(t, container.Status.Get().Removing, "removing should be reset")
}
})
}
}

View File

@@ -0,0 +1,252 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"fmt"
"io"
"time"
containerd "github.com/containerd/containerd/v2/client"
containerdio "github.com/containerd/containerd/v2/pkg/cio"
"github.com/containerd/errdefs"
"github.com/containerd/log"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
cio "github.com/containerd/containerd/v2/internal/cri/io"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
cioutil "github.com/containerd/containerd/v2/pkg/ioutil"
)
// StartContainer starts the container.
func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) {
start := time.Now()
cntr, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err)
}
info, err := cntr.Container.Info(ctx)
if err != nil {
return nil, fmt.Errorf("get container info: %w", err)
}
id := cntr.ID
meta := cntr.Metadata
container := cntr.Container
config := meta.Config
// Set starting state to prevent other start/remove operations against this container
// while it's being started.
if err := setContainerStarting(cntr); err != nil {
return nil, fmt.Errorf("failed to set starting state for container %q: %w", id, err)
}
defer func() {
if retErr != nil {
// Set container to exited if fail to start.
if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
status.Pid = 0
status.FinishedAt = time.Now().UnixNano()
status.ExitCode = errorStartExitCode
status.Reason = errorStartReason
status.Message = retErr.Error()
return status, nil
}); err != nil {
log.G(ctx).WithError(err).Errorf("failed to set start failure state for container %q", id)
}
}
if err := resetContainerStarting(cntr); err != nil {
log.G(ctx).WithError(err).Errorf("failed to reset starting state for container %q", id)
}
}()
// Get sandbox config from sandbox store.
sandbox, err := c.sandboxStore.Get(meta.SandboxID)
if err != nil {
return nil, fmt.Errorf("sandbox %q not found: %w", meta.SandboxID, err)
}
sandboxID := meta.SandboxID
if sandbox.Status.Get().State != sandboxstore.StateReady {
return nil, fmt.Errorf("sandbox container %q is not running", sandboxID)
}
// Recheck target container validity in Linux namespace options.
if linux := config.GetLinux(); linux != nil {
nsOpts := linux.GetSecurityContext().GetNamespaceOptions()
if nsOpts.GetPid() == runtime.NamespaceMode_TARGET {
_, err := c.validateTargetContainer(sandboxID, nsOpts.TargetId)
if err != nil {
return nil, fmt.Errorf("invalid target container: %w", err)
}
}
}
ioCreation := func(id string) (_ containerdio.IO, err error) {
stdoutWC, stderrWC, err := c.createContainerLoggers(meta.LogPath, config.GetTty())
if err != nil {
return nil, fmt.Errorf("failed to create container loggers: %w", err)
}
cntr.IO.AddOutput("log", stdoutWC, stderrWC)
cntr.IO.Pipe()
return cntr.IO, nil
}
ociRuntime, err := c.config.GetSandboxRuntime(sandbox.Config, sandbox.Metadata.RuntimeHandler)
if err != nil {
return nil, fmt.Errorf("failed to get sandbox runtime: %w", err)
}
var taskOpts []containerd.NewTaskOpts
if ociRuntime.Path != "" {
taskOpts = append(taskOpts, containerd.WithRuntimePath(ociRuntime.Path))
}
task, err := container.NewTask(ctx, ioCreation, taskOpts...)
if err != nil {
return nil, fmt.Errorf("failed to create containerd task: %w", err)
}
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// It's possible that task is deleted by event monitor.
if _, err := task.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
log.G(ctx).WithError(err).Errorf("Failed to delete containerd task %q", id)
}
}
}()
// wait is a long running background request, no timeout needed.
exitCh, err := task.Wait(ctrdutil.NamespacedContext())
if err != nil {
return nil, fmt.Errorf("failed to wait for containerd task: %w", err)
}
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
err = c.nri.StopContainer(deferCtx, &sandbox, &cntr)
if err != nil {
log.G(ctx).WithError(err).Errorf("NRI stop failed for failed container %q", id)
}
}
}()
err = c.nri.StartContainer(ctx, &sandbox, &cntr)
if err != nil {
log.G(ctx).WithError(err).Errorf("NRI container start failed")
return nil, fmt.Errorf("NRI container start failed: %w", err)
}
// Start containerd task.
if err := task.Start(ctx); err != nil {
return nil, fmt.Errorf("failed to start containerd task %q: %w", id, err)
}
// Update container start timestamp.
if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
status.Pid = task.Pid()
status.StartedAt = time.Now().UnixNano()
return status, nil
}); err != nil {
return nil, fmt.Errorf("failed to update container %q state: %w", id, err)
}
// It handles the TaskExit event and update container state after this.
c.eventMonitor.startContainerExitMonitor(context.Background(), id, task.Pid(), exitCh)
c.generateAndSendContainerEvent(ctx, id, sandboxID, runtime.ContainerEventType_CONTAINER_STARTED_EVENT)
err = c.nri.PostStartContainer(ctx, &sandbox, &cntr)
if err != nil {
log.G(ctx).WithError(err).Errorf("NRI post-start notification failed")
}
containerStartTimer.WithValues(info.Runtime.Name).UpdateSince(start)
return &runtime.StartContainerResponse{}, nil
}
// setContainerStarting sets the container into starting state. In starting state, the
// container will not be removed or started again.
func setContainerStarting(container containerstore.Container) error {
return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
// Return error if container is not in created state.
if status.State() != runtime.ContainerState_CONTAINER_CREATED {
return status, fmt.Errorf("container is in %s state", criContainerStateToString(status.State()))
}
// Do not start the container when there is a removal in progress.
if status.Removing {
return status, errors.New("container is in removing state, can't be started")
}
if status.Starting {
return status, errors.New("container is already in starting state")
}
status.Starting = true
return status, nil
})
}
// resetContainerStarting resets the container starting state on start failure. So
// that we could remove the container later.
func resetContainerStarting(container containerstore.Container) error {
return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
status.Starting = false
return status, nil
})
}
// createContainerLoggers creates container loggers and return write closer for stdout and stderr.
func (c *criService) createContainerLoggers(logPath string, tty bool) (stdout io.WriteCloser, stderr io.WriteCloser, err error) {
if logPath != "" {
// Only generate container log when log path is specified.
f, err := openLogFile(logPath)
if err != nil {
return nil, nil, fmt.Errorf("failed to create and open log file: %w", err)
}
defer func() {
if err != nil {
f.Close()
}
}()
var stdoutCh, stderrCh <-chan struct{}
wc := cioutil.NewSerialWriteCloser(f)
stdout, stdoutCh = cio.NewCRILogger(logPath, wc, cio.Stdout, c.config.MaxContainerLogLineSize)
// Only redirect stderr when there is no tty.
if !tty {
stderr, stderrCh = cio.NewCRILogger(logPath, wc, cio.Stderr, c.config.MaxContainerLogLineSize)
}
go func() {
if stdoutCh != nil {
<-stdoutCh
}
if stderrCh != nil {
<-stderrCh
}
log.L.Debugf("Finish redirecting log file %q, closing it", logPath)
f.Close()
}()
} else {
stdout = cio.NewDiscardLogger()
stderr = cio.NewDiscardLogger()
}
return
}

View File

@@ -0,0 +1,106 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
)
// TestSetContainerStarting tests setContainerStarting sets removing
// state correctly.
func TestSetContainerStarting(t *testing.T) {
testID := "test-id"
for _, test := range []struct {
desc string
status containerstore.Status
expectErr bool
}{
{
desc: "should not return error when container is in created state",
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
},
expectErr: false,
},
{
desc: "should return error when container is in running state",
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
StartedAt: time.Now().UnixNano(),
},
expectErr: true,
},
{
desc: "should return error when container is in exited state",
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
StartedAt: time.Now().UnixNano(),
FinishedAt: time.Now().UnixNano(),
},
expectErr: true,
},
{
desc: "should return error when container is in unknown state",
status: containerstore.Status{
CreatedAt: 0,
StartedAt: 0,
FinishedAt: 0,
},
expectErr: true,
},
{
desc: "should return error when container is in starting state",
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
Starting: true,
},
expectErr: true,
},
{
desc: "should return error when container is in removing state",
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
Removing: true,
},
expectErr: true,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
container, err := containerstore.NewContainer(
containerstore.Metadata{ID: testID},
containerstore.WithFakeStatus(test.status),
)
assert.NoError(t, err)
err = setContainerStarting(container)
if test.expectErr {
assert.Error(t, err)
assert.Equal(t, test.status, container.Status.Get(), "metadata should not be updated")
} else {
assert.NoError(t, err)
assert.True(t, container.Status.Get().Starting, "starting should be set")
assert.NoError(t, resetContainerStarting(container))
assert.False(t, container.Status.Get().Starting, "starting should be reset")
}
})
}
}

View File

@@ -0,0 +1,53 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
"github.com/containerd/containerd/v2/api/services/tasks/v1"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// ContainerStats returns stats of the container. If the container does not
// exist, the call returns an error.
func (c *criService) ContainerStats(ctx context.Context, in *runtime.ContainerStatsRequest) (*runtime.ContainerStatsResponse, error) {
cntr, err := c.containerStore.Get(in.GetContainerId())
if err != nil {
return nil, fmt.Errorf("failed to find container: %w", err)
}
request := &tasks.MetricsRequest{Filters: []string{"id==" + cntr.ID}}
resp, err := c.client.TaskService().Metrics(ctx, request)
if err != nil {
return nil, fmt.Errorf("failed to fetch metrics for task: %w", err)
}
if len(resp.Metrics) != 1 {
return nil, fmt.Errorf("unexpected metrics response: %+v", resp.Metrics)
}
handler, err := c.getMetricsHandler(ctx, cntr.SandboxID)
if err != nil {
return nil, err
}
cs, err := handler(cntr.Metadata, resp.Metrics[0])
if err != nil {
return nil, fmt.Errorf("failed to decode container metrics: %w", err)
}
return &runtime.ContainerStatsResponse{Stats: cs}, nil
}

View File

@@ -0,0 +1,512 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"fmt"
"reflect"
"time"
wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats"
cg1 "github.com/containerd/cgroups/v3/cgroup1/stats"
cg2 "github.com/containerd/cgroups/v3/cgroup2/stats"
"github.com/containerd/log"
"github.com/containerd/typeurl/v2"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/api/services/tasks/v1"
"github.com/containerd/containerd/v2/api/types"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
"github.com/containerd/containerd/v2/internal/cri/store/stats"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/errdefs"
)
// ListContainerStats returns stats of all running containers.
func (c *criService) ListContainerStats(
ctx context.Context,
in *runtime.ListContainerStatsRequest,
) (*runtime.ListContainerStatsResponse, error) {
request, containers, err := c.buildTaskMetricsRequest(in)
if err != nil {
return nil, fmt.Errorf("failed to build metrics request: %w", err)
}
resp, err := c.client.TaskService().Metrics(ctx, request)
if err != nil {
return nil, fmt.Errorf("failed to fetch metrics for tasks: %w", err)
}
criStats, err := c.toCRIContainerStats(ctx, resp.Metrics, containers)
if err != nil {
return nil, fmt.Errorf("failed to convert to cri containerd stats format: %w", err)
}
return criStats, nil
}
type metricsHandler func(containerstore.Metadata, *types.Metric) (*runtime.ContainerStats, error)
// Returns a function to be used for transforming container metrics into the right format.
// Uses the platform the given sandbox advertises to implement its logic. If the platform is
// unsupported for metrics this will return a wrapped [errdefs.ErrNotImplemented].
func (c *criService) getMetricsHandler(ctx context.Context, sandboxID string) (metricsHandler, error) {
sandbox, err := c.sandboxStore.Get(sandboxID)
if err != nil {
return nil, fmt.Errorf("failed to find sandbox id %q: %w", sandboxID, err)
}
controller, err := c.sandboxService.SandboxController(sandbox.Config, sandbox.RuntimeHandler)
if err != nil {
return nil, fmt.Errorf("failed to get sandbox controller: %w", err)
}
// Grab the platform that this containers sandbox advertises. Reason being, even if
// the host may be {insert platform}, if it virtualizes or emulates a different platform
// it will return stats in that format, and we need to handle the conversion logic based
// off of this info.
p, err := controller.Platform(ctx, sandboxID)
if err != nil {
return nil, err
}
ociRuntime, err := c.config.GetSandboxRuntime(sandbox.Config, sandbox.RuntimeHandler)
if err != nil {
return nil, fmt.Errorf("failed to get runtimeHandler %q: %w", sandbox.RuntimeHandler, err)
}
snapshotter := c.RuntimeSnapshotter(ctx, ociRuntime)
switch p.OS {
case "windows":
return func(meta containerstore.Metadata, stats *types.Metric) (*runtime.ContainerStats, error) {
return c.windowsContainerMetrics(meta, stats, snapshotter)
}, nil
case "linux":
return func(meta containerstore.Metadata, stats *types.Metric) (*runtime.ContainerStats, error) {
return c.linuxContainerMetrics(meta, stats, snapshotter)
}, nil
default:
return nil, fmt.Errorf("container metrics for platform %+v: %w", p, errdefs.ErrNotImplemented)
}
}
func (c *criService) toCRIContainerStats(
ctx context.Context,
stats []*types.Metric,
containers []containerstore.Container,
) (*runtime.ListContainerStatsResponse, error) {
statsMap := make(map[string]*types.Metric)
for _, stat := range stats {
statsMap[stat.ID] = stat
}
containerStats := new(runtime.ListContainerStatsResponse)
// Unfortunately if no filter was passed we're asking for every containers stats which
// generally belong to multiple different pods, who all might have different platforms.
// To avoid recalculating the right metricsHandler to invoke, if we've already calculated
// the platform and handler for a given sandbox just pull it from our map here.
var (
err error
handler metricsHandler
)
sandboxToMetricsHandler := make(map[string]metricsHandler)
for _, cntr := range containers {
h, ok := sandboxToMetricsHandler[cntr.SandboxID]
if !ok {
handler, err = c.getMetricsHandler(ctx, cntr.SandboxID)
if err != nil {
// If the sandbox is not found, it may have been removed. we need to check container whether it is still exist
if errdefs.IsNotFound(err) {
_, err = c.containerStore.Get(cntr.ID)
if err != nil && errdefs.IsNotFound(err) {
log.G(ctx).Warnf("container %q is not found, skip it", cntr.ID)
continue
}
}
return nil, fmt.Errorf("failed to get metrics handler for container %q: %w", cntr.ID, err)
}
sandboxToMetricsHandler[cntr.SandboxID] = handler
} else {
handler = h
}
cs, err := handler(cntr.Metadata, statsMap[cntr.ID])
if err != nil {
return nil, fmt.Errorf("failed to decode container metrics for %q: %w", cntr.ID, err)
}
if cs.Cpu != nil && cs.Cpu.UsageCoreNanoSeconds != nil {
// this is a calculated value and should be computed for all OSes
nanoUsage, err := c.getUsageNanoCores(cntr.Metadata.ID, false, cs.Cpu.UsageCoreNanoSeconds.Value, time.Unix(0, cs.Cpu.Timestamp))
if err != nil {
return nil, fmt.Errorf("failed to get usage nano cores, containerID: %s: %w", cntr.Metadata.ID, err)
}
cs.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoUsage}
}
containerStats.Stats = append(containerStats.Stats, cs)
}
return containerStats, nil
}
func (c *criService) getUsageNanoCores(containerID string, isSandbox bool, currentUsageCoreNanoSeconds uint64, currentTimestamp time.Time) (uint64, error) {
var oldStats *stats.ContainerStats
if isSandbox {
sandbox, err := c.sandboxStore.Get(containerID)
if err != nil {
return 0, fmt.Errorf("failed to get sandbox container: %s: %w", containerID, err)
}
oldStats = sandbox.Stats
} else {
container, err := c.containerStore.Get(containerID)
if err != nil {
return 0, fmt.Errorf("failed to get container ID: %s: %w", containerID, err)
}
oldStats = container.Stats
}
if oldStats == nil {
newStats := &stats.ContainerStats{
UsageCoreNanoSeconds: currentUsageCoreNanoSeconds,
Timestamp: currentTimestamp,
}
if isSandbox {
err := c.sandboxStore.UpdateContainerStats(containerID, newStats)
if err != nil {
return 0, fmt.Errorf("failed to update sandbox stats container ID: %s: %w", containerID, err)
}
} else {
err := c.containerStore.UpdateContainerStats(containerID, newStats)
if err != nil {
return 0, fmt.Errorf("failed to update container stats ID: %s: %w", containerID, err)
}
}
return 0, nil
}
nanoSeconds := currentTimestamp.UnixNano() - oldStats.Timestamp.UnixNano()
// zero or negative interval
if nanoSeconds <= 0 {
return 0, nil
}
newUsageNanoCores := uint64(float64(currentUsageCoreNanoSeconds-oldStats.UsageCoreNanoSeconds) /
float64(nanoSeconds) * float64(time.Second/time.Nanosecond))
newStats := &stats.ContainerStats{
UsageCoreNanoSeconds: currentUsageCoreNanoSeconds,
Timestamp: currentTimestamp,
}
if isSandbox {
err := c.sandboxStore.UpdateContainerStats(containerID, newStats)
if err != nil {
return 0, fmt.Errorf("failed to update sandbox container stats: %s: %w", containerID, err)
}
} else {
err := c.containerStore.UpdateContainerStats(containerID, newStats)
if err != nil {
return 0, fmt.Errorf("failed to update container stats ID: %s: %w", containerID, err)
}
}
return newUsageNanoCores, nil
}
func (c *criService) normalizeContainerStatsFilter(filter *runtime.ContainerStatsFilter) {
if cntr, err := c.containerStore.Get(filter.GetId()); err == nil {
filter.Id = cntr.ID
}
if sb, err := c.sandboxStore.Get(filter.GetPodSandboxId()); err == nil {
filter.PodSandboxId = sb.ID
}
}
// buildTaskMetricsRequest constructs a tasks.MetricsRequest based on
// the information in the stats request and the containerStore
func (c *criService) buildTaskMetricsRequest(
r *runtime.ListContainerStatsRequest,
) (*tasks.MetricsRequest, []containerstore.Container, error) {
req := &tasks.MetricsRequest{}
if r.GetFilter() == nil {
return req, c.containerStore.List(), nil
}
c.normalizeContainerStatsFilter(r.GetFilter())
var containers []containerstore.Container
for _, cntr := range c.containerStore.List() {
if r.GetFilter().GetId() != "" && cntr.ID != r.GetFilter().GetId() {
continue
}
if r.GetFilter().GetPodSandboxId() != "" && cntr.SandboxID != r.GetFilter().GetPodSandboxId() {
continue
}
if r.GetFilter().GetLabelSelector() != nil &&
!matchLabelSelector(r.GetFilter().GetLabelSelector(), cntr.Config.GetLabels()) {
continue
}
containers = append(containers, cntr)
req.Filters = append(req.Filters, "id=="+cntr.ID)
}
return req, containers, nil
}
func matchLabelSelector(selector, labels map[string]string) bool {
for k, v := range selector {
if val, ok := labels[k]; ok {
if v != val {
return false
}
} else {
return false
}
}
return true
}
func (c *criService) windowsContainerMetrics(
meta containerstore.Metadata,
stats *types.Metric,
snapshotter string,
) (*runtime.ContainerStats, error) {
var cs runtime.ContainerStats
var usedBytes, inodesUsed uint64
sn, err := c.GetSnapshot(meta.ID, snapshotter)
// If snapshotstore doesn't have cached snapshot information
// set WritableLayer usage to zero
if err == nil {
usedBytes = sn.Size
inodesUsed = sn.Inodes
}
cs.WritableLayer = &runtime.FilesystemUsage{
Timestamp: sn.Timestamp,
FsId: &runtime.FilesystemIdentifier{
Mountpoint: c.imageFSPaths[snapshotter],
},
UsedBytes: &runtime.UInt64Value{Value: usedBytes},
InodesUsed: &runtime.UInt64Value{Value: inodesUsed},
}
cs.Attributes = &runtime.ContainerAttributes{
Id: meta.ID,
Metadata: meta.Config.GetMetadata(),
Labels: meta.Config.GetLabels(),
Annotations: meta.Config.GetAnnotations(),
}
if stats != nil {
s, err := typeurl.UnmarshalAny(stats.Data)
if err != nil {
return nil, fmt.Errorf("failed to extract container metrics: %w", err)
}
wstats := s.(*wstats.Statistics).GetWindows()
if wstats == nil {
return nil, errors.New("windows stats is empty")
}
if wstats.Processor != nil {
cs.Cpu = &runtime.CpuUsage{
Timestamp: (protobuf.FromTimestamp(wstats.Timestamp)).UnixNano(),
UsageCoreNanoSeconds: &runtime.UInt64Value{Value: wstats.Processor.TotalRuntimeNS},
}
}
if wstats.Memory != nil {
cs.Memory = &runtime.MemoryUsage{
Timestamp: (protobuf.FromTimestamp(wstats.Timestamp)).UnixNano(),
WorkingSetBytes: &runtime.UInt64Value{
Value: wstats.Memory.MemoryUsagePrivateWorkingSetBytes,
},
}
}
}
return &cs, nil
}
func (c *criService) linuxContainerMetrics(
meta containerstore.Metadata,
stats *types.Metric,
snapshotter string,
) (*runtime.ContainerStats, error) {
var cs runtime.ContainerStats
var usedBytes, inodesUsed uint64
sn, err := c.GetSnapshot(meta.ID, snapshotter)
// If snapshotstore doesn't have cached snapshot information
// set WritableLayer usage to zero
if err == nil {
usedBytes = sn.Size
inodesUsed = sn.Inodes
}
cs.WritableLayer = &runtime.FilesystemUsage{
Timestamp: sn.Timestamp,
FsId: &runtime.FilesystemIdentifier{
Mountpoint: c.imageFSPaths[snapshotter],
},
UsedBytes: &runtime.UInt64Value{Value: usedBytes},
InodesUsed: &runtime.UInt64Value{Value: inodesUsed},
}
cs.Attributes = &runtime.ContainerAttributes{
Id: meta.ID,
Metadata: meta.Config.GetMetadata(),
Labels: meta.Config.GetLabels(),
Annotations: meta.Config.GetAnnotations(),
}
if stats != nil {
var data interface{}
switch {
case typeurl.Is(stats.Data, (*cg1.Metrics)(nil)):
data = &cg1.Metrics{}
case typeurl.Is(stats.Data, (*cg2.Metrics)(nil)):
data = &cg2.Metrics{}
case typeurl.Is(stats.Data, (*wstats.Statistics)(nil)):
data = &wstats.Statistics{}
default:
return nil, errors.New("cannot convert metric data to cgroups.Metrics or windows.Statistics")
}
if err := typeurl.UnmarshalTo(stats.Data, data); err != nil {
return nil, fmt.Errorf("failed to extract container metrics: %w", err)
}
cpuStats, err := c.cpuContainerStats(meta.ID, false /* isSandbox */, data, protobuf.FromTimestamp(stats.Timestamp))
if err != nil {
return nil, fmt.Errorf("failed to obtain cpu stats: %w", err)
}
cs.Cpu = cpuStats
memoryStats, err := c.memoryContainerStats(meta.ID, data, protobuf.FromTimestamp(stats.Timestamp))
if err != nil {
return nil, fmt.Errorf("failed to obtain memory stats: %w", err)
}
cs.Memory = memoryStats
}
return &cs, nil
}
// getWorkingSet calculates workingset memory from cgroup memory stats.
// The caller should make sure memory is not nil.
// workingset = usage - total_inactive_file
func getWorkingSet(memory *cg1.MemoryStat) uint64 {
if memory.Usage == nil {
return 0
}
var workingSet uint64
if memory.TotalInactiveFile < memory.Usage.Usage {
workingSet = memory.Usage.Usage - memory.TotalInactiveFile
}
return workingSet
}
// getWorkingSetV2 calculates workingset memory from cgroupv2 memory stats.
// The caller should make sure memory is not nil.
// workingset = usage - inactive_file
func getWorkingSetV2(memory *cg2.MemoryStat) uint64 {
var workingSet uint64
if memory.InactiveFile < memory.Usage {
workingSet = memory.Usage - memory.InactiveFile
}
return workingSet
}
func isMemoryUnlimited(v uint64) bool {
// Size after which we consider memory to be "unlimited". This is not
// MaxInt64 due to rounding by the kernel.
// TODO: k8s or cadvisor should export this https://github.com/google/cadvisor/blob/2b6fbacac7598e0140b5bc8428e3bdd7d86cf5b9/metrics/prometheus.go#L1969-L1971
const maxMemorySize = uint64(1 << 62)
return v > maxMemorySize
}
// https://github.com/kubernetes/kubernetes/blob/b47f8263e18c7b13dba33fba23187e5e0477cdbd/pkg/kubelet/stats/helper.go#L68-L71
func getAvailableBytes(memory *cg1.MemoryStat, workingSetBytes uint64) uint64 {
// memory limit - working set bytes
if !isMemoryUnlimited(memory.Usage.Limit) {
return memory.Usage.Limit - workingSetBytes
}
return 0
}
func getAvailableBytesV2(memory *cg2.MemoryStat, workingSetBytes uint64) uint64 {
// memory limit (memory.max) for cgroupv2 - working set bytes
if !isMemoryUnlimited(memory.UsageLimit) {
return memory.UsageLimit - workingSetBytes
}
return 0
}
func (c *criService) cpuContainerStats(ID string, isSandbox bool, stats interface{}, timestamp time.Time) (*runtime.CpuUsage, error) {
switch metrics := stats.(type) {
case *cg1.Metrics:
metrics.GetCPU().GetUsage()
if metrics.CPU != nil && metrics.CPU.Usage != nil {
return &runtime.CpuUsage{
Timestamp: timestamp.UnixNano(),
UsageCoreNanoSeconds: &runtime.UInt64Value{Value: metrics.CPU.Usage.Total},
}, nil
}
case *cg2.Metrics:
if metrics.CPU != nil {
// convert to nano seconds
usageCoreNanoSeconds := metrics.CPU.UsageUsec * 1000
return &runtime.CpuUsage{
Timestamp: timestamp.UnixNano(),
UsageCoreNanoSeconds: &runtime.UInt64Value{Value: usageCoreNanoSeconds},
}, nil
}
default:
return nil, fmt.Errorf("unexpected metrics type: %T from %s", metrics, reflect.TypeOf(metrics).Elem().PkgPath())
}
return nil, nil
}
func (c *criService) memoryContainerStats(ID string, stats interface{}, timestamp time.Time) (*runtime.MemoryUsage, error) {
switch metrics := stats.(type) {
case *cg1.Metrics:
if metrics.Memory != nil && metrics.Memory.Usage != nil {
workingSetBytes := getWorkingSet(metrics.Memory)
return &runtime.MemoryUsage{
Timestamp: timestamp.UnixNano(),
WorkingSetBytes: &runtime.UInt64Value{
Value: workingSetBytes,
},
AvailableBytes: &runtime.UInt64Value{Value: getAvailableBytes(metrics.Memory, workingSetBytes)},
UsageBytes: &runtime.UInt64Value{Value: metrics.Memory.Usage.Usage},
RssBytes: &runtime.UInt64Value{Value: metrics.Memory.TotalRSS},
PageFaults: &runtime.UInt64Value{Value: metrics.Memory.TotalPgFault},
MajorPageFaults: &runtime.UInt64Value{Value: metrics.Memory.TotalPgMajFault},
}, nil
}
case *cg2.Metrics:
if metrics.Memory != nil {
workingSetBytes := getWorkingSetV2(metrics.Memory)
return &runtime.MemoryUsage{
Timestamp: timestamp.UnixNano(),
WorkingSetBytes: &runtime.UInt64Value{
Value: workingSetBytes,
},
AvailableBytes: &runtime.UInt64Value{Value: getAvailableBytesV2(metrics.Memory, workingSetBytes)},
UsageBytes: &runtime.UInt64Value{Value: metrics.Memory.Usage},
// Use Anon memory for RSS as cAdvisor on cgroupv2
// see https://github.com/google/cadvisor/blob/a9858972e75642c2b1914c8d5428e33e6392c08a/container/libcontainer/handler.go#L799
RssBytes: &runtime.UInt64Value{Value: metrics.Memory.Anon},
PageFaults: &runtime.UInt64Value{Value: metrics.Memory.Pgfault},
MajorPageFaults: &runtime.UInt64Value{Value: metrics.Memory.Pgmajfault},
}, nil
}
default:
return nil, fmt.Errorf("unexpected metrics type: %T from %s", metrics, reflect.TypeOf(metrics).Elem().PkgPath())
}
return nil, nil
}

View File

@@ -0,0 +1,437 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"math"
"reflect"
"testing"
"time"
v1 "github.com/containerd/cgroups/v3/cgroup1/stats"
v2 "github.com/containerd/cgroups/v3/cgroup2/stats"
"github.com/containerd/containerd/v2/api/types"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func TestContainerMetricsCPUNanoCoreUsage(t *testing.T) {
c := newTestCRIService()
timestamp := time.Now()
secondAfterTimeStamp := timestamp.Add(time.Second)
ID := "ID"
for _, test := range []struct {
desc string
firstCPUValue uint64
secondCPUValue uint64
expectedNanoCoreUsageFirst uint64
expectedNanoCoreUsageSecond uint64
}{
{
desc: "metrics",
firstCPUValue: 50,
secondCPUValue: 500,
expectedNanoCoreUsageFirst: 0,
expectedNanoCoreUsageSecond: 450,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
container, err := containerstore.NewContainer(
containerstore.Metadata{ID: ID},
)
assert.NoError(t, err)
assert.Nil(t, container.Stats)
err = c.containerStore.Add(container)
assert.NoError(t, err)
cpuUsage, err := c.getUsageNanoCores(ID, false, test.firstCPUValue, timestamp)
assert.NoError(t, err)
container, err = c.containerStore.Get(ID)
assert.NoError(t, err)
assert.NotNil(t, container.Stats)
assert.Equal(t, test.expectedNanoCoreUsageFirst, cpuUsage)
cpuUsage, err = c.getUsageNanoCores(ID, false, test.secondCPUValue, secondAfterTimeStamp)
assert.NoError(t, err)
assert.Equal(t, test.expectedNanoCoreUsageSecond, cpuUsage)
container, err = c.containerStore.Get(ID)
assert.NoError(t, err)
assert.NotNil(t, container.Stats)
})
}
}
func TestGetWorkingSet(t *testing.T) {
for _, test := range []struct {
desc string
memory *v1.MemoryStat
expected uint64
}{
{
desc: "nil memory usage",
memory: &v1.MemoryStat{},
expected: 0,
},
{
desc: "memory usage higher than inactive_total_file",
memory: &v1.MemoryStat{
TotalInactiveFile: 1000,
Usage: &v1.MemoryEntry{Usage: 2000},
},
expected: 1000,
},
{
desc: "memory usage lower than inactive_total_file",
memory: &v1.MemoryStat{
TotalInactiveFile: 2000,
Usage: &v1.MemoryEntry{Usage: 1000},
},
expected: 0,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
got := getWorkingSet(test.memory)
assert.Equal(t, test.expected, got)
})
}
}
func TestGetWorkingSetV2(t *testing.T) {
for _, test := range []struct {
desc string
memory *v2.MemoryStat
expected uint64
}{
{
desc: "nil memory usage",
memory: &v2.MemoryStat{},
expected: 0,
},
{
desc: "memory usage higher than inactive_total_file",
memory: &v2.MemoryStat{
InactiveFile: 1000,
Usage: 2000,
},
expected: 1000,
},
{
desc: "memory usage lower than inactive_total_file",
memory: &v2.MemoryStat{
InactiveFile: 2000,
Usage: 1000,
},
expected: 0,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
got := getWorkingSetV2(test.memory)
assert.Equal(t, test.expected, got)
})
}
}
func TestGetAvailableBytes(t *testing.T) {
for _, test := range []struct {
desc string
memory *v1.MemoryStat
workingSetBytes uint64
expected uint64
}{
{
desc: "no limit",
memory: &v1.MemoryStat{
Usage: &v1.MemoryEntry{
Limit: math.MaxUint64, // no limit
Usage: 1000,
},
},
workingSetBytes: 500,
expected: 0,
},
{
desc: "with limit",
memory: &v1.MemoryStat{
Usage: &v1.MemoryEntry{
Limit: 5000,
Usage: 1000,
},
},
workingSetBytes: 500,
expected: 5000 - 500,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
got := getAvailableBytes(test.memory, test.workingSetBytes)
assert.Equal(t, test.expected, got)
})
}
}
func TestGetAvailableBytesV2(t *testing.T) {
for _, test := range []struct {
desc string
memory *v2.MemoryStat
workingSetBytes uint64
expected uint64
}{
{
desc: "no limit",
memory: &v2.MemoryStat{
UsageLimit: math.MaxUint64, // no limit
Usage: 1000,
},
workingSetBytes: 500,
expected: 0,
},
{
desc: "with limit",
memory: &v2.MemoryStat{
UsageLimit: 5000,
Usage: 1000,
},
workingSetBytes: 500,
expected: 5000 - 500,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
got := getAvailableBytesV2(test.memory, test.workingSetBytes)
assert.Equal(t, test.expected, got)
})
}
}
func TestContainerMetricsMemory(t *testing.T) {
c := newTestCRIService()
timestamp := time.Now()
for _, test := range []struct {
desc string
metrics interface{}
expected *runtime.MemoryUsage
}{
{
desc: "v1 metrics - no memory limit",
metrics: &v1.Metrics{
Memory: &v1.MemoryStat{
Usage: &v1.MemoryEntry{
Limit: math.MaxUint64, // no limit
Usage: 1000,
},
TotalRSS: 10,
TotalPgFault: 11,
TotalPgMajFault: 12,
TotalInactiveFile: 500,
},
},
expected: &runtime.MemoryUsage{
Timestamp: timestamp.UnixNano(),
WorkingSetBytes: &runtime.UInt64Value{Value: 500},
AvailableBytes: &runtime.UInt64Value{Value: 0},
UsageBytes: &runtime.UInt64Value{Value: 1000},
RssBytes: &runtime.UInt64Value{Value: 10},
PageFaults: &runtime.UInt64Value{Value: 11},
MajorPageFaults: &runtime.UInt64Value{Value: 12},
},
},
{
desc: "v1 metrics - memory limit",
metrics: &v1.Metrics{
Memory: &v1.MemoryStat{
Usage: &v1.MemoryEntry{
Limit: 5000,
Usage: 1000,
},
TotalRSS: 10,
TotalPgFault: 11,
TotalPgMajFault: 12,
TotalInactiveFile: 500,
},
},
expected: &runtime.MemoryUsage{
Timestamp: timestamp.UnixNano(),
WorkingSetBytes: &runtime.UInt64Value{Value: 500},
AvailableBytes: &runtime.UInt64Value{Value: 4500},
UsageBytes: &runtime.UInt64Value{Value: 1000},
RssBytes: &runtime.UInt64Value{Value: 10},
PageFaults: &runtime.UInt64Value{Value: 11},
MajorPageFaults: &runtime.UInt64Value{Value: 12},
},
},
{
desc: "v2 metrics - memory limit",
metrics: &v2.Metrics{
Memory: &v2.MemoryStat{
Usage: 1000,
UsageLimit: 5000,
InactiveFile: 0,
Pgfault: 11,
Pgmajfault: 12,
},
},
expected: &runtime.MemoryUsage{
Timestamp: timestamp.UnixNano(),
WorkingSetBytes: &runtime.UInt64Value{Value: 1000},
AvailableBytes: &runtime.UInt64Value{Value: 4000},
UsageBytes: &runtime.UInt64Value{Value: 1000},
RssBytes: &runtime.UInt64Value{Value: 0},
PageFaults: &runtime.UInt64Value{Value: 11},
MajorPageFaults: &runtime.UInt64Value{Value: 12},
},
},
{
desc: "v2 metrics - no memory limit",
metrics: &v2.Metrics{
Memory: &v2.MemoryStat{
Usage: 1000,
UsageLimit: math.MaxUint64, // no limit
InactiveFile: 0,
Pgfault: 11,
Pgmajfault: 12,
},
},
expected: &runtime.MemoryUsage{
Timestamp: timestamp.UnixNano(),
WorkingSetBytes: &runtime.UInt64Value{Value: 1000},
AvailableBytes: &runtime.UInt64Value{Value: 0},
UsageBytes: &runtime.UInt64Value{Value: 1000},
RssBytes: &runtime.UInt64Value{Value: 0},
PageFaults: &runtime.UInt64Value{Value: 11},
MajorPageFaults: &runtime.UInt64Value{Value: 12},
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
got, err := c.memoryContainerStats("ID", test.metrics, timestamp)
assert.NoError(t, err)
assert.Equal(t, test.expected, got)
})
}
}
func TestListContainerStats(t *testing.T) {
c := newTestCRIService()
type args struct {
ctx context.Context
stats []*types.Metric
containers []containerstore.Container
}
tests := []struct {
name string
args args
before func()
after func()
want *runtime.ListContainerStatsResponse
wantErr bool
}{
{
name: "args containers having c1,but containerStore not found c1, so filter c1",
args: args{
ctx: context.Background(),
stats: []*types.Metric{
{
ID: "c1",
},
},
containers: []containerstore.Container{
{
Metadata: containerstore.Metadata{
ID: "c1",
SandboxID: "s1",
},
},
},
},
want: &runtime.ListContainerStatsResponse{},
},
{
name: "args containers having c1,c2, but containerStore not found c1, so filter c1",
args: args{
ctx: context.Background(),
stats: []*types.Metric{
{
ID: "c1",
},
{
ID: "c2",
},
},
containers: []containerstore.Container{
{
Metadata: containerstore.Metadata{
ID: "c1",
SandboxID: "s1",
},
},
{
Metadata: containerstore.Metadata{
ID: "c2",
SandboxID: "s2",
},
},
},
},
before: func() {
c.containerStore.Add(containerstore.Container{
Metadata: containerstore.Metadata{
ID: "c2",
},
})
c.sandboxStore.Add(sandboxstore.Sandbox{
Metadata: sandboxstore.Metadata{
ID: "s2",
},
})
},
wantErr: true,
want: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if tt.before != nil {
tt.before()
}
got, err := c.toCRIContainerStats(tt.args.ctx, tt.args.stats, tt.args.containers)
if tt.after != nil {
tt.after()
}
if (err != nil) != tt.wantErr {
t.Errorf("ListContainerStats() error = %v, wantErr %v", err, tt.wantErr)
return
}
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("ListContainerStats() = %v, want %v", got, tt.want)
}
})
}
}

View File

@@ -0,0 +1,185 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"encoding/json"
"fmt"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
"github.com/containerd/containerd/v2/internal/cri/util"
"github.com/containerd/errdefs"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// ContainerStatus inspects the container and returns the status.
func (c *criService) ContainerStatus(ctx context.Context, r *runtime.ContainerStatusRequest) (*runtime.ContainerStatusResponse, error) {
container, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err)
}
// TODO(random-liu): Clean up the following logic in CRI.
// Current assumption:
// * ImageSpec in container config is image ID.
// * ImageSpec in container status is image tag.
// * ImageRef in container status is repo digest.
spec := container.Config.GetImage()
imageRef := container.ImageRef
image, err := c.GetImage(imageRef)
if err != nil {
if !errdefs.IsNotFound(err) {
return nil, fmt.Errorf("failed to get image %q: %w", imageRef, err)
}
} else {
repoTags, repoDigests := util.ParseImageReferences(image.References)
if len(repoTags) > 0 {
// Based on current behavior of dockershim, this field should be
// image tag.
spec = &runtime.ImageSpec{Image: repoTags[0]}
}
if len(repoDigests) > 0 {
// Based on the CRI definition, this field will be consumed by user.
imageRef = repoDigests[0]
}
}
status := toCRIContainerStatus(container, spec, imageRef)
if status.GetCreatedAt() == 0 {
// CRI doesn't allow CreatedAt == 0.
info, err := container.Container.Info(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get CreatedAt in %q state: %w", status.State, err)
}
status.CreatedAt = info.CreatedAt.UnixNano()
}
info, err := toCRIContainerInfo(ctx, container, r.GetVerbose())
if err != nil {
return nil, fmt.Errorf("failed to get verbose container info: %w", err)
}
return &runtime.ContainerStatusResponse{
Status: status,
Info: info,
}, nil
}
// toCRIContainerStatus converts internal container object to CRI container status.
func toCRIContainerStatus(container containerstore.Container, spec *runtime.ImageSpec, imageRef string) *runtime.ContainerStatus {
meta := container.Metadata
status := container.Status.Get()
reason := status.Reason
if status.State() == runtime.ContainerState_CONTAINER_EXITED && reason == "" {
if status.ExitCode == 0 {
reason = completeExitReason
} else {
reason = errorExitReason
}
}
// If container is in the created state, not set started and finished unix timestamps
var st, ft int64
switch status.State() {
case runtime.ContainerState_CONTAINER_RUNNING:
// If container is in the running state, set started unix timestamps
st = status.StartedAt
case runtime.ContainerState_CONTAINER_EXITED, runtime.ContainerState_CONTAINER_UNKNOWN:
st, ft = status.StartedAt, status.FinishedAt
}
return &runtime.ContainerStatus{
Id: meta.ID,
Metadata: meta.Config.GetMetadata(),
State: status.State(),
CreatedAt: status.CreatedAt,
StartedAt: st,
FinishedAt: ft,
ExitCode: status.ExitCode,
Image: spec,
ImageRef: imageRef,
Reason: reason,
Message: status.Message,
Labels: meta.Config.GetLabels(),
Annotations: meta.Config.GetAnnotations(),
Mounts: meta.Config.GetMounts(),
LogPath: meta.LogPath,
Resources: status.Resources,
}
}
// ContainerInfo is extra information for a container.
type ContainerInfo struct {
// TODO(random-liu): Add sandboxID in CRI container status.
SandboxID string `json:"sandboxID"`
Pid uint32 `json:"pid"`
Removing bool `json:"removing"`
SnapshotKey string `json:"snapshotKey"`
Snapshotter string `json:"snapshotter"`
RuntimeType string `json:"runtimeType"`
RuntimeOptions interface{} `json:"runtimeOptions"`
Config *runtime.ContainerConfig `json:"config"`
RuntimeSpec *runtimespec.Spec `json:"runtimeSpec"`
}
// toCRIContainerInfo converts internal container object information to CRI container status response info map.
func toCRIContainerInfo(ctx context.Context, container containerstore.Container, verbose bool) (map[string]string, error) {
if !verbose {
return nil, nil
}
meta := container.Metadata
status := container.Status.Get()
// TODO(random-liu): Change CRI status info to use array instead of map.
ci := &ContainerInfo{
SandboxID: container.SandboxID,
Pid: status.Pid,
Removing: status.Removing,
Config: meta.Config,
}
var err error
ci.RuntimeSpec, err = container.Container.Spec(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get container runtime spec: %w", err)
}
ctrInfo, err := container.Container.Info(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get container info: %w", err)
}
ci.SnapshotKey = ctrInfo.SnapshotKey
ci.Snapshotter = ctrInfo.Snapshotter
runtimeOptions, err := getRuntimeOptions(ctrInfo)
if err != nil {
return nil, fmt.Errorf("failed to get runtime options: %w", err)
}
ci.RuntimeType = ctrInfo.Runtime.Name
ci.RuntimeOptions = runtimeOptions
infoBytes, err := json.Marshal(ci)
if err != nil {
return nil, fmt.Errorf("failed to marshal info %v: %w", ci, err)
}
return map[string]string{
"info": string(infoBytes),
}, nil
}

View File

@@ -0,0 +1,304 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"testing"
"time"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
snapshotstore "github.com/containerd/containerd/v2/internal/cri/store/snapshot"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
imagestore "github.com/containerd/containerd/v2/internal/cri/store/image"
)
func getContainerStatusTestData() (*containerstore.Metadata, *containerstore.Status,
*imagestore.Image, *runtime.ContainerStatus) {
imageID := "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
testID := "test-id"
config := &runtime.ContainerConfig{
Metadata: &runtime.ContainerMetadata{
Name: "test-name",
Attempt: 1,
},
Image: &runtime.ImageSpec{Image: "test-image"},
Mounts: []*runtime.Mount{{
ContainerPath: "test-container-path",
HostPath: "test-host-path",
}},
Labels: map[string]string{"a": "b"},
Annotations: map[string]string{"c": "d"},
}
createdAt := time.Now().UnixNano()
metadata := &containerstore.Metadata{
ID: testID,
Name: "test-long-name",
SandboxID: "test-sandbox-id",
Config: config,
ImageRef: imageID,
LogPath: "test-log-path",
}
status := &containerstore.Status{
Pid: 1234,
CreatedAt: createdAt,
}
image := &imagestore.Image{
ID: imageID,
References: []string{
"gcr.io/library/busybox:latest",
"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
},
}
expected := &runtime.ContainerStatus{
Id: testID,
Metadata: config.GetMetadata(),
State: runtime.ContainerState_CONTAINER_CREATED,
CreatedAt: createdAt,
Image: &runtime.ImageSpec{Image: "gcr.io/library/busybox:latest"},
ImageRef: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
Reason: completeExitReason,
Labels: config.GetLabels(),
Annotations: config.GetAnnotations(),
Mounts: config.GetMounts(),
LogPath: "test-log-path",
}
return metadata, status, image, expected
}
func TestToCRIContainerStatus(t *testing.T) {
for _, test := range []struct {
desc string
startedAt int64
finishedAt int64
exitCode int32
reason string
message string
expectedState runtime.ContainerState
expectedReason string
}{
{
desc: "container created",
expectedState: runtime.ContainerState_CONTAINER_CREATED,
},
{
desc: "container running",
startedAt: time.Now().UnixNano(),
expectedState: runtime.ContainerState_CONTAINER_RUNNING,
},
{
desc: "container exited with reason",
startedAt: time.Now().UnixNano(),
finishedAt: time.Now().UnixNano(),
exitCode: 1,
reason: "test-reason",
message: "test-message",
expectedState: runtime.ContainerState_CONTAINER_EXITED,
expectedReason: "test-reason",
},
{
desc: "container exited with exit code 0 without reason",
startedAt: time.Now().UnixNano(),
finishedAt: time.Now().UnixNano(),
exitCode: 0,
message: "test-message",
expectedState: runtime.ContainerState_CONTAINER_EXITED,
expectedReason: completeExitReason,
},
{
desc: "container exited with non-zero exit code without reason",
startedAt: time.Now().UnixNano(),
finishedAt: time.Now().UnixNano(),
exitCode: 1,
message: "test-message",
expectedState: runtime.ContainerState_CONTAINER_EXITED,
expectedReason: errorExitReason,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
metadata, status, _, expected := getContainerStatusTestData()
// Update status with test case.
status.StartedAt = test.startedAt
status.FinishedAt = test.finishedAt
status.ExitCode = test.exitCode
status.Reason = test.reason
status.Message = test.message
container, err := containerstore.NewContainer(
*metadata,
containerstore.WithFakeStatus(*status),
)
assert.NoError(t, err)
// Set expectation based on test case.
expected.Reason = test.expectedReason
expected.StartedAt = test.startedAt
expected.FinishedAt = test.finishedAt
expected.ExitCode = test.exitCode
expected.Message = test.message
patchExceptedWithState(expected, test.expectedState)
containerStatus := toCRIContainerStatus(container,
expected.Image,
expected.ImageRef)
assert.Equal(t, expected, containerStatus, test.desc)
})
}
}
// TODO(mikebrow): add a fake containerd container.Container.Spec client api so we can test verbose is true option
func TestToCRIContainerInfo(t *testing.T) {
metadata, status, _, _ := getContainerStatusTestData()
container, err := containerstore.NewContainer(
*metadata,
containerstore.WithFakeStatus(*status),
)
assert.NoError(t, err)
info, err := toCRIContainerInfo(context.Background(),
container,
false)
assert.NoError(t, err)
assert.Nil(t, info)
}
func TestContainerStatus(t *testing.T) {
for _, test := range []struct {
desc string
exist bool
imageExist bool
startedAt int64
finishedAt int64
reason string
expectedState runtime.ContainerState
expectErr bool
}{
{
desc: "container created",
exist: true,
imageExist: true,
expectedState: runtime.ContainerState_CONTAINER_CREATED,
},
{
desc: "container running",
exist: true,
imageExist: true,
startedAt: time.Now().UnixNano(),
expectedState: runtime.ContainerState_CONTAINER_RUNNING,
},
{
desc: "container exited",
exist: true,
imageExist: true,
startedAt: time.Now().UnixNano(),
finishedAt: time.Now().UnixNano(),
reason: "test-reason",
expectedState: runtime.ContainerState_CONTAINER_EXITED,
},
{
desc: "container not exist",
exist: false,
imageExist: true,
expectErr: true,
},
{
desc: "image not exist",
exist: false,
imageExist: false,
expectErr: true,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
c := newTestCRIService()
metadata, status, image, expected := getContainerStatusTestData()
// Update status with test case.
status.StartedAt = test.startedAt
status.FinishedAt = test.finishedAt
status.Reason = test.reason
container, err := containerstore.NewContainer(
*metadata,
containerstore.WithFakeStatus(*status),
)
assert.NoError(t, err)
if test.exist {
assert.NoError(t, c.containerStore.Add(container))
}
if test.imageExist {
imageStore, err := imagestore.NewFakeStore([]imagestore.Image{*image})
assert.NoError(t, err)
c.ImageService = &fakeImageService{imageStore: imageStore}
}
resp, err := c.ContainerStatus(context.Background(), &runtime.ContainerStatusRequest{ContainerId: container.ID})
if test.expectErr {
assert.Error(t, err)
assert.Nil(t, resp)
return
}
// Set expectation based on test case.
expected.StartedAt = test.startedAt
expected.FinishedAt = test.finishedAt
expected.Reason = test.reason
patchExceptedWithState(expected, test.expectedState)
assert.Equal(t, expected, resp.GetStatus())
})
}
}
type fakeImageService struct {
imageStore *imagestore.Store
}
func (s *fakeImageService) RuntimeSnapshotter(ctx context.Context, ociRuntime criconfig.Runtime) string {
return ""
}
func (s *fakeImageService) UpdateImage(ctx context.Context, r string) error { return nil }
func (s *fakeImageService) CheckImages(ctx context.Context) error { return nil }
func (s *fakeImageService) GetImage(id string) (imagestore.Image, error) { return s.imageStore.Get(id) }
func (s *fakeImageService) GetSnapshot(key, snapshotter string) (snapshotstore.Snapshot, error) {
return snapshotstore.Snapshot{}, errors.New("not implemented")
}
func (s *fakeImageService) LocalResolve(refOrID string) (imagestore.Image, error) {
return imagestore.Image{}, errors.New("not implemented")
}
func (s *fakeImageService) ImageFSPaths() map[string]string { return make(map[string]string) }
func (s *fakeImageService) PullImage(context.Context, string, func(string) (string, string, error), *runtime.PodSandboxConfig) (string, error) {
return "", errors.New("not implemented")
}
func patchExceptedWithState(expected *runtime.ContainerStatus, state runtime.ContainerState) {
expected.State = state
switch state {
case runtime.ContainerState_CONTAINER_CREATED:
expected.StartedAt, expected.FinishedAt = 0, 0
case runtime.ContainerState_CONTAINER_RUNNING:
expected.FinishedAt = 0
}
}

View File

@@ -0,0 +1,219 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
"sync/atomic"
"syscall"
"time"
eventtypes "github.com/containerd/containerd/v2/api/events"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/errdefs"
"github.com/containerd/log"
"github.com/moby/sys/signal"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// StopContainer stops a running container with a grace period (i.e., timeout).
func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
start := time.Now()
// Get container config from container store.
container, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err)
}
if err := c.stopContainer(ctx, container, time.Duration(r.GetTimeout())*time.Second); err != nil {
return nil, err
}
sandbox, err := c.sandboxStore.Get(container.SandboxID)
if err != nil {
err = c.nri.StopContainer(ctx, nil, &container)
} else {
err = c.nri.StopContainer(ctx, &sandbox, &container)
}
if err != nil {
log.G(ctx).WithError(err).Error("NRI failed to stop container")
}
i, err := container.Container.Info(ctx)
if err != nil {
return nil, fmt.Errorf("get container info: %w", err)
}
containerStopTimer.WithValues(i.Runtime.Name).UpdateSince(start)
return &runtime.StopContainerResponse{}, nil
}
// stopContainer stops a container based on the container metadata.
func (c *criService) stopContainer(ctx context.Context, container containerstore.Container, timeout time.Duration) error {
id := container.ID
sandboxID := container.SandboxID
// Return without error if container is not running. This makes sure that
// stop only takes real action after the container is started.
state := container.Status.Get().State()
if state != runtime.ContainerState_CONTAINER_RUNNING &&
state != runtime.ContainerState_CONTAINER_UNKNOWN {
log.G(ctx).Infof("Container to stop %q must be in running or unknown state, current state %q",
id, criContainerStateToString(state))
return nil
}
task, err := container.Container.Task(ctx, nil)
if err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to get task for container %q: %w", id, err)
}
// Don't return for unknown state, some cleanup needs to be done.
if state == runtime.ContainerState_CONTAINER_UNKNOWN {
return cleanupUnknownContainer(ctx, id, container, sandboxID, c)
}
return nil
}
// Handle unknown state.
if state == runtime.ContainerState_CONTAINER_UNKNOWN {
// Start an exit handler for containers in unknown state.
waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext())
defer waitCancel()
exitCh, err := task.Wait(waitCtx)
if err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to wait for task for %q: %w", id, err)
}
return cleanupUnknownContainer(ctx, id, container, sandboxID, c)
}
exitCtx, exitCancel := context.WithCancel(context.Background())
stopCh := c.eventMonitor.startContainerExitMonitor(exitCtx, id, task.Pid(), exitCh)
defer func() {
exitCancel()
// This ensures that exit monitor is stopped before
// `Wait` is cancelled, so no exit event is generated
// because of the `Wait` cancellation.
<-stopCh
}()
}
// We only need to kill the task. The event handler will Delete the
// task from containerd after it handles the Exited event.
if timeout > 0 {
stopSignal := "SIGTERM"
if container.StopSignal != "" {
stopSignal = container.StopSignal
} else {
// The image may have been deleted, and the `StopSignal` field is
// just introduced to handle that.
// However, for containers created before the `StopSignal` field is
// introduced, still try to get the stop signal from the image config.
// If the image has been deleted, logging an error and using the
// default SIGTERM is still better than returning error and leaving
// the container unstoppable. (See issue #990)
// TODO(random-liu): Remove this logic when containerd 1.2 is deprecated.
image, err := c.GetImage(container.ImageRef)
if err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to get image %q: %w", container.ImageRef, err)
}
log.G(ctx).Warningf("Image %q not found, stop container with signal %q", container.ImageRef, stopSignal)
} else {
if image.ImageSpec.Config.StopSignal != "" {
stopSignal = image.ImageSpec.Config.StopSignal
}
}
}
sig, err := signal.ParseSignal(stopSignal)
if err != nil {
return fmt.Errorf("failed to parse stop signal %q: %w", stopSignal, err)
}
var sswt bool
if container.IsStopSignaledWithTimeout == nil {
log.G(ctx).Infof("unable to ensure stop signal %v was not sent twice to container %v", sig, id)
sswt = true
} else {
sswt = atomic.CompareAndSwapUint32(container.IsStopSignaledWithTimeout, 0, 1)
}
if sswt {
log.G(ctx).Infof("Stop container %q with signal %v", id, sig)
if err = task.Kill(ctx, sig); err != nil && !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to stop container %q: %w", id, err)
}
} else {
log.G(ctx).Infof("Skipping the sending of signal %v to container %q because a prior stop with timeout>0 request already sent the signal", sig, id)
}
sigTermCtx, sigTermCtxCancel := context.WithTimeout(ctx, timeout)
defer sigTermCtxCancel()
err = c.waitContainerStop(sigTermCtx, container)
if err == nil {
// Container stopped on first signal no need for SIGKILL
return nil
}
// If the parent context was cancelled or exceeded return immediately
if ctx.Err() != nil {
return ctx.Err()
}
// sigTermCtx was exceeded. Send SIGKILL
log.G(ctx).Debugf("Stop container %q with signal %v timed out", id, sig)
}
log.G(ctx).Infof("Kill container %q", id)
if err = task.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to kill container %q: %w", id, err)
}
// Wait for a fixed timeout until container stop is observed by event monitor.
err = c.waitContainerStop(ctx, container)
if err != nil {
return fmt.Errorf("an error occurs during waiting for container %q to be killed: %w", id, err)
}
return nil
}
// waitContainerStop waits for container to be stopped until context is
// cancelled or the context deadline is exceeded.
func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container) error {
select {
case <-ctx.Done():
return fmt.Errorf("wait container %q: %w", container.ID, ctx.Err())
case <-container.Stopped():
return nil
}
}
// cleanupUnknownContainer cleanup stopped container in unknown state.
func cleanupUnknownContainer(ctx context.Context, id string, cntr containerstore.Container, sandboxID string, c *criService) error {
// Reuse handleContainerExit to do the cleanup.
return handleContainerExit(ctx, &eventtypes.TaskExit{
ContainerID: id,
ID: id,
Pid: 0,
ExitStatus: unknownExitCode,
ExitedAt: protobuf.ToTimestamp(time.Now()),
}, cntr, sandboxID, c)
}

View File

@@ -0,0 +1,92 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"testing"
"time"
"github.com/stretchr/testify/assert"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
)
func TestWaitContainerStop(t *testing.T) {
id := "test-id"
for _, test := range []struct {
desc string
status *containerstore.Status
cancel bool
timeout time.Duration
expectErr bool
}{
{
desc: "should return error if timeout exceeds",
status: &containerstore.Status{
CreatedAt: time.Now().UnixNano(),
StartedAt: time.Now().UnixNano(),
},
timeout: 200 * time.Millisecond,
expectErr: true,
},
{
desc: "should return error if context is cancelled",
status: &containerstore.Status{
CreatedAt: time.Now().UnixNano(),
StartedAt: time.Now().UnixNano(),
},
timeout: time.Hour,
cancel: true,
expectErr: true,
},
{
desc: "should not return error if container is stopped before timeout",
status: &containerstore.Status{
CreatedAt: time.Now().UnixNano(),
StartedAt: time.Now().UnixNano(),
FinishedAt: time.Now().UnixNano(),
},
timeout: time.Hour,
expectErr: false,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
c := newTestCRIService()
container, err := containerstore.NewContainer(
containerstore.Metadata{ID: id},
containerstore.WithFakeStatus(*test.status),
)
assert.NoError(t, err)
assert.NoError(t, c.containerStore.Add(container))
ctx := context.Background()
if test.cancel {
cancelledCtx, cancel := context.WithCancel(ctx)
cancel()
ctx = cancelledCtx
}
if test.timeout > 0 {
timeoutCtx, cancel := context.WithTimeout(ctx, test.timeout)
defer cancel()
ctx = timeoutCtx
}
err = c.waitContainerStop(ctx, container)
assert.Equal(t, test.expectErr, err != nil, test.desc)
})
}
}

View File

@@ -0,0 +1,157 @@
//go:build !darwin && !freebsd
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
gocontext "context"
"fmt"
"github.com/containerd/typeurl/v2"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/errdefs"
"github.com/containerd/log"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
)
// UpdateContainerResources updates ContainerConfig of the container.
func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (retRes *runtime.UpdateContainerResourcesResponse, retErr error) {
container, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, fmt.Errorf("failed to find container: %w", err)
}
sandbox, err := c.sandboxStore.Get(container.SandboxID)
if err != nil {
return nil, err
}
resources := r.GetLinux()
updated, err := c.nri.UpdateContainerResources(ctx, &sandbox, &container, resources)
if err != nil {
return nil, fmt.Errorf("NRI container update failed: %w", err)
}
if updated != nil {
*resources = *updated
}
// Update resources in status update transaction, so that:
// 1) There won't be race condition with container start.
// 2) There won't be concurrent resource update to the same container.
if err := container.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
return c.updateContainerResources(ctx, container, r, status)
}); err != nil {
return nil, fmt.Errorf("failed to update resources: %w", err)
}
err = c.nri.PostUpdateContainerResources(ctx, &sandbox, &container)
if err != nil {
log.G(ctx).WithError(err).Errorf("NRI post-update notification failed")
}
return &runtime.UpdateContainerResourcesResponse{}, nil
}
func (c *criService) updateContainerResources(ctx context.Context,
cntr containerstore.Container,
r *runtime.UpdateContainerResourcesRequest,
status containerstore.Status) (newStatus containerstore.Status, retErr error) {
newStatus = status
id := cntr.ID
// Do not update the container when there is a removal in progress.
if status.Removing {
return newStatus, fmt.Errorf("container %q is in removing state", id)
}
// Update container spec. If the container is not started yet, updating
// spec makes sure that the resource limits are correct when start;
// if the container is already started, updating spec is still required,
// the spec will become our source of truth for resource limits.
oldSpec, err := cntr.Container.Spec(ctx)
if err != nil {
return newStatus, fmt.Errorf("failed to get container spec: %w", err)
}
newSpec, err := updateOCIResource(ctx, oldSpec, r, c.config)
if err != nil {
return newStatus, fmt.Errorf("failed to update resource in spec: %w", err)
}
if err := updateContainerSpec(ctx, cntr.Container, newSpec); err != nil {
return newStatus, err
}
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Reset spec on error.
if err := updateContainerSpec(deferCtx, cntr.Container, oldSpec); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to update spec %+v for container %q", oldSpec, id)
}
} else {
// Update container status only when the spec is updated
newStatus = copyResourcesToStatus(newSpec, status)
}
}()
// If container is not running, only update spec is enough, new resource
// limit will be applied when container start.
if status.State() != runtime.ContainerState_CONTAINER_RUNNING {
return newStatus, nil
}
task, err := cntr.Container.Task(ctx, nil)
if err != nil {
if errdefs.IsNotFound(err) {
// Task exited already.
return newStatus, nil
}
return newStatus, fmt.Errorf("failed to get task: %w", err)
}
// newSpec.Linux / newSpec.Windows won't be nil
if err := task.Update(ctx, containerd.WithResources(getResources(newSpec))); err != nil {
if errdefs.IsNotFound(err) {
// Task exited already.
return newStatus, nil
}
return newStatus, fmt.Errorf("failed to update resources: %w", err)
}
return newStatus, nil
}
// updateContainerSpec updates container spec.
func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error {
s, err := typeurl.MarshalAny(spec)
if err != nil {
return fmt.Errorf("failed to marshal spec %+v: %w", spec, err)
}
if err := cntr.Update(ctx, func(ctx gocontext.Context, client *containerd.Client, c *containers.Container) error {
c.Spec = s
return nil
}); err != nil {
return fmt.Errorf("failed to update container spec: %w", err)
}
return nil
}

View File

@@ -0,0 +1,51 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
"github.com/containerd/containerd/v2/internal/cri/opts"
"github.com/containerd/containerd/v2/internal/cri/util"
)
// updateOCIResource updates container resource limit.
func updateOCIResource(ctx context.Context, spec *runtimespec.Spec, r *runtime.UpdateContainerResourcesRequest,
config criconfig.Config) (*runtimespec.Spec, error) {
// Copy to make sure old spec is not changed.
var cloned runtimespec.Spec
if err := util.DeepCopy(&cloned, spec); err != nil {
return nil, fmt.Errorf("failed to deep copy: %w", err)
}
if cloned.Linux == nil {
cloned.Linux = &runtimespec.Linux{}
}
if err := opts.WithResources(r.GetLinux(), config.TolerateMissingHugetlbController, config.DisableHugetlbController)(ctx, nil, nil, &cloned); err != nil {
return nil, fmt.Errorf("unable to set linux container resources: %w", err)
}
return &cloned, nil
}
func getResources(spec *runtimespec.Spec) interface{} {
return spec.Linux.Resources
}

View File

@@ -0,0 +1,256 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"testing"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
"google.golang.org/protobuf/proto"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
criopts "github.com/containerd/containerd/v2/internal/cri/opts"
)
func TestUpdateOCILinuxResource(t *testing.T) {
oomscoreadj := new(int)
*oomscoreadj = -500
expectedSwap := func(swap int64) *int64 {
if criopts.SwapControllerAvailable() {
return &swap
}
return nil
}
for _, test := range []struct {
desc string
spec *runtimespec.Spec
request *runtime.UpdateContainerResourcesRequest
expected *runtimespec.Spec
expectErr bool
}{
{
desc: "should be able to update each resource",
spec: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(1111),
Quota: proto.Int64(2222),
Period: proto.Uint64(3333),
Cpus: "0-1",
Mems: "2-3",
},
Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"},
},
},
},
request: &runtime.UpdateContainerResourcesRequest{
Linux: &runtime.LinuxContainerResources{
CpuPeriod: 6666,
CpuQuota: 5555,
CpuShares: 4444,
MemoryLimitInBytes: 54321,
OomScoreAdj: 500,
CpusetCpus: "4-5",
CpusetMems: "6-7",
Unified: map[string]string{"memory.min": "1507328", "memory.swap.max": "0"},
},
},
expected: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{
Limit: proto.Int64(54321),
Swap: expectedSwap(54321),
},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(4444),
Quota: proto.Int64(5555),
Period: proto.Uint64(6666),
Cpus: "4-5",
Mems: "6-7",
},
Unified: map[string]string{"memory.min": "1507328", "memory.swap.max": "0"},
},
},
},
},
{
desc: "should skip empty fields",
spec: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(1111),
Quota: proto.Int64(2222),
Period: proto.Uint64(3333),
Cpus: "0-1",
Mems: "2-3",
},
Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"},
},
},
},
request: &runtime.UpdateContainerResourcesRequest{
Linux: &runtime.LinuxContainerResources{
CpuQuota: 5555,
CpuShares: 4444,
MemoryLimitInBytes: 54321,
OomScoreAdj: 500,
CpusetMems: "6-7",
},
},
expected: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{
Limit: proto.Int64(54321),
Swap: expectedSwap(54321),
},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(4444),
Quota: proto.Int64(5555),
Period: proto.Uint64(3333),
Cpus: "0-1",
Mems: "6-7",
},
Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"},
},
},
},
},
{
desc: "should be able to fill empty fields",
spec: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)},
},
},
},
request: &runtime.UpdateContainerResourcesRequest{
Linux: &runtime.LinuxContainerResources{
CpuPeriod: 6666,
CpuQuota: 5555,
CpuShares: 4444,
MemoryLimitInBytes: 54321,
OomScoreAdj: 500,
CpusetCpus: "4-5",
CpusetMems: "6-7",
Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"},
},
},
expected: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{
Limit: proto.Int64(54321),
Swap: expectedSwap(54321),
},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(4444),
Quota: proto.Int64(5555),
Period: proto.Uint64(6666),
Cpus: "4-5",
Mems: "6-7",
},
Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"},
},
},
},
},
{
desc: "should be able to patch the unified map",
spec: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(1111),
Quota: proto.Int64(2222),
Period: proto.Uint64(3333),
Cpus: "0-1",
Mems: "2-3",
},
Unified: map[string]string{"memory.min": "65536", "memory.max": "1507328"},
},
},
},
request: &runtime.UpdateContainerResourcesRequest{
Linux: &runtime.LinuxContainerResources{
CpuPeriod: 6666,
CpuQuota: 5555,
CpuShares: 4444,
MemoryLimitInBytes: 54321,
OomScoreAdj: 500,
CpusetCpus: "4-5",
CpusetMems: "6-7",
Unified: map[string]string{"memory.min": "1507328", "memory.swap.max": "1024"},
},
},
expected: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{
Limit: proto.Int64(54321),
Swap: expectedSwap(54321),
},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(4444),
Quota: proto.Int64(5555),
Period: proto.Uint64(6666),
Cpus: "4-5",
Mems: "6-7",
},
Unified: map[string]string{"memory.min": "1507328", "memory.max": "1507328", "memory.swap.max": "1024"},
},
},
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
config := criconfig.Config{
RuntimeConfig: criconfig.RuntimeConfig{
TolerateMissingHugetlbController: true,
DisableHugetlbController: false,
},
}
got, err := updateOCIResource(context.Background(), test.spec, test.request, config)
if test.expectErr {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
assert.Equal(t, test.expected, got)
})
}
}

View File

@@ -0,0 +1,45 @@
//go:build !windows && !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
)
// UpdateContainerResources updates ContainerConfig of the container.
func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (retRes *runtime.UpdateContainerResourcesResponse, retErr error) {
container, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, fmt.Errorf("failed to find container: %w", err)
}
// Update resources in status update transaction, so that:
// 1) There won't be race condition with container start.
// 2) There won't be concurrent resource update to the same container.
if err := container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
return status, nil
}); err != nil {
return nil, fmt.Errorf("failed to update resources: %w", err)
}
return &runtime.UpdateContainerResourcesResponse{}, nil
}

View File

@@ -0,0 +1,51 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
"github.com/containerd/containerd/v2/internal/cri/opts"
"github.com/containerd/containerd/v2/internal/cri/util"
)
// updateOCIResource updates container resource limit.
func updateOCIResource(ctx context.Context, spec *runtimespec.Spec, r *runtime.UpdateContainerResourcesRequest,
config criconfig.Config) (*runtimespec.Spec, error) {
// Copy to make sure old spec is not changed.
var cloned runtimespec.Spec
if err := util.DeepCopy(&cloned, spec); err != nil {
return nil, fmt.Errorf("failed to deep copy: %w", err)
}
if cloned.Windows == nil {
cloned.Windows = &runtimespec.Windows{}
}
if err := opts.WithWindowsResources(r.GetWindows())(ctx, nil, nil, &cloned); err != nil {
return nil, fmt.Errorf("unable to set windows container resources: %w", err)
}
return &cloned, nil
}
func getResources(spec *runtimespec.Spec) interface{} {
return spec.Windows.Resources
}

View File

@@ -0,0 +1,591 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"fmt"
"sync"
"time"
"github.com/containerd/log"
"github.com/containerd/typeurl/v2"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/utils/clock"
eventtypes "github.com/containerd/containerd/v2/api/events"
apitasks "github.com/containerd/containerd/v2/api/services/tasks/v1"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/internal/cri/constants"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
containerdio "github.com/containerd/containerd/v2/pkg/cio"
"github.com/containerd/containerd/v2/pkg/events"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/errdefs"
)
const (
backOffInitDuration = 1 * time.Second
backOffMaxDuration = 5 * time.Minute
backOffExpireCheckDuration = 1 * time.Second
// handleEventTimeout is the timeout for handling 1 event. Event monitor
// handles events in serial, if one event blocks the event monitor, no
// other events can be handled.
// Add a timeout for each event handling, events that timeout will be requeued and
// handled again in the future.
handleEventTimeout = 10 * time.Second
)
// eventMonitor monitors containerd event and updates internal state correspondingly.
type eventMonitor struct {
c *criService
ch <-chan *events.Envelope
errCh <-chan error
ctx context.Context
cancel context.CancelFunc
backOff *backOff
}
type backOff struct {
// queuePoolMu is mutex used to protect the queuePool map
queuePoolMu sync.Mutex
queuePool map[string]*backOffQueue
// tickerMu is mutex used to protect the ticker.
tickerMu sync.Mutex
ticker *time.Ticker
minDuration time.Duration
maxDuration time.Duration
checkDuration time.Duration
clock clock.Clock
}
type backOffQueue struct {
events []interface{}
expireTime time.Time
duration time.Duration
clock clock.Clock
}
// Create new event monitor. New event monitor will start subscribing containerd event. All events
// happen after it should be monitored.
func newEventMonitor(c *criService) *eventMonitor {
ctx, cancel := context.WithCancel(context.Background())
return &eventMonitor{
c: c,
ctx: ctx,
cancel: cancel,
backOff: newBackOff(),
}
}
// subscribe starts to subscribe containerd events.
func (em *eventMonitor) subscribe(subscriber events.Subscriber) {
// note: filters are any match, if you want any match but not in namespace foo
// then you have to manually filter namespace foo
filters := []string{
`topic=="/tasks/oom"`,
`topic~="/images/"`,
}
em.ch, em.errCh = subscriber.Subscribe(em.ctx, filters...)
}
// startSandboxExitMonitor starts an exit monitor for a given sandbox.
func (em *eventMonitor) startSandboxExitMonitor(ctx context.Context, id string, exitCh <-chan containerd.ExitStatus) <-chan struct{} {
stopCh := make(chan struct{})
go func() {
defer close(stopCh)
select {
case exitRes := <-exitCh:
exitStatus, exitedAt, err := exitRes.Result()
if err != nil {
log.L.WithError(err).Errorf("failed to get task exit status for %q", id)
exitStatus = unknownExitCode
exitedAt = time.Now()
}
e := &eventtypes.SandboxExit{
SandboxID: id,
ExitStatus: exitStatus,
ExitedAt: protobuf.ToTimestamp(exitedAt),
}
log.L.Debugf("received exit event %+v", e)
err = func() error {
dctx := ctrdutil.NamespacedContext()
dctx, dcancel := context.WithTimeout(dctx, handleEventTimeout)
defer dcancel()
sb, err := em.c.sandboxStore.Get(e.GetSandboxID())
if err == nil {
if err := handleSandboxExit(dctx, sb, e.ExitStatus, e.ExitedAt.AsTime(), em.c); err != nil {
return err
}
return nil
} else if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to get sandbox %s: %w", e.SandboxID, err)
}
return nil
}()
if err != nil {
log.L.WithError(err).Errorf("failed to handle sandbox TaskExit event %+v", e)
em.backOff.enBackOff(id, e)
}
return
case <-ctx.Done():
}
}()
return stopCh
}
// startContainerExitMonitor starts an exit monitor for a given container.
func (em *eventMonitor) startContainerExitMonitor(ctx context.Context, id string, pid uint32, exitCh <-chan containerd.ExitStatus) <-chan struct{} {
stopCh := make(chan struct{})
go func() {
defer close(stopCh)
select {
case exitRes := <-exitCh:
exitStatus, exitedAt, err := exitRes.Result()
if err != nil {
log.L.WithError(err).Errorf("failed to get task exit status for %q", id)
exitStatus = unknownExitCode
exitedAt = time.Now()
}
e := &eventtypes.TaskExit{
ContainerID: id,
ID: id,
Pid: pid,
ExitStatus: exitStatus,
ExitedAt: protobuf.ToTimestamp(exitedAt),
}
log.L.Debugf("received exit event %+v", e)
err = func() error {
dctx := ctrdutil.NamespacedContext()
dctx, dcancel := context.WithTimeout(dctx, handleEventTimeout)
defer dcancel()
cntr, err := em.c.containerStore.Get(e.ID)
if err == nil {
if err := handleContainerExit(dctx, e, cntr, cntr.SandboxID, em.c); err != nil {
return err
}
return nil
} else if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to get container %s: %w", e.ID, err)
}
return nil
}()
if err != nil {
log.L.WithError(err).Errorf("failed to handle container TaskExit event %+v", e)
em.backOff.enBackOff(id, e)
}
return
case <-ctx.Done():
}
}()
return stopCh
}
func convertEvent(e typeurl.Any) (string, interface{}, error) {
id := ""
evt, err := typeurl.UnmarshalAny(e)
if err != nil {
return "", nil, fmt.Errorf("failed to unmarshalany: %w", err)
}
switch e := evt.(type) {
case *eventtypes.TaskOOM:
id = e.ContainerID
case *eventtypes.SandboxExit:
id = e.SandboxID
case *eventtypes.ImageCreate:
id = e.Name
case *eventtypes.ImageUpdate:
id = e.Name
case *eventtypes.ImageDelete:
id = e.Name
default:
return "", nil, errors.New("unsupported event")
}
return id, evt, nil
}
// start starts the event monitor which monitors and handles all subscribed events.
// It returns an error channel for the caller to wait for stop errors from the
// event monitor.
//
// NOTE:
// 1. start must be called after subscribe.
// 2. The task exit event has been handled in individual startSandboxExitMonitor
// or startContainerExitMonitor goroutine at the first. If the goroutine fails,
// it puts the event into backoff retry queue and event monitor will handle
// it later.
func (em *eventMonitor) start() <-chan error {
errCh := make(chan error)
if em.ch == nil || em.errCh == nil {
panic("event channel is nil")
}
backOffCheckCh := em.backOff.start()
go func() {
defer close(errCh)
for {
select {
case e := <-em.ch:
log.L.Debugf("Received containerd event timestamp - %v, namespace - %q, topic - %q", e.Timestamp, e.Namespace, e.Topic)
if e.Namespace != constants.K8sContainerdNamespace {
log.L.Debugf("Ignoring events in namespace - %q", e.Namespace)
break
}
id, evt, err := convertEvent(e.Event)
if err != nil {
log.L.WithError(err).Errorf("Failed to convert event %+v", e)
break
}
if em.backOff.isInBackOff(id) {
log.L.Infof("Events for %q is in backoff, enqueue event %+v", id, evt)
em.backOff.enBackOff(id, evt)
break
}
if err := em.handleEvent(evt); err != nil {
log.L.WithError(err).Errorf("Failed to handle event %+v for %s", evt, id)
em.backOff.enBackOff(id, evt)
}
case err := <-em.errCh:
// Close errCh in defer directly if there is no error.
if err != nil {
log.L.WithError(err).Error("Failed to handle event stream")
errCh <- err
}
return
case <-backOffCheckCh:
ids := em.backOff.getExpiredIDs()
for _, id := range ids {
queue := em.backOff.deBackOff(id)
for i, evt := range queue.events {
if err := em.handleEvent(evt); err != nil {
log.L.WithError(err).Errorf("Failed to handle backOff event %+v for %s", evt, id)
em.backOff.reBackOff(id, queue.events[i:], queue.duration)
break
}
}
}
}
}
}()
return errCh
}
// stop stops the event monitor. It will close the event channel.
// Once event monitor is stopped, it can't be started.
func (em *eventMonitor) stop() {
em.backOff.stop()
em.cancel()
}
// handleEvent handles a containerd event.
func (em *eventMonitor) handleEvent(any interface{}) error {
ctx := ctrdutil.NamespacedContext()
ctx, cancel := context.WithTimeout(ctx, handleEventTimeout)
defer cancel()
switch e := any.(type) {
case *eventtypes.TaskExit:
log.L.Infof("TaskExit event %+v", e)
// Use ID instead of ContainerID to rule out TaskExit event for exec.
cntr, err := em.c.containerStore.Get(e.ID)
if err == nil {
if err := handleContainerExit(ctx, e, cntr, cntr.SandboxID, em.c); err != nil {
return fmt.Errorf("failed to handle container TaskExit event: %w", err)
}
return nil
} else if !errdefs.IsNotFound(err) {
return fmt.Errorf("can't find container for TaskExit event: %w", err)
}
sb, err := em.c.sandboxStore.Get(e.ID)
if err == nil {
if err := handleSandboxExit(ctx, sb, e.ExitStatus, e.ExitedAt.AsTime(), em.c); err != nil {
return fmt.Errorf("failed to handle sandbox TaskExit event: %w", err)
}
return nil
} else if !errdefs.IsNotFound(err) {
return fmt.Errorf("can't find sandbox for TaskExit event: %w", err)
}
return nil
case *eventtypes.SandboxExit:
log.L.Infof("SandboxExit event %+v", e)
sb, err := em.c.sandboxStore.Get(e.GetSandboxID())
if err == nil {
if err := handleSandboxExit(ctx, sb, e.ExitStatus, e.ExitedAt.AsTime(), em.c); err != nil {
return fmt.Errorf("failed to handle sandbox TaskExit event: %w", err)
}
return nil
} else if !errdefs.IsNotFound(err) {
return fmt.Errorf("can't find sandbox for TaskExit event: %w", err)
}
return nil
case *eventtypes.TaskOOM:
log.L.Infof("TaskOOM event %+v", e)
// For TaskOOM, we only care which container it belongs to.
cntr, err := em.c.containerStore.Get(e.ContainerID)
if err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("can't find container for TaskOOM event: %w", err)
}
return nil
}
err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
status.Reason = oomExitReason
return status, nil
})
if err != nil {
return fmt.Errorf("failed to update container status for TaskOOM event: %w", err)
}
// TODO: ImageService should handle these events directly
case *eventtypes.ImageCreate:
log.L.Infof("ImageCreate event %+v", e)
return em.c.UpdateImage(ctx, e.Name)
case *eventtypes.ImageUpdate:
log.L.Infof("ImageUpdate event %+v", e)
return em.c.UpdateImage(ctx, e.Name)
case *eventtypes.ImageDelete:
log.L.Infof("ImageDelete event %+v", e)
return em.c.UpdateImage(ctx, e.Name)
}
return nil
}
// handleContainerExit handles TaskExit event for container.
func handleContainerExit(ctx context.Context, e *eventtypes.TaskExit, cntr containerstore.Container, sandboxID string, c *criService) error {
// Attach container IO so that `Delete` could cleanup the stream properly.
task, err := cntr.Container.Task(ctx,
func(*containerdio.FIFOSet) (containerdio.IO, error) {
// We can't directly return cntr.IO here, because
// even if cntr.IO is nil, the cio.IO interface
// is not.
// See https://tour.golang.org/methods/12:
// Note that an interface value that holds a nil
// concrete value is itself non-nil.
if cntr.IO != nil {
return cntr.IO, nil
}
return nil, nil
},
)
if err != nil {
if !errdefs.IsNotFound(err) && !errdefs.IsUnavailable(err) {
return fmt.Errorf("failed to load task for container: %w", err)
}
} else {
// TODO(random-liu): [P1] This may block the loop, we may want to spawn a worker
if _, err = task.Delete(ctx, c.nri.WithContainerExit(&cntr), containerd.WithProcessKill); err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to stop container: %w", err)
}
// Move on to make sure container status is updated.
}
}
// NOTE: Both sb.Container.Task and task.Delete interface always ensures
// that the status of target task. However, the interfaces return
// ErrNotFound, which doesn't mean that the shim instance doesn't exist.
//
// There are two caches for task in containerd:
//
// 1. io.containerd.service.v1.tasks-service
// 2. io.containerd.runtime.v2.task
//
// First one is to maintain the shim connection and shutdown the shim
// in Delete API. And the second one is to maintain the lifecycle of
// task in shim server.
//
// So, if the shim instance is running and task has been deleted in shim
// server, the sb.Container.Task and task.Delete will receive the
// ErrNotFound. If we don't delete the shim instance in io.containerd.service.v1.tasks-service,
// shim will be leaky.
//
// Based on containerd/containerd#7496 issue, when host is under IO
// pressure, the umount2 syscall will take more than 10 seconds so that
// the CRI plugin will cancel this task.Delete call. However, the shim
// server isn't aware about this. After return from umount2 syscall, the
// shim server continue delete the task record. And then CRI plugin
// retries to delete task and retrieves ErrNotFound and marks it as
// stopped. Therefore, The shim is leaky.
//
// It's hard to handle the connection lost or request canceled cases in
// shim server. We should call Delete API to io.containerd.service.v1.tasks-service
// to ensure that shim instance is shutdown.
//
// REF:
// 1. https://github.com/containerd/containerd/issues/7496#issuecomment-1671100968
// 2. https://github.com/containerd/containerd/issues/8931
if errdefs.IsNotFound(err) {
_, err = c.client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: cntr.Container.ID()})
if err != nil {
err = errdefs.FromGRPC(err)
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to cleanup container %s in task-service: %w", cntr.Container.ID(), err)
}
}
log.L.Infof("Ensure that container %s in task-service has been cleanup successfully", cntr.Container.ID())
}
err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
if status.FinishedAt == 0 {
status.Pid = 0
status.FinishedAt = protobuf.FromTimestamp(e.ExitedAt).UnixNano()
status.ExitCode = int32(e.ExitStatus)
}
// Unknown state can only transit to EXITED state, so we need
// to handle unknown state here.
if status.Unknown {
log.L.Debugf("Container %q transited from UNKNOWN to EXITED", cntr.ID)
status.Unknown = false
}
return status, nil
})
if err != nil {
return fmt.Errorf("failed to update container state: %w", err)
}
// Using channel to propagate the information of container stop
cntr.Stop()
c.generateAndSendContainerEvent(ctx, cntr.ID, sandboxID, runtime.ContainerEventType_CONTAINER_STOPPED_EVENT)
return nil
}
// handleSandboxExit handles sandbox exit event.
func handleSandboxExit(ctx context.Context, sb sandboxstore.Sandbox, exitStatus uint32, exitTime time.Time, c *criService) error {
if err := sb.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
status.State = sandboxstore.StateNotReady
status.Pid = 0
status.ExitStatus = exitStatus
status.ExitedAt = exitTime
return status, nil
}); err != nil {
return fmt.Errorf("failed to update sandbox state: %w", err)
}
// Using channel to propagate the information of sandbox stop
sb.Stop()
c.generateAndSendContainerEvent(ctx, sb.ID, sb.ID, runtime.ContainerEventType_CONTAINER_STOPPED_EVENT)
return nil
}
func newBackOff() *backOff {
return &backOff{
queuePool: map[string]*backOffQueue{},
minDuration: backOffInitDuration,
maxDuration: backOffMaxDuration,
checkDuration: backOffExpireCheckDuration,
clock: clock.RealClock{},
}
}
func (b *backOff) getExpiredIDs() []string {
b.queuePoolMu.Lock()
defer b.queuePoolMu.Unlock()
var ids []string
for id, q := range b.queuePool {
if q.isExpire() {
ids = append(ids, id)
}
}
return ids
}
func (b *backOff) isInBackOff(key string) bool {
b.queuePoolMu.Lock()
defer b.queuePoolMu.Unlock()
if _, ok := b.queuePool[key]; ok {
return true
}
return false
}
// enBackOff start to backOff and put event to the tail of queue
func (b *backOff) enBackOff(key string, evt interface{}) {
b.queuePoolMu.Lock()
defer b.queuePoolMu.Unlock()
if queue, ok := b.queuePool[key]; ok {
queue.events = append(queue.events, evt)
return
}
b.queuePool[key] = newBackOffQueue([]interface{}{evt}, b.minDuration, b.clock)
}
// enBackOff get out the whole queue
func (b *backOff) deBackOff(key string) *backOffQueue {
b.queuePoolMu.Lock()
defer b.queuePoolMu.Unlock()
queue := b.queuePool[key]
delete(b.queuePool, key)
return queue
}
// enBackOff start to backOff again and put events to the queue
func (b *backOff) reBackOff(key string, events []interface{}, oldDuration time.Duration) {
b.queuePoolMu.Lock()
defer b.queuePoolMu.Unlock()
duration := 2 * oldDuration
if duration > b.maxDuration {
duration = b.maxDuration
}
b.queuePool[key] = newBackOffQueue(events, duration, b.clock)
}
func (b *backOff) start() <-chan time.Time {
b.tickerMu.Lock()
defer b.tickerMu.Unlock()
b.ticker = time.NewTicker(b.checkDuration)
return b.ticker.C
}
func (b *backOff) stop() {
b.tickerMu.Lock()
defer b.tickerMu.Unlock()
if b.ticker != nil {
b.ticker.Stop()
}
}
func newBackOffQueue(events []interface{}, init time.Duration, c clock.Clock) *backOffQueue {
return &backOffQueue{
events: events,
duration: init,
expireTime: c.Now().Add(init),
clock: c,
}
}
func (q *backOffQueue) isExpire() bool {
// return time.Now >= expireTime
return !q.clock.Now().Before(q.expireTime)
}

View File

@@ -0,0 +1,136 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
"time"
eventtypes "github.com/containerd/containerd/v2/api/events"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/typeurl/v2"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
testingclock "k8s.io/utils/clock/testing"
)
// TestBackOff tests the logic of backOff struct.
func TestBackOff(t *testing.T) {
testStartTime := time.Now()
testClock := testingclock.NewFakeClock(testStartTime)
inputQueues := map[string]*backOffQueue{
"container1": {
events: []interface{}{
&eventtypes.TaskOOM{ContainerID: "container1"},
&eventtypes.TaskOOM{ContainerID: "container1"},
},
},
"container2": {
events: []interface{}{
&eventtypes.TaskOOM{ContainerID: "container2"},
&eventtypes.TaskOOM{ContainerID: "container2"},
},
},
}
expectedQueues := map[string]*backOffQueue{
"container2": {
events: []interface{}{
&eventtypes.TaskOOM{ContainerID: "container2"},
&eventtypes.TaskOOM{ContainerID: "container2"},
},
expireTime: testClock.Now().Add(backOffInitDuration),
duration: backOffInitDuration,
clock: testClock,
},
"container1": {
events: []interface{}{
&eventtypes.TaskOOM{ContainerID: "container1"},
&eventtypes.TaskOOM{ContainerID: "container1"},
},
expireTime: testClock.Now().Add(backOffInitDuration),
duration: backOffInitDuration,
clock: testClock,
},
}
t.Logf("Should be able to backOff a event")
actual := newBackOff()
actual.clock = testClock
for k, queue := range inputQueues {
for _, event := range queue.events {
actual.enBackOff(k, event)
}
}
assert.Equal(t, actual.queuePool, expectedQueues)
t.Logf("Should be able to check if the container is in backOff state")
for k, queue := range inputQueues {
for _, e := range queue.events {
evt, err := typeurl.MarshalAny(e)
assert.NoError(t, err)
key, _, err := convertEvent(evt)
assert.NoError(t, err)
assert.Equal(t, k, key)
assert.Equal(t, actual.isInBackOff(key), true)
}
}
t.Logf("Should be able to check that a container isn't in backOff state")
notExistKey := "containerNotExist"
assert.Equal(t, actual.isInBackOff(notExistKey), false)
t.Logf("No containers should be expired")
assert.Empty(t, actual.getExpiredIDs())
t.Logf("Should be able to get all keys which are expired for backOff")
testClock.Sleep(backOffInitDuration)
actKeyList := actual.getExpiredIDs()
assert.Equal(t, len(inputQueues), len(actKeyList))
for k := range inputQueues {
assert.Contains(t, actKeyList, k)
}
t.Logf("Should be able to get out all backOff events")
doneQueues := map[string]*backOffQueue{}
for k := range inputQueues {
actQueue := actual.deBackOff(k)
doneQueues[k] = actQueue
assert.True(t, cmp.Equal(actQueue.events, expectedQueues[k].events, protobuf.Compare))
}
t.Logf("Should not get out the event again after having got out the backOff event")
for k := range inputQueues {
var expect *backOffQueue
actQueue := actual.deBackOff(k)
assert.Equal(t, actQueue, expect)
}
t.Logf("Should be able to reBackOff")
for k, queue := range doneQueues {
failEventIndex := 1
events := queue.events[failEventIndex:]
actual.reBackOff(k, events, queue.duration)
actQueue := actual.deBackOff(k)
expQueue := &backOffQueue{
events: events,
expireTime: testClock.Now().Add(2 * queue.duration),
duration: 2 * queue.duration,
clock: testClock,
}
assert.Equal(t, actQueue, expQueue)
}
}

View File

@@ -0,0 +1,33 @@
//go:build gofuzz
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"fmt"
"github.com/containerd/containerd/v2/internal/cri/store/sandbox"
)
func SandboxStore(cs CRIService) (*sandbox.Store, error) {
s, ok := cs.(*criService)
if !ok {
return nil, fmt.Errorf("%+v is not sbserver.criService", cs)
}
return s.sandboxStore, nil
}

View File

@@ -0,0 +1,630 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
"path"
"path/filepath"
"regexp"
goruntime "runtime"
"strconv"
"strings"
"time"
"github.com/containerd/typeurl/v2"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/containers"
crilabels "github.com/containerd/containerd/v2/internal/cri/labels"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
imagestore "github.com/containerd/containerd/v2/internal/cri/store/image"
clabels "github.com/containerd/containerd/v2/pkg/labels"
"github.com/containerd/errdefs"
"github.com/containerd/log"
)
// TODO: Move common helpers for sbserver and podsandbox to a dedicated package once basic services are functinal.
const (
// errorStartReason is the exit reason when fails to start container.
errorStartReason = "StartError"
// errorStartExitCode is the exit code when fails to start container.
// 128 is the same with Docker's behavior.
// TODO(windows): Figure out what should be used for windows.
errorStartExitCode = 128
// completeExitReason is the exit reason when container exits with code 0.
completeExitReason = "Completed"
// errorExitReason is the exit reason when container exits with code non-zero.
errorExitReason = "Error"
// oomExitReason is the exit reason when process in container is oom killed.
oomExitReason = "OOMKilled"
// sandboxesDir contains all sandbox root. A sandbox root is the running
// directory of the sandbox, all files created for the sandbox will be
// placed under this directory.
sandboxesDir = "sandboxes"
// containersDir contains all container root.
containersDir = "containers"
// Delimiter used to construct container/sandbox names.
nameDelimiter = "_"
// defaultIfName is the default network interface for the pods
defaultIfName = "eth0"
// devShm is the default path of /dev/shm.
devShm = "/dev/shm"
// etcHosts is the default path of /etc/hosts file.
etcHosts = "/etc/hosts"
// etcHostname is the default path of /etc/hostname file.
etcHostname = "/etc/hostname"
// resolvConfPath is the abs path of resolv.conf on host or container.
resolvConfPath = "/etc/resolv.conf"
)
// getSandboxRootDir returns the root directory for managing sandbox files,
// e.g. hosts files.
func (c *criService) getSandboxRootDir(id string) string {
return filepath.Join(c.config.RootDir, sandboxesDir, id)
}
// getVolatileSandboxRootDir returns the root directory for managing volatile sandbox files,
// e.g. named pipes.
func (c *criService) getVolatileSandboxRootDir(id string) string {
return filepath.Join(c.config.StateDir, sandboxesDir, id)
}
// getSandboxHostname returns the hostname file path inside the sandbox root directory.
func (c *criService) getSandboxHostname(id string) string {
return filepath.Join(c.getSandboxRootDir(id), "hostname")
}
// getSandboxHosts returns the hosts file path inside the sandbox root directory.
func (c *criService) getSandboxHosts(id string) string {
return filepath.Join(c.getSandboxRootDir(id), "hosts")
}
// getResolvPath returns resolv.conf filepath for specified sandbox.
func (c *criService) getResolvPath(id string) string {
return filepath.Join(c.getSandboxRootDir(id), "resolv.conf")
}
// getSandboxDevShm returns the shm file path inside the sandbox root directory.
func (c *criService) getSandboxDevShm(id string) string {
return filepath.Join(c.getVolatileSandboxRootDir(id), "shm")
}
// makeSandboxName generates sandbox name from sandbox metadata. The name
// generated is unique as long as sandbox metadata is unique.
func makeSandboxName(s *runtime.PodSandboxMetadata) string {
return strings.Join([]string{
s.Name, // 0
s.Namespace, // 1
s.Uid, // 2
strconv.FormatUint(uint64(s.Attempt), 10), // 3
}, nameDelimiter)
}
// makeContainerName generates container name from sandbox and container metadata.
// The name generated is unique as long as the sandbox container combination is
// unique.
func makeContainerName(c *runtime.ContainerMetadata, s *runtime.PodSandboxMetadata) string {
return strings.Join([]string{
c.Name, // 0: container name
s.Name, // 1: pod name
s.Namespace, // 2: pod namespace
s.Uid, // 3: pod uid
strconv.FormatUint(uint64(c.Attempt), 10), // 4: attempt number of creating the container
}, nameDelimiter)
}
// getContainerRootDir returns the root directory for managing container files,
// e.g. state checkpoint.
func (c *criService) getContainerRootDir(id string) string {
return filepath.Join(c.config.RootDir, containersDir, id)
}
// getVolatileContainerRootDir returns the root directory for managing volatile container files,
// e.g. named pipes.
func (c *criService) getVolatileContainerRootDir(id string) string {
return filepath.Join(c.config.StateDir, containersDir, id)
}
// criContainerStateToString formats CRI container state to string.
func criContainerStateToString(state runtime.ContainerState) string {
return runtime.ContainerState_name[int32(state)]
}
// toContainerdImage converts an image object in image store to containerd image handler.
func (c *criService) toContainerdImage(ctx context.Context, image imagestore.Image) (containerd.Image, error) {
// image should always have at least one reference.
if len(image.References) == 0 {
return nil, fmt.Errorf("invalid image with no reference %q", image.ID)
}
return c.client.GetImage(ctx, image.References[0])
}
// getUserFromImage gets uid or user name of the image user.
// If user is numeric, it will be treated as uid; or else, it is treated as user name.
func getUserFromImage(user string) (*int64, string) {
// return both empty if user is not specified in the image.
if user == "" {
return nil, ""
}
// split instances where the id may contain user:group
user = strings.Split(user, ":")[0]
// user could be either uid or user name. Try to interpret as numeric uid.
uid, err := strconv.ParseInt(user, 10, 64)
if err != nil {
// If user is non numeric, assume it's user name.
return nil, user
}
// If user is a numeric uid.
return &uid, ""
}
// validateTargetContainer checks that a container is a valid
// target for a container using PID NamespaceMode_TARGET.
// The target container must be in the same sandbox and must be running.
// Returns the target container for convenience.
func (c *criService) validateTargetContainer(sandboxID, targetContainerID string) (containerstore.Container, error) {
targetContainer, err := c.containerStore.Get(targetContainerID)
if err != nil {
return containerstore.Container{}, fmt.Errorf("container %q does not exist: %w", targetContainerID, err)
}
targetSandboxID := targetContainer.Metadata.SandboxID
if targetSandboxID != sandboxID {
return containerstore.Container{},
fmt.Errorf("container %q (sandbox %s) does not belong to sandbox %s", targetContainerID, targetSandboxID, sandboxID)
}
status := targetContainer.Status.Get()
if state := status.State(); state != runtime.ContainerState_CONTAINER_RUNNING {
return containerstore.Container{}, fmt.Errorf("container %q is not running - in state %s", targetContainerID, state)
}
return targetContainer, nil
}
// isInCRIMounts checks whether a destination is in CRI mount list.
func isInCRIMounts(dst string, mounts []*runtime.Mount) bool {
for _, m := range mounts {
if filepath.Clean(m.ContainerPath) == filepath.Clean(dst) {
return true
}
}
return false
}
// filterLabel returns a label filter. Use `%q` here because containerd
// filter needs extra quote to work properly.
func filterLabel(k, v string) string {
return fmt.Sprintf("labels.%q==%q", k, v)
}
// buildLabel builds the labels from config to be passed to containerd
func buildLabels(configLabels, imageConfigLabels map[string]string, containerType string) map[string]string {
labels := make(map[string]string)
for k, v := range imageConfigLabels {
if err := clabels.Validate(k, v); err == nil {
labels[k] = v
} else {
// In case the image label is invalid, we output a warning and skip adding it to the
// container.
log.L.WithError(err).Warnf("unable to add image label with key %s to the container", k)
}
}
// labels from the CRI request (config) will override labels in the image config
for k, v := range configLabels {
labels[k] = v
}
labels[crilabels.ContainerKindLabel] = containerType
return labels
}
// getRuntimeOptions get runtime options from container metadata.
func getRuntimeOptions(c containers.Container) (interface{}, error) {
from := c.Runtime.Options
if from == nil || from.GetValue() == nil {
return nil, nil
}
opts, err := typeurl.UnmarshalAny(from)
if err != nil {
return nil, err
}
return opts, nil
}
const (
// unknownExitCode is the exit code when exit reason is unknown.
unknownExitCode = 255
// unknownExitReason is the exit reason when exit reason is unknown.
unknownExitReason = "Unknown"
)
// unknownContainerStatus returns the default container status when its status is unknown.
func unknownContainerStatus() containerstore.Status {
return containerstore.Status{
CreatedAt: 0,
StartedAt: 0,
FinishedAt: 0,
ExitCode: unknownExitCode,
Reason: unknownExitReason,
Unknown: true,
}
}
// getPassthroughAnnotations filters requested pod annotations by comparing
// against permitted annotations for the given runtime.
func getPassthroughAnnotations(podAnnotations map[string]string,
runtimePodAnnotations []string) (passthroughAnnotations map[string]string) {
passthroughAnnotations = make(map[string]string)
for podAnnotationKey, podAnnotationValue := range podAnnotations {
for _, pattern := range runtimePodAnnotations {
// Use path.Match instead of filepath.Match here.
// filepath.Match treated `\\` as path separator
// on windows, which is not what we want.
if ok, _ := path.Match(pattern, podAnnotationKey); ok {
passthroughAnnotations[podAnnotationKey] = podAnnotationValue
}
}
}
return passthroughAnnotations
}
// copyResourcesToStatus copys container resource contraints from spec to
// container status.
// This will need updates when new fields are added to ContainerResources.
func copyResourcesToStatus(spec *runtimespec.Spec, status containerstore.Status) containerstore.Status {
status.Resources = &runtime.ContainerResources{}
if spec.Linux != nil {
status.Resources.Linux = &runtime.LinuxContainerResources{}
if spec.Process != nil && spec.Process.OOMScoreAdj != nil {
status.Resources.Linux.OomScoreAdj = int64(*spec.Process.OOMScoreAdj)
}
if spec.Linux.Resources == nil {
return status
}
if spec.Linux.Resources.CPU != nil {
if spec.Linux.Resources.CPU.Period != nil {
status.Resources.Linux.CpuPeriod = int64(*spec.Linux.Resources.CPU.Period)
}
if spec.Linux.Resources.CPU.Quota != nil {
status.Resources.Linux.CpuQuota = *spec.Linux.Resources.CPU.Quota
}
if spec.Linux.Resources.CPU.Shares != nil {
status.Resources.Linux.CpuShares = int64(*spec.Linux.Resources.CPU.Shares)
}
status.Resources.Linux.CpusetCpus = spec.Linux.Resources.CPU.Cpus
status.Resources.Linux.CpusetMems = spec.Linux.Resources.CPU.Mems
}
if spec.Linux.Resources.Memory != nil {
if spec.Linux.Resources.Memory.Limit != nil {
status.Resources.Linux.MemoryLimitInBytes = *spec.Linux.Resources.Memory.Limit
}
if spec.Linux.Resources.Memory.Swap != nil {
status.Resources.Linux.MemorySwapLimitInBytes = *spec.Linux.Resources.Memory.Swap
}
}
if spec.Linux.Resources.HugepageLimits != nil {
hugepageLimits := make([]*runtime.HugepageLimit, 0, len(spec.Linux.Resources.HugepageLimits))
for _, l := range spec.Linux.Resources.HugepageLimits {
hugepageLimits = append(hugepageLimits, &runtime.HugepageLimit{
PageSize: l.Pagesize,
Limit: l.Limit,
})
}
status.Resources.Linux.HugepageLimits = hugepageLimits
}
if spec.Linux.Resources.Unified != nil {
status.Resources.Linux.Unified = spec.Linux.Resources.Unified
}
}
if spec.Windows != nil {
status.Resources.Windows = &runtime.WindowsContainerResources{}
if spec.Windows.Resources == nil {
return status
}
if spec.Windows.Resources.CPU != nil {
if spec.Windows.Resources.CPU.Shares != nil {
status.Resources.Windows.CpuShares = int64(*spec.Windows.Resources.CPU.Shares)
}
if spec.Windows.Resources.CPU.Count != nil {
status.Resources.Windows.CpuCount = int64(*spec.Windows.Resources.CPU.Count)
}
if spec.Windows.Resources.CPU.Maximum != nil {
status.Resources.Windows.CpuMaximum = int64(*spec.Windows.Resources.CPU.Maximum)
}
}
if spec.Windows.Resources.Memory != nil {
if spec.Windows.Resources.Memory.Limit != nil {
status.Resources.Windows.MemoryLimitInBytes = int64(*spec.Windows.Resources.Memory.Limit)
}
}
// TODO: Figure out how to get RootfsSizeInBytes
}
return status
}
func (c *criService) generateAndSendContainerEvent(ctx context.Context, containerID string, sandboxID string, eventType runtime.ContainerEventType) {
podSandboxStatus, err := c.getPodSandboxStatus(ctx, sandboxID)
if err != nil {
log.G(ctx).Warnf("Failed to get podSandbox status for container event for sandboxID %q: %v. Sending the event with nil podSandboxStatus.", sandboxID, err)
podSandboxStatus = nil
}
containerStatuses, err := c.getContainerStatuses(ctx, sandboxID)
if err != nil {
log.G(ctx).Errorf("Failed to get container statuses for container event for sandboxID %q: %v", sandboxID, err)
}
event := runtime.ContainerEventResponse{
ContainerId: containerID,
ContainerEventType: eventType,
CreatedAt: time.Now().UnixNano(),
PodSandboxStatus: podSandboxStatus,
ContainersStatuses: containerStatuses,
}
c.containerEventsQ.Send(event)
}
func (c *criService) getPodSandboxStatus(ctx context.Context, podSandboxID string) (*runtime.PodSandboxStatus, error) {
request := &runtime.PodSandboxStatusRequest{PodSandboxId: podSandboxID}
response, err := c.PodSandboxStatus(ctx, request)
if err != nil {
return nil, err
}
return response.GetStatus(), nil
}
func (c *criService) getContainerStatuses(ctx context.Context, podSandboxID string) ([]*runtime.ContainerStatus, error) {
response, err := c.ListContainers(ctx, &runtime.ListContainersRequest{
Filter: &runtime.ContainerFilter{
PodSandboxId: podSandboxID,
},
})
if err != nil {
return nil, err
}
containerStatuses := []*runtime.ContainerStatus{}
for _, container := range response.Containers {
statusResp, err := c.ContainerStatus(ctx, &runtime.ContainerStatusRequest{
ContainerId: container.Id,
Verbose: false,
})
if err != nil {
if errdefs.IsNotFound(err) {
continue
}
return nil, err
}
containerStatuses = append(containerStatuses, statusResp.GetStatus())
}
return containerStatuses, nil
}
// hostNetwork handles checking if host networking was requested.
func hostNetwork(config *runtime.PodSandboxConfig) bool {
var hostNet bool
switch goruntime.GOOS {
case "windows":
// Windows HostProcess pods can only run on the host network
hostNet = config.GetWindows().GetSecurityContext().GetHostProcess()
case "darwin":
// No CNI on Darwin yet.
hostNet = true
default:
// Even on other platforms, the logic containerd uses is to check if NamespaceMode == NODE.
// So this handles Linux, as well as any other platforms not governed by the cases above
// that have special quirks.
hostNet = config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE
}
return hostNet
}
// getCgroupsPath generates container cgroups path.
func getCgroupsPath(cgroupsParent, id string) string {
base := path.Base(cgroupsParent)
if strings.HasSuffix(base, ".slice") {
// For a.slice/b.slice/c.slice, base is c.slice.
// runc systemd cgroup path format is "slice:prefix:name".
return strings.Join([]string{base, "cri-containerd", id}, ":")
}
return filepath.Join(cgroupsParent, id)
}
func toLabel(selinuxOptions *runtime.SELinuxOption) ([]string, error) {
var labels []string
if selinuxOptions == nil {
return nil, nil
}
if err := checkSelinuxLevel(selinuxOptions.Level); err != nil {
return nil, err
}
if selinuxOptions.User != "" {
labels = append(labels, "user:"+selinuxOptions.User)
}
if selinuxOptions.Role != "" {
labels = append(labels, "role:"+selinuxOptions.Role)
}
if selinuxOptions.Type != "" {
labels = append(labels, "type:"+selinuxOptions.Type)
}
if selinuxOptions.Level != "" {
labels = append(labels, "level:"+selinuxOptions.Level)
}
return labels, nil
}
func checkSelinuxLevel(level string) error {
if len(level) == 0 {
return nil
}
matched, err := regexp.MatchString(`^s\d(-s\d)??(:c\d{1,4}(\.c\d{1,4})?(,c\d{1,4}(\.c\d{1,4})?)*)?$`, level)
if err != nil {
return fmt.Errorf("the format of 'level' %q is not correct: %w", level, err)
}
if !matched {
return fmt.Errorf("the format of 'level' %q is not correct", level)
}
return nil
}
func parseUsernsIDMap(runtimeIDMap []*runtime.IDMapping) ([]runtimespec.LinuxIDMapping, error) {
var m []runtimespec.LinuxIDMapping
if len(runtimeIDMap) == 0 {
return m, nil
}
if len(runtimeIDMap) > 1 {
// We only accept 1 line, because containerd.WithRemappedSnapshot() only supports that.
return m, fmt.Errorf("only one mapping line supported, got %v mapping lines", len(runtimeIDMap))
}
// We know len is 1 now.
if runtimeIDMap[0] == nil {
return m, nil
}
uidMap := *runtimeIDMap[0]
if uidMap.Length < 1 {
return m, fmt.Errorf("invalid mapping length: %v", uidMap.Length)
}
m = []runtimespec.LinuxIDMapping{
{
ContainerID: uidMap.ContainerId,
HostID: uidMap.HostId,
Size: uidMap.Length,
},
}
return m, nil
}
func parseUsernsIDs(userns *runtime.UserNamespace) (uids, gids []runtimespec.LinuxIDMapping, retErr error) {
if userns == nil {
// If userns is not set, the kubelet doesn't support this option
// and we should just fallback to no userns. This is completely
// valid.
return nil, nil, nil
}
uids, err := parseUsernsIDMap(userns.GetUids())
if err != nil {
return nil, nil, fmt.Errorf("UID mapping: %w", err)
}
gids, err = parseUsernsIDMap(userns.GetGids())
if err != nil {
return nil, nil, fmt.Errorf("GID mapping: %w", err)
}
switch mode := userns.GetMode(); mode {
case runtime.NamespaceMode_NODE:
if len(uids) != 0 || len(gids) != 0 {
return nil, nil, fmt.Errorf("can't use user namespace mode %q with mappings. Got %v UID mappings and %v GID mappings", mode, len(uids), len(gids))
}
case runtime.NamespaceMode_POD:
// This is valid, we will handle it in WithPodNamespaces().
if len(uids) == 0 || len(gids) == 0 {
return nil, nil, fmt.Errorf("can't use user namespace mode %q without UID and GID mappings", mode)
}
default:
return nil, nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
}
return uids, gids, nil
}
// sameUsernsConfig checks if the userns configs are the same. If the mappings
// on each config are the same but in different order, it returns false.
// XXX: If the runtime.UserNamespace struct changes, we should update this
// function accordingly.
func sameUsernsConfig(a, b *runtime.UserNamespace) bool {
// If both are nil, they are the same.
if a == nil && b == nil {
return true
}
// If only one is nil, they are different.
if a == nil || b == nil {
return false
}
// At this point, a is not nil nor b.
if a.GetMode() != b.GetMode() {
return false
}
aUids, aGids, err := parseUsernsIDs(a)
if err != nil {
return false
}
bUids, bGids, err := parseUsernsIDs(b)
if err != nil {
return false
}
if !sameMapping(aUids, bUids) {
return false
}
if !sameMapping(aGids, bGids) {
return false
}
return true
}
// sameMapping checks if the mappings are the same. If the mappings are the same
// but in different order, it returns false.
func sameMapping(a, b []runtimespec.LinuxIDMapping) bool {
if len(a) != len(b) {
return false
}
for x := range a {
if a[x].ContainerID != b[x].ContainerID {
return false
}
if a[x].HostID != b[x].HostID {
return false
}
if a[x].Size != b[x].Size {
return false
}
}
return true
}

View File

@@ -0,0 +1,209 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"syscall"
"time"
"github.com/containerd/cgroups/v3"
"github.com/moby/sys/mountinfo"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/mount"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/pkg/apparmor"
"github.com/containerd/containerd/v2/pkg/seccomp"
"github.com/containerd/containerd/v2/pkg/seutil"
"github.com/containerd/log"
)
// apparmorEnabled returns true if apparmor is enabled, supported by the host,
// if apparmor_parser is installed, and if we are not running docker-in-docker.
func (c *criService) apparmorEnabled() bool {
if c.config.DisableApparmor {
return false
}
return apparmor.HostSupports()
}
func (c *criService) seccompEnabled() bool {
return seccomp.IsEnabled()
}
// openLogFile opens/creates a container log file.
func openLogFile(path string) (*os.File, error) {
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return nil, err
}
return os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0640)
}
// unmountRecursive unmounts the target and all mounts underneath, starting with
// the deepest mount first.
func unmountRecursive(ctx context.Context, target string) error {
target, err := mount.CanonicalizePath(target)
if err != nil {
return err
}
toUnmount, err := mountinfo.GetMounts(mountinfo.PrefixFilter(target))
if err != nil {
return err
}
// Make the deepest mount be first
sort.Slice(toUnmount, func(i, j int) bool {
return len(toUnmount[i].Mountpoint) > len(toUnmount[j].Mountpoint)
})
for i, m := range toUnmount {
if err := mount.UnmountAll(m.Mountpoint, unix.MNT_DETACH); err != nil {
if i == len(toUnmount)-1 { // last mount
return err
}
// This is some submount, we can ignore this error for now, the final unmount will fail if this is a real problem
log.G(ctx).WithError(err).Debugf("failed to unmount submount %s", m.Mountpoint)
}
}
return nil
}
// ensureRemoveAll wraps `os.RemoveAll` to check for specific errors that can
// often be remedied.
// Only use `ensureRemoveAll` if you really want to make every effort to remove
// a directory.
//
// Because of the way `os.Remove` (and by extension `os.RemoveAll`) works, there
// can be a race between reading directory entries and then actually attempting
// to remove everything in the directory.
// These types of errors do not need to be returned since it's ok for the dir to
// be gone we can just retry the remove operation.
//
// This should not return a `os.ErrNotExist` kind of error under any circumstances
func ensureRemoveAll(ctx context.Context, dir string) error {
notExistErr := make(map[string]bool)
// track retries
exitOnErr := make(map[string]int)
maxRetry := 50
// Attempt to unmount anything beneath this dir first.
if err := unmountRecursive(ctx, dir); err != nil {
log.G(ctx).WithError(err).Debugf("failed to do initial unmount of %s", dir)
}
for {
err := os.RemoveAll(dir)
if err == nil {
return nil
}
pe, ok := err.(*os.PathError)
if !ok {
return err
}
if os.IsNotExist(err) {
if notExistErr[pe.Path] {
return err
}
notExistErr[pe.Path] = true
// There is a race where some subdir can be removed but after the
// parent dir entries have been read.
// So the path could be from `os.Remove(subdir)`
// If the reported non-existent path is not the passed in `dir` we
// should just retry, but otherwise return with no error.
if pe.Path == dir {
return nil
}
continue
}
if pe.Err != syscall.EBUSY {
return err
}
if e := mount.Unmount(pe.Path, unix.MNT_DETACH); e != nil {
return fmt.Errorf("error while removing %s: %w", dir, e)
}
if exitOnErr[pe.Path] == maxRetry {
return err
}
exitOnErr[pe.Path]++
time.Sleep(100 * time.Millisecond)
}
}
var vmbasedRuntimes = []string{
"io.containerd.kata",
}
func isVMBasedRuntime(runtimeType string) bool {
for _, rt := range vmbasedRuntimes {
if strings.Contains(runtimeType, rt) {
return true
}
}
return false
}
func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
if !isVMBasedRuntime(runtimeType) {
return nil
}
l, err := seutil.ChangeToKVM(spec.Process.SelinuxLabel)
if err != nil {
return fmt.Errorf("failed to get selinux kvm label: %w", err)
}
spec.Process.SelinuxLabel = l
return nil
}
// getCgroupsMode returns cgropu mode.
// TODO: add build constraints to cgroups package and remove this helper
func isUnifiedCgroupsMode() bool {
return cgroups.Mode() == cgroups.Unified
}
func snapshotterRemapOpts(nsOpts *runtime.NamespaceOption) ([]snapshots.Opt, error) {
snapshotOpt := []snapshots.Opt{}
usernsOpts := nsOpts.GetUsernsOptions()
if usernsOpts == nil {
return snapshotOpt, nil
}
uids, gids, err := parseUsernsIDs(usernsOpts)
if err != nil {
return nil, fmt.Errorf("user namespace configuration: %w", err)
}
if usernsOpts.GetMode() == runtime.NamespaceMode_POD {
snapshotOpt = append(snapshotOpt, containerd.WithRemapperLabels(0, uids[0].HostID, 0, gids[0].HostID, uids[0].Size))
}
return snapshotOpt, nil
}

View File

@@ -0,0 +1,47 @@
//go:build !windows && !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"os"
"github.com/opencontainers/runtime-spec/specs-go"
)
// openLogFile opens/creates a container log file.
func openLogFile(path string) (*os.File, error) {
return os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0640)
}
// ensureRemoveAll wraps `os.RemoveAll` to check for specific errors that can
// often be remedied.
// Only use `ensureRemoveAll` if you really want to make every effort to remove
// a directory.
func ensureRemoveAll(ctx context.Context, dir string) error {
return os.RemoveAll(dir)
}
func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
return nil
}
func isUnifiedCgroupsMode() bool {
return false
}

View File

@@ -0,0 +1,558 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"os"
goruntime "runtime"
"strings"
"testing"
"time"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/core/containers"
runcoptions "github.com/containerd/containerd/v2/core/runtime/v2/runc/options"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
crilabels "github.com/containerd/containerd/v2/internal/cri/labels"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
"github.com/containerd/containerd/v2/pkg/oci"
"github.com/containerd/containerd/v2/plugins"
"github.com/containerd/containerd/v2/protobuf/types"
"github.com/containerd/typeurl/v2"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pelletier/go-toml/v2"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestGetUserFromImage tests the logic of getting image uid or user name of image user.
func TestGetUserFromImage(t *testing.T) {
newI64 := func(i int64) *int64 { return &i }
for _, test := range []struct {
desc string
user string
uid *int64
name string
}{
{
desc: "no gid",
user: "0",
uid: newI64(0),
},
{
desc: "uid/gid",
user: "0:1",
uid: newI64(0),
},
{
desc: "empty user",
user: "",
},
{
desc: "multiple separators",
user: "1:2:3",
uid: newI64(1),
},
{
desc: "root username",
user: "root:root",
name: "root",
},
{
desc: "username",
user: "test:test",
name: "test",
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
actualUID, actualName := getUserFromImage(test.user)
assert.Equal(t, test.uid, actualUID)
assert.Equal(t, test.name, actualName)
})
}
}
func TestBuildLabels(t *testing.T) {
imageConfigLabels := map[string]string{
"a": "z",
"d": "y",
"long-label": strings.Repeat("example", 10000),
}
configLabels := map[string]string{
"a": "b",
"c": "d",
}
newLabels := buildLabels(configLabels, imageConfigLabels, crilabels.ContainerKindSandbox)
assert.Len(t, newLabels, 4)
assert.Equal(t, "b", newLabels["a"])
assert.Equal(t, "d", newLabels["c"])
assert.Equal(t, "y", newLabels["d"])
assert.Equal(t, crilabels.ContainerKindSandbox, newLabels[crilabels.ContainerKindLabel])
assert.NotContains(t, newLabels, "long-label")
newLabels["a"] = "e"
assert.Empty(t, configLabels[crilabels.ContainerKindLabel], "should not add new labels into original label")
assert.Equal(t, "b", configLabels["a"], "change in new labels should not affect original label")
}
func TestGenerateRuntimeOptions(t *testing.T) {
nilOpts := `
systemd_cgroup = true
[containerd]
no_pivot = true
default_runtime_name = "default"
[containerd.runtimes.runcv2]
runtime_type = "` + plugins.RuntimeRuncV2 + `"
`
nonNilOpts := `
systemd_cgroup = true
[containerd]
no_pivot = true
default_runtime_name = "default"
[containerd.runtimes.legacy.options]
Runtime = "legacy"
RuntimeRoot = "/legacy"
[containerd.runtimes.runc.options]
BinaryName = "runc"
Root = "/runc"
NoNewKeyring = true
[containerd.runtimes.runcv2]
runtime_type = "` + plugins.RuntimeRuncV2 + `"
[containerd.runtimes.runcv2.options]
BinaryName = "runc"
Root = "/runcv2"
NoNewKeyring = true
`
var nilOptsConfig, nonNilOptsConfig criconfig.Config
err := toml.Unmarshal([]byte(nilOpts), &nilOptsConfig)
require.NoError(t, err)
require.Len(t, nilOptsConfig.Runtimes, 1)
err = toml.Unmarshal([]byte(nonNilOpts), &nonNilOptsConfig)
require.NoError(t, err)
require.Len(t, nonNilOptsConfig.Runtimes, 3)
for _, test := range []struct {
desc string
r criconfig.Runtime
c criconfig.Config
expectedOptions interface{}
}{
{
desc: "when options is nil, should return nil option for io.containerd.runc.v2",
r: nilOptsConfig.Runtimes["runcv2"],
c: nilOptsConfig,
expectedOptions: nil,
},
{
desc: "when options is not nil, should be able to decode for io.containerd.runc.v2",
r: nonNilOptsConfig.Runtimes["runcv2"],
c: nonNilOptsConfig,
expectedOptions: &runcoptions.Options{
BinaryName: "runc",
Root: "/runcv2",
NoNewKeyring: true,
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
opts, err := criconfig.GenerateRuntimeOptions(test.r)
assert.NoError(t, err)
assert.Equal(t, test.expectedOptions, opts)
})
}
}
func TestEnvDeduplication(t *testing.T) {
for _, test := range []struct {
desc string
existing []string
kv [][2]string
expected []string
}{
{
desc: "single env",
kv: [][2]string{
{"a", "b"},
},
expected: []string{"a=b"},
},
{
desc: "multiple envs",
kv: [][2]string{
{"a", "b"},
{"c", "d"},
{"e", "f"},
},
expected: []string{
"a=b",
"c=d",
"e=f",
},
},
{
desc: "env override",
kv: [][2]string{
{"k1", "v1"},
{"k2", "v2"},
{"k3", "v3"},
{"k3", "v4"},
{"k1", "v5"},
{"k4", "v6"},
},
expected: []string{
"k1=v5",
"k2=v2",
"k3=v4",
"k4=v6",
},
},
{
desc: "existing env",
existing: []string{
"k1=v1",
"k2=v2",
"k3=v3",
},
kv: [][2]string{
{"k3", "v4"},
{"k2", "v5"},
{"k4", "v6"},
},
expected: []string{
"k1=v1",
"k2=v5",
"k3=v4",
"k4=v6",
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
var spec runtimespec.Spec
if len(test.existing) > 0 {
spec.Process = &runtimespec.Process{
Env: test.existing,
}
}
for _, kv := range test.kv {
oci.WithEnv([]string{kv[0] + "=" + kv[1]})(context.Background(), nil, nil, &spec)
}
assert.Equal(t, test.expected, spec.Process.Env)
})
}
}
func TestPassThroughAnnotationsFilter(t *testing.T) {
for _, test := range []struct {
desc string
podAnnotations map[string]string
runtimePodAnnotations []string
passthroughAnnotations map[string]string
}{
{
desc: "should support direct match",
podAnnotations: map[string]string{"c": "d", "d": "e"},
runtimePodAnnotations: []string{"c"},
passthroughAnnotations: map[string]string{"c": "d"},
},
{
desc: "should support wildcard match",
podAnnotations: map[string]string{
"t.f": "j",
"z.g": "o",
"z": "o",
"y.ca": "b",
"y": "b",
},
runtimePodAnnotations: []string{"*.f", "z*g", "y.c*"},
passthroughAnnotations: map[string]string{
"t.f": "j",
"z.g": "o",
"y.ca": "b",
},
},
{
desc: "should support wildcard match all",
podAnnotations: map[string]string{
"t.f": "j",
"z.g": "o",
"z": "o",
"y.ca": "b",
"y": "b",
},
runtimePodAnnotations: []string{"*"},
passthroughAnnotations: map[string]string{
"t.f": "j",
"z.g": "o",
"z": "o",
"y.ca": "b",
"y": "b",
},
},
{
desc: "should support match including path separator",
podAnnotations: map[string]string{
"matchend.com/end": "1",
"matchend.com/end1": "2",
"matchend.com/1end": "3",
"matchmid.com/mid": "4",
"matchmid.com/mi1d": "5",
"matchmid.com/mid1": "6",
"matchhead.com/head": "7",
"matchhead.com/1head": "8",
"matchhead.com/head1": "9",
"matchall.com/abc": "10",
"matchall.com/def": "11",
"end/matchend": "12",
"end1/matchend": "13",
"1end/matchend": "14",
"mid/matchmid": "15",
"mi1d/matchmid": "16",
"mid1/matchmid": "17",
"head/matchhead": "18",
"1head/matchhead": "19",
"head1/matchhead": "20",
"abc/matchall": "21",
"def/matchall": "22",
"match1/match2": "23",
"nomatch/nomatch": "24",
},
runtimePodAnnotations: []string{
"matchend.com/end*",
"matchmid.com/mi*d",
"matchhead.com/*head",
"matchall.com/*",
"end*/matchend",
"mi*d/matchmid",
"*head/matchhead",
"*/matchall",
"match*/match*",
},
passthroughAnnotations: map[string]string{
"matchend.com/end": "1",
"matchend.com/end1": "2",
"matchmid.com/mid": "4",
"matchmid.com/mi1d": "5",
"matchhead.com/head": "7",
"matchhead.com/1head": "8",
"matchall.com/abc": "10",
"matchall.com/def": "11",
"end/matchend": "12",
"end1/matchend": "13",
"mid/matchmid": "15",
"mi1d/matchmid": "16",
"head/matchhead": "18",
"1head/matchhead": "19",
"abc/matchall": "21",
"def/matchall": "22",
"match1/match2": "23",
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
passthroughAnnotations := getPassthroughAnnotations(test.podAnnotations, test.runtimePodAnnotations)
assert.Equal(t, test.passthroughAnnotations, passthroughAnnotations)
})
}
}
func TestEnsureRemoveAllNotExist(t *testing.T) {
// should never return an error for a non-existent path
if err := ensureRemoveAll(context.Background(), "/non/existent/path"); err != nil {
t.Fatal(err)
}
}
func TestEnsureRemoveAllWithDir(t *testing.T) {
dir := t.TempDir()
if err := ensureRemoveAll(context.Background(), dir); err != nil {
t.Fatal(err)
}
}
func TestEnsureRemoveAllWithFile(t *testing.T) {
tmp, err := os.CreateTemp("", "test-ensure-removeall-with-dir")
if err != nil {
t.Fatal(err)
}
tmp.Close()
if err := ensureRemoveAll(context.Background(), tmp.Name()); err != nil {
t.Fatal(err)
}
}
// Helper function for setting up an environment to test PID namespace targeting.
func addContainer(c *criService, containerID, sandboxID string, PID uint32, createdAt, startedAt, finishedAt int64) error {
meta := containerstore.Metadata{
ID: containerID,
SandboxID: sandboxID,
}
status := containerstore.Status{
Pid: PID,
CreatedAt: createdAt,
StartedAt: startedAt,
FinishedAt: finishedAt,
}
container, err := containerstore.NewContainer(meta,
containerstore.WithFakeStatus(status),
)
if err != nil {
return err
}
return c.containerStore.Add(container)
}
func TestValidateTargetContainer(t *testing.T) {
testSandboxID := "test-sandbox-uid"
// The existing container that will be targeted.
testTargetContainerID := "test-target-container"
testTargetContainerPID := uint32(4567)
// A container that has finished running and cannot be targeted.
testStoppedContainerID := "stopped-target-container"
testStoppedContainerPID := uint32(6789)
// A container from another pod.
testOtherContainerSandboxID := "other-sandbox-uid"
testOtherContainerID := "other-target-container"
testOtherContainerPID := uint32(7890)
// Container create/start/stop times.
createdAt := time.Now().Add(-15 * time.Second).UnixNano()
startedAt := time.Now().Add(-10 * time.Second).UnixNano()
finishedAt := time.Now().Add(-5 * time.Second).UnixNano()
c := newTestCRIService()
// Create a target container.
err := addContainer(c, testTargetContainerID, testSandboxID, testTargetContainerPID, createdAt, startedAt, 0)
require.NoError(t, err, "error creating test target container")
// Create a stopped container.
err = addContainer(c, testStoppedContainerID, testSandboxID, testStoppedContainerPID, createdAt, startedAt, finishedAt)
require.NoError(t, err, "error creating test stopped container")
// Create a container in another pod.
err = addContainer(c, testOtherContainerID, testOtherContainerSandboxID, testOtherContainerPID, createdAt, startedAt, 0)
require.NoError(t, err, "error creating test container in other pod")
for _, test := range []struct {
desc string
targetContainerID string
expectError bool
}{
{
desc: "target container in pod",
targetContainerID: testTargetContainerID,
expectError: false,
},
{
desc: "target stopped container in pod",
targetContainerID: testStoppedContainerID,
expectError: true,
},
{
desc: "target container does not exist",
targetContainerID: "no-container-with-this-id",
expectError: true,
},
{
desc: "target container in other pod",
targetContainerID: testOtherContainerID,
expectError: true,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
targetContainer, err := c.validateTargetContainer(testSandboxID, test.targetContainerID)
if test.expectError {
require.Error(t, err, "target should have been invalid but no error")
return
}
require.NoErrorf(t, err, "target should have been valid but got error")
assert.Equal(t, test.targetContainerID, targetContainer.ID, "returned target container does not have expected ID")
})
}
}
func TestGetRuntimeOptions(t *testing.T) {
_, err := getRuntimeOptions(containers.Container{})
require.NoError(t, err)
var pbany *types.Any // This is nil.
var typeurlAny typeurl.Any = pbany // This is typed nil.
_, err = getRuntimeOptions(containers.Container{Runtime: containers.RuntimeInfo{Options: typeurlAny}})
require.NoError(t, err)
}
func TestHostNetwork(t *testing.T) {
tests := []struct {
name string
c *runtime.PodSandboxConfig
expected bool
}{
{
name: "when pod namespace return false",
c: &runtime.PodSandboxConfig{
Linux: &runtime.LinuxPodSandboxConfig{
SecurityContext: &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
Network: runtime.NamespaceMode_POD,
},
},
},
},
expected: false,
},
{
name: "when node namespace return true",
c: &runtime.PodSandboxConfig{
Linux: &runtime.LinuxPodSandboxConfig{
SecurityContext: &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
Network: runtime.NamespaceMode_NODE,
},
},
},
},
expected: true,
},
}
for _, tt := range tests {
if goruntime.GOOS != "linux" {
t.Skip()
}
tt := tt
t.Run(tt.name, func(t *testing.T) {
if hostNetwork(tt.c) != tt.expected {
t.Errorf("failed hostNetwork got %t expected %t", hostNetwork(tt.c), tt.expected)
}
})
}
}

View File

@@ -0,0 +1,175 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"os"
"path/filepath"
"syscall"
"github.com/opencontainers/runtime-spec/specs-go"
)
// openLogFile opens/creates a container log file.
// It specifies `FILE_SHARE_DELETE` option to make sure
// log files can be rotated by kubelet.
//
// Unfortunately this needs to be maintained as Go doesn't
// have a way to set FILE_SHARE_DELETE for os.OpenFile.
// https://github.com/golang/go/issues/32088
func openLogFile(path string) (*os.File, error) {
path = fixLongPath(path)
if len(path) == 0 {
return nil, syscall.ERROR_FILE_NOT_FOUND
}
pathp, err := syscall.UTF16PtrFromString(path)
if err != nil {
return nil, err
}
createmode := uint32(syscall.OPEN_ALWAYS)
access := uint32(syscall.FILE_APPEND_DATA)
sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE | syscall.FILE_SHARE_DELETE)
h, err := syscall.CreateFile(pathp, access, sharemode, nil, createmode, syscall.FILE_ATTRIBUTE_NORMAL, 0)
if err != nil {
return nil, err
}
return os.NewFile(uintptr(h), path), nil
}
// Copyright (c) 2009 The Go Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// fixLongPath returns the extended-length (\\?\-prefixed) form of
// path when needed, in order to avoid the default 260 character file
// path limit imposed by Windows. If path is not easily converted to
// the extended-length form (for example, if path is a relative path
// or contains .. elements), or is short enough, fixLongPath returns
// path unmodified.
//
// See https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath
//
// This is copied from https://golang.org/src/path/filepath/path_windows.go.
func fixLongPath(path string) string {
// Do nothing (and don't allocate) if the path is "short".
// Empirically (at least on the Windows Server 2013 builder),
// the kernel is arbitrarily okay with < 248 bytes. That
// matches what the docs above say:
// "When using an API to create a directory, the specified
// path cannot be so long that you cannot append an 8.3 file
// name (that is, the directory name cannot exceed MAX_PATH
// minus 12)." Since MAX_PATH is 260, 260 - 12 = 248.
//
// The MSDN docs appear to say that a normal path that is 248 bytes long
// will work; empirically the path must be less then 248 bytes long.
if len(path) < 248 {
// Don't fix. (This is how Go 1.7 and earlier worked,
// not automatically generating the \\?\ form)
return path
}
// The extended form begins with \\?\, as in
// \\?\c:\windows\foo.txt or \\?\UNC\server\share\foo.txt.
// The extended form disables evaluation of . and .. path
// elements and disables the interpretation of / as equivalent
// to \. The conversion here rewrites / to \ and elides
// . elements as well as trailing or duplicate separators. For
// simplicity it avoids the conversion entirely for relative
// paths or paths containing .. elements. For now,
// \\server\share paths are not converted to
// \\?\UNC\server\share paths because the rules for doing so
// are less well-specified.
if len(path) >= 2 && path[:2] == `\\` {
// Don't canonicalize UNC paths.
return path
}
if !filepath.IsAbs(path) {
// Relative path
return path
}
const prefix = `\\?`
pathbuf := make([]byte, len(prefix)+len(path)+len(`\`))
copy(pathbuf, prefix)
n := len(path)
r, w := 0, len(prefix)
for r < n {
switch {
case os.IsPathSeparator(path[r]):
// empty block
r++
case path[r] == '.' && (r+1 == n || os.IsPathSeparator(path[r+1])):
// /./
r++
case r+1 < n && path[r] == '.' && path[r+1] == '.' && (r+2 == n || os.IsPathSeparator(path[r+2])):
// /../ is currently unhandled
return path
default:
pathbuf[w] = '\\'
w++
for ; r < n && !os.IsPathSeparator(path[r]); r++ {
pathbuf[w] = path[r]
w++
}
}
}
// A drive's root directory needs a trailing \
if w == len(`\\?\c:`) {
pathbuf[w] = '\\'
w++
}
return string(pathbuf[:w])
}
// ensureRemoveAll is a wrapper for os.RemoveAll on Windows.
func ensureRemoveAll(_ context.Context, dir string) error {
return os.RemoveAll(dir)
}
func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
return nil
}
func isUnifiedCgroupsMode() bool {
return false
}

View File

@@ -0,0 +1,71 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func TestWindowsHostNetwork(t *testing.T) {
tests := []struct {
name string
c *runtime.PodSandboxConfig
expected bool
}{
{
name: "when host process is false returns false",
c: &runtime.PodSandboxConfig{
Windows: &runtime.WindowsPodSandboxConfig{
SecurityContext: &runtime.WindowsSandboxSecurityContext{
HostProcess: false,
},
},
},
expected: false,
},
{
name: "when host process is true return true",
c: &runtime.PodSandboxConfig{
Windows: &runtime.WindowsPodSandboxConfig{
SecurityContext: &runtime.WindowsSandboxSecurityContext{
HostProcess: true,
},
},
},
expected: true,
},
{
name: "when no host process return false",
c: &runtime.PodSandboxConfig{
Windows: &runtime.WindowsPodSandboxConfig{
SecurityContext: &runtime.WindowsSandboxSecurityContext{},
},
},
expected: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if hostNetwork(tt.c) != tt.expected {
t.Errorf("failed hostNetwork got %t expected %t", hostNetwork(tt.c), tt.expected)
}
})
}
}

View File

@@ -0,0 +1,77 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"fmt"
"sync"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/log"
"github.com/containerd/platforms"
)
// LoadImages checks all existing images to ensure they are ready to
// be used for CRI. It may try to recover images which are not ready
// but will only log errors, not return any.
func (c *CRIImageService) CheckImages(ctx context.Context) error {
// TODO: Move way from `client.ListImages` to directly using image store
cImages, err := c.client.ListImages(ctx)
if err != nil {
return fmt.Errorf("unable to list images: %w", err)
}
// TODO: Support all snapshotter
snapshotter := c.config.Snapshotter
var wg sync.WaitGroup
for _, i := range cImages {
wg.Add(1)
i := i
go func() {
defer wg.Done()
// TODO: Check platform/snapshot combination. Snapshot check should come first
ok, _, _, _, err := images.Check(ctx, i.ContentStore(), i.Target(), platforms.Default())
if err != nil {
log.G(ctx).WithError(err).Errorf("Failed to check image content readiness for %q", i.Name())
return
}
if !ok {
log.G(ctx).Warnf("The image content readiness for %q is not ok", i.Name())
return
}
// Checking existence of top-level snapshot for each image being recovered.
// TODO: This logic should be done elsewhere and owned by the image service
unpacked, err := i.IsUnpacked(ctx, snapshotter)
if err != nil {
log.G(ctx).WithError(err).Warnf("Failed to check whether image is unpacked for image %s", i.Name())
return
}
if !unpacked {
log.G(ctx).Warnf("The image %s is not unpacked.", i.Name())
// TODO(random-liu): Consider whether we should try unpack here.
}
if err := c.UpdateImage(ctx, i.Name()); err != nil {
log.G(ctx).WithError(err).Warnf("Failed to update reference for image %q", i.Name())
return
}
log.G(ctx).Debugf("Loaded image %q", i.Name())
}()
}
wg.Wait()
return nil
}

View File

@@ -0,0 +1,40 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// ListImages lists existing images.
// TODO(random-liu): Add image list filters after CRI defines this more clear, and kubelet
// actually needs it.
func (c *GRPCCRIImageService) ListImages(ctx context.Context, r *runtime.ListImagesRequest) (*runtime.ListImagesResponse, error) {
// TODO: From CRIImageService directly
imagesInStore := c.imageStore.List()
var images []*runtime.Image
for _, image := range imagesInStore {
// TODO(random-liu): [P0] Make sure corresponding snapshot exists. What if snapshot
// doesn't exist?
images = append(images, toCRIImage(image))
}
return &runtime.ListImagesResponse{Images: images}, nil
}

View File

@@ -0,0 +1,113 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"testing"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
imagestore "github.com/containerd/containerd/v2/internal/cri/store/image"
)
func TestListImages(t *testing.T) {
_, c := newTestCRIService()
imagesInStore := []imagestore.Image{
{
ID: "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
ChainID: "test-chainid-1",
References: []string{
"gcr.io/library/busybox:latest",
"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
},
Size: 1000,
ImageSpec: imagespec.Image{
Config: imagespec.ImageConfig{
User: "root",
},
},
},
{
ID: "sha256:2123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
ChainID: "test-chainid-2",
References: []string{
"gcr.io/library/alpine:latest",
"gcr.io/library/alpine@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
},
Size: 2000,
ImageSpec: imagespec.Image{
Config: imagespec.ImageConfig{
User: "1234:1234",
},
},
},
{
ID: "sha256:3123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
ChainID: "test-chainid-3",
References: []string{
"gcr.io/library/ubuntu:latest",
"gcr.io/library/ubuntu@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
},
Size: 3000,
ImageSpec: imagespec.Image{
Config: imagespec.ImageConfig{
User: "nobody",
},
},
},
}
expect := []*runtime.Image{
{
Id: "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
RepoTags: []string{"gcr.io/library/busybox:latest"},
RepoDigests: []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"},
Size_: uint64(1000),
Username: "root",
},
{
Id: "sha256:2123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
RepoTags: []string{"gcr.io/library/alpine:latest"},
RepoDigests: []string{"gcr.io/library/alpine@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"},
Size_: uint64(2000),
Uid: &runtime.Int64Value{Value: 1234},
},
{
Id: "sha256:3123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
RepoTags: []string{"gcr.io/library/ubuntu:latest"},
RepoDigests: []string{"gcr.io/library/ubuntu@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"},
Size_: uint64(3000),
Username: "nobody",
},
}
var err error
c.imageStore, err = imagestore.NewFakeStore(imagesInStore)
assert.NoError(t, err)
resp, err := c.ListImages(context.Background(), &runtime.ListImagesRequest{})
assert.NoError(t, err)
require.NotNil(t, resp)
images := resp.GetImages()
assert.Len(t, images, len(expect))
for _, i := range expect {
assert.Contains(t, images, i)
}
}

View File

@@ -0,0 +1,792 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"crypto/tls"
"encoding/base64"
"fmt"
"io"
"net"
"net/http"
"net/url"
"path/filepath"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/containerd/log"
distribution "github.com/distribution/reference"
imagedigest "github.com/opencontainers/go-digest"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
eventstypes "github.com/containerd/containerd/v2/api/events"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/diff"
containerdimages "github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/core/remotes/docker"
"github.com/containerd/containerd/v2/core/remotes/docker/config"
"github.com/containerd/containerd/v2/internal/cri/annotations"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
crilabels "github.com/containerd/containerd/v2/internal/cri/labels"
snpkg "github.com/containerd/containerd/v2/pkg/snapshotters"
"github.com/containerd/containerd/v2/pkg/tracing"
"github.com/containerd/errdefs"
)
// For image management:
// 1) We have an in-memory metadata index to:
// a. Maintain ImageID -> RepoTags, ImageID -> RepoDigset relationships; ImageID
// is the digest of image config, which conforms to oci image spec.
// b. Cache constant and useful information such as image chainID, config etc.
// c. An image will be added into the in-memory metadata only when it's successfully
// pulled and unpacked.
//
// 2) We use containerd image metadata store and content store:
// a. To resolve image reference (digest/tag) locally. During pulling image, we
// normalize the image reference provided by user, and put it into image metadata
// store with resolved descriptor. For the other operations, if image id is provided,
// we'll access the in-memory metadata index directly; if image reference is
// provided, we'll normalize it, resolve it in containerd image metadata store
// to get the image id.
// b. As the backup of in-memory metadata in 1). During startup, the in-memory
// metadata could be re-constructed from image metadata store + content store.
//
// Several problems with current approach:
// 1) An entry in containerd image metadata store doesn't mean a "READY" (successfully
// pulled and unpacked) image. E.g. during pulling, the client gets killed. In that case,
// if we saw an image without snapshots or with in-complete contents during startup,
// should we re-pull the image? Or should we remove the entry?
//
// yanxuean: We can't delete image directly, because we don't know if the image
// is pulled by us. There are resource leakage.
//
// 2) Containerd suggests user to add entry before pulling the image. However if
// an error occurs during the pulling, should we remove the entry from metadata
// store? Or should we leave it there until next startup (resource leakage)?
//
// 3) The cri plugin only exposes "READY" (successfully pulled and unpacked) images
// to the user, which are maintained in the in-memory metadata index. However, it's
// still possible that someone else removes the content or snapshot by-pass the cri plugin,
// how do we detect that and update the in-memory metadata correspondingly? Always
// check whether corresponding snapshot is ready when reporting image status?
//
// 4) Is the content important if we cached necessary information in-memory
// after we pull the image? How to manage the disk usage of contents? If some
// contents are missing but snapshots are ready, is the image still "READY"?
// PullImage pulls an image with authentication config.
func (c *GRPCCRIImageService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (_ *runtime.PullImageResponse, err error) {
imageRef := r.GetImage().GetImage()
credentials := func(host string) (string, string, error) {
hostauth := r.GetAuth()
if hostauth == nil {
config := c.config.Registry.Configs[host]
if config.Auth != nil {
hostauth = toRuntimeAuthConfig(*config.Auth)
}
}
return ParseAuth(hostauth, host)
}
ref, err := c.CRIImageService.PullImage(ctx, imageRef, credentials, r.SandboxConfig)
if err != nil {
return nil, err
}
return &runtime.PullImageResponse{ImageRef: ref}, nil
}
func (c *CRIImageService) PullImage(ctx context.Context, name string, credentials func(string) (string, string, error), sandboxConfig *runtime.PodSandboxConfig) (_ string, err error) {
span := tracing.SpanFromContext(ctx)
defer func() {
// TODO: add domain label for imagePulls metrics, and we may need to provide a mechanism
// for the user to configure the set of registries that they are interested in.
if err != nil {
imagePulls.WithValues("failure").Inc()
} else {
imagePulls.WithValues("success").Inc()
}
}()
inProgressImagePulls.Inc()
defer inProgressImagePulls.Dec()
startTime := time.Now()
namedRef, err := distribution.ParseDockerRef(name)
if err != nil {
return "", fmt.Errorf("failed to parse image reference %q: %w", name, err)
}
ref := namedRef.String()
if ref != name {
log.G(ctx).Debugf("PullImage using normalized image ref: %q", ref)
}
imagePullProgressTimeout, err := time.ParseDuration(c.config.ImagePullProgressTimeout)
if err != nil {
return "", fmt.Errorf("failed to parse image_pull_progress_timeout %q: %w", c.config.ImagePullProgressTimeout, err)
}
var (
pctx, pcancel = context.WithCancel(ctx)
pullReporter = newPullProgressReporter(ref, pcancel, imagePullProgressTimeout)
resolver = docker.NewResolver(docker.ResolverOptions{
Headers: c.config.Registry.Headers,
Hosts: c.registryHosts(ctx, credentials, pullReporter.optionUpdateClient),
})
isSchema1 bool
imageHandler containerdimages.HandlerFunc = func(_ context.Context,
desc imagespec.Descriptor) ([]imagespec.Descriptor, error) {
if desc.MediaType == containerdimages.MediaTypeDockerSchema1Manifest {
isSchema1 = true
}
return nil, nil
}
)
defer pcancel()
snapshotter, err := c.snapshotterFromPodSandboxConfig(ctx, ref, sandboxConfig)
if err != nil {
return "", err
}
log.G(ctx).Debugf("PullImage %q with snapshotter %s", ref, snapshotter)
span.SetAttributes(
tracing.Attribute("image.ref", ref),
tracing.Attribute("snapshotter.name", snapshotter),
)
labels := c.getLabels(ctx, ref)
pullOpts := []containerd.RemoteOpt{
containerd.WithSchema1Conversion, //nolint:staticcheck // Ignore SA1019. Need to keep deprecated package for compatibility.
containerd.WithResolver(resolver),
containerd.WithPullSnapshotter(snapshotter),
containerd.WithPullUnpack,
containerd.WithPullLabels(labels),
containerd.WithMaxConcurrentDownloads(c.config.MaxConcurrentDownloads),
containerd.WithImageHandler(imageHandler),
containerd.WithUnpackOpts([]containerd.UnpackOpt{
containerd.WithUnpackDuplicationSuppressor(c.unpackDuplicationSuppressor),
containerd.WithUnpackApplyOpts(diff.WithSyncFs(c.config.ImagePullWithSyncFs)),
}),
}
// Temporarily removed for v2 upgrade
//pullOpts = append(pullOpts, c.encryptedImagesPullOpts()...)
if !c.config.DisableSnapshotAnnotations {
pullOpts = append(pullOpts,
containerd.WithImageHandlerWrapper(snpkg.AppendInfoHandlerWrapper(ref)))
}
if c.config.DiscardUnpackedLayers {
// Allows GC to clean layers up from the content store after unpacking
pullOpts = append(pullOpts,
containerd.WithChildLabelMap(containerdimages.ChildGCLabelsFilterLayers))
}
pullReporter.start(pctx)
image, err := c.client.Pull(pctx, ref, pullOpts...)
pcancel()
if err != nil {
return "", fmt.Errorf("failed to pull and unpack image %q: %w", ref, err)
}
span.AddEvent("Pull and unpack image complete")
configDesc, err := image.Config(ctx)
if err != nil {
return "", fmt.Errorf("get image config descriptor: %w", err)
}
imageID := configDesc.Digest.String()
repoDigest, repoTag := getRepoDigestAndTag(namedRef, image.Target().Digest, isSchema1)
for _, r := range []string{imageID, repoTag, repoDigest} {
if r == "" {
continue
}
if err := c.createImageReference(ctx, r, image.Target(), labels); err != nil {
return "", fmt.Errorf("failed to create image reference %q: %w", r, err)
}
// Update image store to reflect the newest state in containerd.
// No need to use `updateImage`, because the image reference must
// have been managed by the cri plugin.
// TODO: Use image service directly
if err := c.imageStore.Update(ctx, r); err != nil {
return "", fmt.Errorf("failed to update image store %q: %w", r, err)
}
}
const mbToByte = 1024 * 1024
size, _ := image.Size(ctx)
imagePullingSpeed := float64(size) / mbToByte / time.Since(startTime).Seconds()
imagePullThroughput.Observe(imagePullingSpeed)
log.G(ctx).Infof("Pulled image %q with image id %q, repo tag %q, repo digest %q, size %q in %s", name, imageID,
repoTag, repoDigest, strconv.FormatInt(size, 10), time.Since(startTime))
// NOTE(random-liu): the actual state in containerd is the source of truth, even we maintain
// in-memory image store, it's only for in-memory indexing. The image could be removed
// by someone else anytime, before/during/after we create the metadata. We should always
// check the actual state in containerd before using the image or returning status of the
// image.
return imageID, nil
}
// getRepoDigestAngTag returns image repoDigest and repoTag of the named image reference.
func getRepoDigestAndTag(namedRef distribution.Named, digest imagedigest.Digest, schema1 bool) (string, string) {
var repoTag, repoDigest string
if _, ok := namedRef.(distribution.NamedTagged); ok {
repoTag = namedRef.String()
}
if _, ok := namedRef.(distribution.Canonical); ok {
repoDigest = namedRef.String()
} else if !schema1 {
// digest is not actual repo digest for schema1 image.
repoDigest = namedRef.Name() + "@" + digest.String()
}
return repoDigest, repoTag
}
// ParseAuth parses AuthConfig and returns username and password/secret required by containerd.
func ParseAuth(auth *runtime.AuthConfig, host string) (string, string, error) {
if auth == nil {
return "", "", nil
}
if auth.ServerAddress != "" {
// Do not return the auth info when server address doesn't match.
u, err := url.Parse(auth.ServerAddress)
if err != nil {
return "", "", fmt.Errorf("parse server address: %w", err)
}
if host != u.Host {
return "", "", nil
}
}
if auth.Username != "" {
return auth.Username, auth.Password, nil
}
if auth.IdentityToken != "" {
return "", auth.IdentityToken, nil
}
if auth.Auth != "" {
decLen := base64.StdEncoding.DecodedLen(len(auth.Auth))
decoded := make([]byte, decLen)
_, err := base64.StdEncoding.Decode(decoded, []byte(auth.Auth))
if err != nil {
return "", "", err
}
user, passwd, ok := strings.Cut(string(decoded), ":")
if !ok {
return "", "", fmt.Errorf("invalid decoded auth: %q", decoded)
}
return user, strings.Trim(passwd, "\x00"), nil
}
// TODO(random-liu): Support RegistryToken.
// An empty auth config is valid for anonymous registry
return "", "", nil
}
// createImageReference creates image reference inside containerd image store.
// Note that because create and update are not finished in one transaction, there could be race. E.g.
// the image reference is deleted by someone else after create returns already exists, but before update
// happens.
func (c *CRIImageService) createImageReference(ctx context.Context, name string, desc imagespec.Descriptor, labels map[string]string) error {
img := containerdimages.Image{
Name: name,
Target: desc,
// Add a label to indicate that the image is managed by the cri plugin.
Labels: labels,
}
// TODO(random-liu): Figure out which is the more performant sequence create then update or
// update then create.
// TODO: Call CRIImageService directly
oldImg, err := c.images.Create(ctx, img)
if err == nil {
if c.publisher != nil {
if err := c.publisher.Publish(ctx, "/images/create", &eventstypes.ImageCreate{
Name: img.Name,
Labels: img.Labels,
}); err != nil {
return err
}
}
return nil
} else if !errdefs.IsAlreadyExists(err) {
return err
}
if oldImg.Target.Digest == img.Target.Digest && oldImg.Labels[crilabels.ImageLabelKey] == labels[crilabels.ImageLabelKey] {
return nil
}
_, err = c.images.Update(ctx, img, "target", "labels."+crilabels.ImageLabelKey)
if err == nil && c.publisher != nil {
if c.publisher != nil {
if err := c.publisher.Publish(ctx, "/images/update", &eventstypes.ImageUpdate{
Name: img.Name,
Labels: img.Labels,
}); err != nil {
return err
}
}
}
return err
}
// getLabels get image labels to be added on CRI image
func (c *CRIImageService) getLabels(ctx context.Context, name string) map[string]string {
labels := map[string]string{crilabels.ImageLabelKey: crilabels.ImageLabelValue}
for _, pinned := range c.config.PinnedImages {
if pinned == name {
labels[crilabels.PinnedImageLabelKey] = crilabels.PinnedImageLabelValue
}
}
return labels
}
// updateImage updates image store to reflect the newest state of an image reference
// in containerd. If the reference is not managed by the cri plugin, the function also
// generates necessary metadata for the image and make it managed.
func (c *CRIImageService) UpdateImage(ctx context.Context, r string) error {
// TODO: Use image service
img, err := c.client.GetImage(ctx, r)
if err != nil && !errdefs.IsNotFound(err) {
return fmt.Errorf("get image by reference: %w", err)
}
if err == nil && img.Labels()[crilabels.ImageLabelKey] != crilabels.ImageLabelValue {
// Make sure the image has the image id as its unique
// identifier that references the image in its lifetime.
configDesc, err := img.Config(ctx)
if err != nil {
return fmt.Errorf("get image id: %w", err)
}
id := configDesc.Digest.String()
labels := c.getLabels(ctx, id)
if err := c.createImageReference(ctx, id, img.Target(), labels); err != nil {
return fmt.Errorf("create image id reference %q: %w", id, err)
}
if err := c.imageStore.Update(ctx, id); err != nil {
return fmt.Errorf("update image store for %q: %w", id, err)
}
// The image id is ready, add the label to mark the image as managed.
if err := c.createImageReference(ctx, r, img.Target(), labels); err != nil {
return fmt.Errorf("create managed label: %w", err)
}
}
// If the image is not found, we should continue updating the cache,
// so that the image can be removed from the cache.
if err := c.imageStore.Update(ctx, r); err != nil {
return fmt.Errorf("update image store for %q: %w", r, err)
}
return nil
}
func hostDirFromRoots(roots []string) func(string) (string, error) {
rootfn := make([]func(string) (string, error), len(roots))
for i := range roots {
rootfn[i] = config.HostDirFromRoot(roots[i])
}
return func(host string) (dir string, err error) {
for _, fn := range rootfn {
dir, err = fn(host)
if (err != nil && !errdefs.IsNotFound(err)) || (dir != "") {
break
}
}
return
}
}
// registryHosts is the registry hosts to be used by the resolver.
func (c *CRIImageService) registryHosts(ctx context.Context, credentials func(host string) (string, string, error), updateClientFn config.UpdateClientFunc) docker.RegistryHosts {
paths := filepath.SplitList(c.config.Registry.ConfigPath)
if len(paths) > 0 {
hostOptions := config.HostOptions{
UpdateClient: updateClientFn,
}
hostOptions.Credentials = credentials
hostOptions.HostDir = hostDirFromRoots(paths)
return config.ConfigureHosts(ctx, hostOptions)
}
return func(host string) ([]docker.RegistryHost, error) {
var registries []docker.RegistryHost
endpoints, err := c.registryEndpoints(host)
if err != nil {
return nil, fmt.Errorf("get registry endpoints: %w", err)
}
for _, e := range endpoints {
u, err := url.Parse(e)
if err != nil {
return nil, fmt.Errorf("parse registry endpoint %q from mirrors: %w", e, err)
}
var (
transport = newTransport()
client = &http.Client{Transport: transport}
config = c.config.Registry.Configs[u.Host]
)
if docker.IsLocalhost(host) && u.Scheme == "http" {
// Skipping TLS verification for localhost
transport.TLSClientConfig = &tls.Config{
InsecureSkipVerify: true,
}
}
// Make a copy of `credentials`, so that different authorizers would not reference
// the same credentials variable.
credentials := credentials
if credentials == nil && config.Auth != nil {
auth := toRuntimeAuthConfig(*config.Auth)
credentials = func(host string) (string, string, error) {
return ParseAuth(auth, host)
}
}
if updateClientFn != nil {
if err := updateClientFn(client); err != nil {
return nil, fmt.Errorf("failed to update http client: %w", err)
}
}
authorizer := docker.NewDockerAuthorizer(
docker.WithAuthClient(client),
docker.WithAuthCreds(credentials))
if u.Path == "" {
u.Path = "/v2"
}
registries = append(registries, docker.RegistryHost{
Client: client,
Authorizer: authorizer,
Host: u.Host,
Scheme: u.Scheme,
Path: u.Path,
Capabilities: docker.HostCapabilityResolve | docker.HostCapabilityPull,
})
}
return registries, nil
}
}
// toRuntimeAuthConfig converts cri plugin auth config to runtime auth config.
func toRuntimeAuthConfig(a criconfig.AuthConfig) *runtime.AuthConfig {
return &runtime.AuthConfig{
Username: a.Username,
Password: a.Password,
Auth: a.Auth,
IdentityToken: a.IdentityToken,
}
}
// defaultScheme returns the default scheme for a registry host.
func defaultScheme(host string) string {
if docker.IsLocalhost(host) {
return "http"
}
return "https"
}
// addDefaultScheme returns the endpoint with default scheme
func addDefaultScheme(endpoint string) (string, error) {
if strings.Contains(endpoint, "://") {
return endpoint, nil
}
ue := "dummy://" + endpoint
u, err := url.Parse(ue)
if err != nil {
return "", err
}
return fmt.Sprintf("%s://%s", defaultScheme(u.Host), endpoint), nil
}
// registryEndpoints returns endpoints for a given host.
// It adds default registry endpoint if it does not exist in the passed-in endpoint list.
// It also supports wildcard host matching with `*`.
func (c *CRIImageService) registryEndpoints(host string) ([]string, error) {
var endpoints []string
_, ok := c.config.Registry.Mirrors[host]
if ok {
endpoints = c.config.Registry.Mirrors[host].Endpoints
} else {
endpoints = c.config.Registry.Mirrors["*"].Endpoints
}
defaultHost, err := docker.DefaultHost(host)
if err != nil {
return nil, fmt.Errorf("get default host: %w", err)
}
for i := range endpoints {
en, err := addDefaultScheme(endpoints[i])
if err != nil {
return nil, fmt.Errorf("parse endpoint url: %w", err)
}
endpoints[i] = en
}
for _, e := range endpoints {
u, err := url.Parse(e)
if err != nil {
return nil, fmt.Errorf("parse endpoint url: %w", err)
}
if u.Host == host {
// Do not add default if the endpoint already exists.
return endpoints, nil
}
}
return append(endpoints, defaultScheme(defaultHost)+"://"+defaultHost), nil
}
// newTransport returns a new HTTP transport used to pull image.
// TODO(random-liu): Create a library and share this code with `ctr`.
func newTransport() *http.Transport {
return &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
FallbackDelay: 300 * time.Millisecond,
}).DialContext,
MaxIdleConns: 10,
IdleConnTimeout: 30 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
ExpectContinueTimeout: 5 * time.Second,
}
}
// encryptedImagesPullOpts returns the necessary list of pull options required
// for decryption of encrypted images based on the cri decryption configuration.
// Temporarily removed for v2 upgrade
//func (c *CRIImageService) encryptedImagesPullOpts() []containerd.RemoteOpt {
// if c.config.ImageDecryption.KeyModel == criconfig.KeyModelNode {
// ltdd := imgcrypt.Payload{}
// decUnpackOpt := encryption.WithUnpackConfigApplyOpts(encryption.WithDecryptedUnpack(&ltdd))
// opt := containerd.WithUnpackOpts([]containerd.UnpackOpt{decUnpackOpt})
// return []containerd.RemoteOpt{opt}
// }
// return nil
//}
const (
// defaultPullProgressReportInterval represents that how often the
// reporter checks that pull progress.
defaultPullProgressReportInterval = 10 * time.Second
)
// pullProgressReporter is used to check single PullImage progress.
type pullProgressReporter struct {
ref string
cancel context.CancelFunc
reqReporter pullRequestReporter
timeout time.Duration
}
func newPullProgressReporter(ref string, cancel context.CancelFunc, timeout time.Duration) *pullProgressReporter {
return &pullProgressReporter{
ref: ref,
cancel: cancel,
reqReporter: pullRequestReporter{},
timeout: timeout,
}
}
func (reporter *pullProgressReporter) optionUpdateClient(client *http.Client) error {
client.Transport = &pullRequestReporterRoundTripper{
rt: client.Transport,
reqReporter: &reporter.reqReporter,
}
return nil
}
func (reporter *pullProgressReporter) start(ctx context.Context) {
if reporter.timeout == 0 {
log.G(ctx).Infof("no timeout and will not start pulling image %s reporter", reporter.ref)
return
}
go func() {
var (
reportInterval = defaultPullProgressReportInterval
lastSeenBytesRead = uint64(0)
lastSeenTimestamp = time.Now()
)
// check progress more frequently if timeout < default internal
if reporter.timeout < reportInterval {
reportInterval = reporter.timeout / 2
}
var ticker = time.NewTicker(reportInterval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
activeReqs, bytesRead := reporter.reqReporter.status()
log.G(ctx).WithField("ref", reporter.ref).
WithField("activeReqs", activeReqs).
WithField("totalBytesRead", bytesRead).
WithField("lastSeenBytesRead", lastSeenBytesRead).
WithField("lastSeenTimestamp", lastSeenTimestamp.Format(time.RFC3339)).
WithField("reportInterval", reportInterval).
Debugf("progress for image pull")
if activeReqs == 0 || bytesRead > lastSeenBytesRead {
lastSeenBytesRead = bytesRead
lastSeenTimestamp = time.Now()
continue
}
if time.Since(lastSeenTimestamp) > reporter.timeout {
log.G(ctx).Errorf("cancel pulling image %s because of no progress in %v", reporter.ref, reporter.timeout)
reporter.cancel()
return
}
case <-ctx.Done():
activeReqs, bytesRead := reporter.reqReporter.status()
log.G(ctx).Infof("stop pulling image %s: active requests=%v, bytes read=%v", reporter.ref, activeReqs, bytesRead)
return
}
}
}()
}
// countingReadCloser wraps http.Response.Body with pull request reporter,
// which is used by pullRequestReporterRoundTripper.
type countingReadCloser struct {
once sync.Once
rc io.ReadCloser
reqReporter *pullRequestReporter
}
// Read reads bytes from original io.ReadCloser and increases bytes in
// pull request reporter.
func (r *countingReadCloser) Read(p []byte) (int, error) {
n, err := r.rc.Read(p)
r.reqReporter.incByteRead(uint64(n))
return n, err
}
// Close closes the original io.ReadCloser and only decreases the number of
// active pull requests once.
func (r *countingReadCloser) Close() error {
err := r.rc.Close()
r.once.Do(r.reqReporter.decRequest)
return err
}
// pullRequestReporter is used to track the progress per each criapi.PullImage.
type pullRequestReporter struct {
// activeReqs indicates that current number of active pulling requests,
// including auth requests.
activeReqs int32
// totalBytesRead indicates that the total bytes has been read from
// remote registry.
totalBytesRead uint64
}
func (reporter *pullRequestReporter) incRequest() {
atomic.AddInt32(&reporter.activeReqs, 1)
}
func (reporter *pullRequestReporter) decRequest() {
atomic.AddInt32(&reporter.activeReqs, -1)
}
func (reporter *pullRequestReporter) incByteRead(nr uint64) {
atomic.AddUint64(&reporter.totalBytesRead, nr)
}
func (reporter *pullRequestReporter) status() (currentReqs int32, totalBytesRead uint64) {
currentReqs = atomic.LoadInt32(&reporter.activeReqs)
totalBytesRead = atomic.LoadUint64(&reporter.totalBytesRead)
return currentReqs, totalBytesRead
}
// pullRequestReporterRoundTripper wraps http.RoundTripper with pull request
// reporter which is used to track the progress of active http request with
// counting readable http.Response.Body.
//
// NOTE:
//
// Although containerd provides ingester manager to track the progress
// of pulling request, for example `ctr image pull` shows the console progress
// bar, it needs more CPU resources to open/read the ingested files with
// acquiring containerd metadata plugin's boltdb lock.
//
// Before sending HTTP request to registry, the containerd.Client.Pull library
// will open writer by containerd ingester manager. Based on this, the
// http.RoundTripper wrapper can track the active progress with lower overhead
// even if the ref has been locked in ingester manager by other Pull request.
type pullRequestReporterRoundTripper struct {
rt http.RoundTripper
reqReporter *pullRequestReporter
}
func (rt *pullRequestReporterRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
rt.reqReporter.incRequest()
resp, err := rt.rt.RoundTrip(req)
if err != nil {
rt.reqReporter.decRequest()
return nil, err
}
resp.Body = &countingReadCloser{
rc: resp.Body,
reqReporter: rt.reqReporter,
}
return resp, err
}
// Given that runtime information is not passed from PullImageRequest, we depend on an experimental annotation
// passed from pod sandbox config to get the runtimeHandler. The annotation key is specified in configuration.
// Once we know the runtime, try to override default snapshotter if it is set for this runtime.
// See https://github.com/containerd/containerd/issues/6657
func (c *CRIImageService) snapshotterFromPodSandboxConfig(ctx context.Context, imageRef string,
s *runtime.PodSandboxConfig) (string, error) {
snapshotter := c.config.Snapshotter
if s == nil || s.Annotations == nil {
return snapshotter, nil
}
runtimeHandler, ok := s.Annotations[annotations.RuntimeHandler]
if !ok {
return snapshotter, nil
}
// TODO: Ensure error is returned if runtime not found?
if c.runtimePlatforms != nil {
if p, ok := c.runtimePlatforms[runtimeHandler]; ok && p.Snapshotter != snapshotter {
snapshotter = p.Snapshotter
log.G(ctx).Infof("experimental: PullImage %q for runtime %s, using snapshotter %s", imageRef, runtimeHandler, snapshotter)
}
}
return snapshotter, nil
}

View File

@@ -0,0 +1,543 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"encoding/base64"
"testing"
docker "github.com/distribution/reference"
"github.com/opencontainers/go-digest"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/internal/cri/annotations"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
"github.com/containerd/containerd/v2/internal/cri/labels"
"github.com/containerd/platforms"
)
func TestParseAuth(t *testing.T) {
testUser := "username"
testPasswd := "password"
testAuthLen := base64.StdEncoding.EncodedLen(len(testUser + ":" + testPasswd))
testAuth := make([]byte, testAuthLen)
base64.StdEncoding.Encode(testAuth, []byte(testUser+":"+testPasswd))
invalidAuth := make([]byte, testAuthLen)
base64.StdEncoding.Encode(invalidAuth, []byte(testUser+"@"+testPasswd))
for _, test := range []struct {
desc string
auth *runtime.AuthConfig
host string
expectedUser string
expectedSecret string
expectErr bool
}{
{
desc: "should not return error if auth config is nil",
},
{
desc: "should not return error if empty auth is provided for access to anonymous registry",
auth: &runtime.AuthConfig{},
expectErr: false,
},
{
desc: "should support identity token",
auth: &runtime.AuthConfig{IdentityToken: "abcd"},
expectedSecret: "abcd",
},
{
desc: "should support username and password",
auth: &runtime.AuthConfig{
Username: testUser,
Password: testPasswd,
},
expectedUser: testUser,
expectedSecret: testPasswd,
},
{
desc: "should support auth",
auth: &runtime.AuthConfig{Auth: string(testAuth)},
expectedUser: testUser,
expectedSecret: testPasswd,
},
{
desc: "should return error for invalid auth",
auth: &runtime.AuthConfig{Auth: string(invalidAuth)},
expectErr: true,
},
{
desc: "should return empty auth if server address doesn't match",
auth: &runtime.AuthConfig{
Username: testUser,
Password: testPasswd,
ServerAddress: "https://registry-1.io",
},
host: "registry-2.io",
expectedUser: "",
expectedSecret: "",
},
{
desc: "should return auth if server address matches",
auth: &runtime.AuthConfig{
Username: testUser,
Password: testPasswd,
ServerAddress: "https://registry-1.io",
},
host: "registry-1.io",
expectedUser: testUser,
expectedSecret: testPasswd,
},
{
desc: "should return auth if server address is not specified",
auth: &runtime.AuthConfig{
Username: testUser,
Password: testPasswd,
},
host: "registry-1.io",
expectedUser: testUser,
expectedSecret: testPasswd,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
u, s, err := ParseAuth(test.auth, test.host)
assert.Equal(t, test.expectErr, err != nil)
assert.Equal(t, test.expectedUser, u)
assert.Equal(t, test.expectedSecret, s)
})
}
}
func TestRegistryEndpoints(t *testing.T) {
for _, test := range []struct {
desc string
mirrors map[string]criconfig.Mirror
host string
expected []string
}{
{
desc: "no mirror configured",
mirrors: map[string]criconfig.Mirror{
"registry-1.io": {
Endpoints: []string{
"https://registry-1.io",
"https://registry-2.io",
},
},
},
host: "registry-3.io",
expected: []string{
"https://registry-3.io",
},
},
{
desc: "mirror configured",
mirrors: map[string]criconfig.Mirror{
"registry-3.io": {
Endpoints: []string{
"https://registry-1.io",
"https://registry-2.io",
},
},
},
host: "registry-3.io",
expected: []string{
"https://registry-1.io",
"https://registry-2.io",
"https://registry-3.io",
},
},
{
desc: "wildcard mirror configured",
mirrors: map[string]criconfig.Mirror{
"*": {
Endpoints: []string{
"https://registry-1.io",
"https://registry-2.io",
},
},
},
host: "registry-3.io",
expected: []string{
"https://registry-1.io",
"https://registry-2.io",
"https://registry-3.io",
},
},
{
desc: "host should take precedence if both host and wildcard mirrors are configured",
mirrors: map[string]criconfig.Mirror{
"*": {
Endpoints: []string{
"https://registry-1.io",
},
},
"registry-3.io": {
Endpoints: []string{
"https://registry-2.io",
},
},
},
host: "registry-3.io",
expected: []string{
"https://registry-2.io",
"https://registry-3.io",
},
},
{
desc: "default endpoint in list with http",
mirrors: map[string]criconfig.Mirror{
"registry-3.io": {
Endpoints: []string{
"https://registry-1.io",
"https://registry-2.io",
"http://registry-3.io",
},
},
},
host: "registry-3.io",
expected: []string{
"https://registry-1.io",
"https://registry-2.io",
"http://registry-3.io",
},
},
{
desc: "default endpoint in list with https",
mirrors: map[string]criconfig.Mirror{
"registry-3.io": {
Endpoints: []string{
"https://registry-1.io",
"https://registry-2.io",
"https://registry-3.io",
},
},
},
host: "registry-3.io",
expected: []string{
"https://registry-1.io",
"https://registry-2.io",
"https://registry-3.io",
},
},
{
desc: "default endpoint in list with path",
mirrors: map[string]criconfig.Mirror{
"registry-3.io": {
Endpoints: []string{
"https://registry-1.io",
"https://registry-2.io",
"https://registry-3.io/path",
},
},
},
host: "registry-3.io",
expected: []string{
"https://registry-1.io",
"https://registry-2.io",
"https://registry-3.io/path",
},
},
{
desc: "miss scheme endpoint in list with path",
mirrors: map[string]criconfig.Mirror{
"registry-3.io": {
Endpoints: []string{
"https://registry-3.io",
"registry-1.io",
"127.0.0.1:1234",
},
},
},
host: "registry-3.io",
expected: []string{
"https://registry-3.io",
"https://registry-1.io",
"http://127.0.0.1:1234",
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
c, _ := newTestCRIService()
c.config.Registry.Mirrors = test.mirrors
got, err := c.registryEndpoints(test.host)
assert.NoError(t, err)
assert.Equal(t, test.expected, got)
})
}
}
func TestDefaultScheme(t *testing.T) {
for _, test := range []struct {
desc string
host string
expected string
}{
{
desc: "should use http by default for localhost",
host: "localhost",
expected: "http",
},
{
desc: "should use http by default for localhost with port",
host: "localhost:8080",
expected: "http",
},
{
desc: "should use http by default for 127.0.0.1",
host: "127.0.0.1",
expected: "http",
},
{
desc: "should use http by default for 127.0.0.1 with port",
host: "127.0.0.1:8080",
expected: "http",
},
{
desc: "should use http by default for ::1",
host: "::1",
expected: "http",
},
{
desc: "should use http by default for ::1 with port",
host: "[::1]:8080",
expected: "http",
},
{
desc: "should use https by default for remote host",
host: "remote",
expected: "https",
},
{
desc: "should use https by default for remote host with port",
host: "remote:8080",
expected: "https",
},
{
desc: "should use https by default for remote ip",
host: "8.8.8.8",
expected: "https",
},
{
desc: "should use https by default for remote ip with port",
host: "8.8.8.8:8080",
expected: "https",
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
got := defaultScheme(test.host)
assert.Equal(t, test.expected, got)
})
}
}
// Temporarily remove for v2 upgrade
//func TestEncryptedImagePullOpts(t *testing.T) {
// for _, test := range []struct {
// desc string
// keyModel string
// expectedOpts int
// }{
// {
// desc: "node key model should return one unpack opt",
// keyModel: criconfig.KeyModelNode,
// expectedOpts: 1,
// },
// {
// desc: "no key model selected should default to node key model",
// keyModel: "",
// expectedOpts: 0,
// },
// } {
// test := test
// t.Run(test.desc, func(t *testing.T) {
// c, _ := newTestCRIService()
// c.config.ImageDecryption.KeyModel = test.keyModel
// got := len(c.encryptedImagesPullOpts())
// assert.Equal(t, test.expectedOpts, got)
// })
// }
//}
func TestSnapshotterFromPodSandboxConfig(t *testing.T) {
defaultSnashotter := "native"
runtimeSnapshotter := "devmapper"
tests := []struct {
desc string
podSandboxConfig *runtime.PodSandboxConfig
expectSnapshotter string
expectErr bool
}{
{
desc: "should return default snapshotter for nil podSandboxConfig",
expectSnapshotter: defaultSnashotter,
},
{
desc: "should return default snapshotter for nil podSandboxConfig.Annotations",
podSandboxConfig: &runtime.PodSandboxConfig{},
expectSnapshotter: defaultSnashotter,
},
{
desc: "should return default snapshotter for empty podSandboxConfig.Annotations",
podSandboxConfig: &runtime.PodSandboxConfig{
Annotations: make(map[string]string),
},
expectSnapshotter: defaultSnashotter,
},
{
desc: "should return default snapshotter for runtime not found",
podSandboxConfig: &runtime.PodSandboxConfig{
Annotations: map[string]string{
annotations.RuntimeHandler: "runtime-not-exists",
},
},
expectSnapshotter: defaultSnashotter,
},
{
desc: "should return snapshotter provided in podSandboxConfig.Annotations",
podSandboxConfig: &runtime.PodSandboxConfig{
Annotations: map[string]string{
annotations.RuntimeHandler: "exiting-runtime",
},
},
expectSnapshotter: runtimeSnapshotter,
},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
cri, _ := newTestCRIService()
cri.config.Snapshotter = defaultSnashotter
cri.runtimePlatforms["exiting-runtime"] = ImagePlatform{
Platform: platforms.DefaultSpec(),
Snapshotter: runtimeSnapshotter,
}
snapshotter, err := cri.snapshotterFromPodSandboxConfig(context.Background(), "test-image", tt.podSandboxConfig)
assert.Equal(t, tt.expectSnapshotter, snapshotter)
if tt.expectErr {
assert.Error(t, err)
}
})
}
}
func TestGetRepoDigestAndTag(t *testing.T) {
digest := digest.Digest("sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582")
for _, test := range []struct {
desc string
ref string
schema1 bool
expectedRepoDigest string
expectedRepoTag string
}{
{
desc: "repo tag should be empty if original ref has no tag",
ref: "gcr.io/library/busybox@" + digest.String(),
expectedRepoDigest: "gcr.io/library/busybox@" + digest.String(),
},
{
desc: "repo tag should not be empty if original ref has tag",
ref: "gcr.io/library/busybox:latest",
expectedRepoDigest: "gcr.io/library/busybox@" + digest.String(),
expectedRepoTag: "gcr.io/library/busybox:latest",
},
{
desc: "repo digest should be empty if original ref is schema1 and has no digest",
ref: "gcr.io/library/busybox:latest",
schema1: true,
expectedRepoDigest: "",
expectedRepoTag: "gcr.io/library/busybox:latest",
},
{
desc: "repo digest should not be empty if original ref is schema1 but has digest",
ref: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59594",
schema1: true,
expectedRepoDigest: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59594",
expectedRepoTag: "",
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
named, err := docker.ParseDockerRef(test.ref)
assert.NoError(t, err)
repoDigest, repoTag := getRepoDigestAndTag(named, digest, test.schema1)
assert.Equal(t, test.expectedRepoDigest, repoDigest)
assert.Equal(t, test.expectedRepoTag, repoTag)
})
}
}
func TestImageGetLabels(t *testing.T) {
criService, _ := newTestCRIService()
tests := []struct {
name string
expectedLabel map[string]string
pinnedImages map[string]string
pullImageName string
}{
{
name: "pinned image labels should get added on sandbox image",
expectedLabel: map[string]string{labels.ImageLabelKey: labels.ImageLabelValue, labels.PinnedImageLabelKey: labels.PinnedImageLabelValue},
pinnedImages: map[string]string{"sandbox": "k8s.gcr.io/pause:3.9"},
pullImageName: "k8s.gcr.io/pause:3.9",
},
{
name: "pinned image labels should get added on sandbox image without tag",
expectedLabel: map[string]string{labels.ImageLabelKey: labels.ImageLabelValue, labels.PinnedImageLabelKey: labels.PinnedImageLabelValue},
pinnedImages: map[string]string{"sandboxnotag": "k8s.gcr.io/pause", "sandbox": "k8s.gcr.io/pause:latest"},
pullImageName: "k8s.gcr.io/pause:latest",
},
{
name: "pinned image labels should get added on sandbox image specified with tag and digest both",
expectedLabel: map[string]string{labels.ImageLabelKey: labels.ImageLabelValue, labels.PinnedImageLabelKey: labels.PinnedImageLabelValue},
pinnedImages: map[string]string{
"sandboxtagdigest": "k8s.gcr.io/pause:3.9@sha256:45b23dee08af5e43a7fea6c4cf9c25ccf269ee113168c19722f87876677c5cb2",
"sandbox": "k8s.gcr.io/pause@sha256:45b23dee08af5e43a7fea6c4cf9c25ccf269ee113168c19722f87876677c5cb2",
},
pullImageName: "k8s.gcr.io/pause@sha256:45b23dee08af5e43a7fea6c4cf9c25ccf269ee113168c19722f87876677c5cb2",
},
{
name: "pinned image labels should get added on sandbox image specified with digest",
expectedLabel: map[string]string{labels.ImageLabelKey: labels.ImageLabelValue, labels.PinnedImageLabelKey: labels.PinnedImageLabelValue},
pinnedImages: map[string]string{"sandbox": "k8s.gcr.io/pause@sha256:45b23dee08af5e43a7fea6c4cf9c25ccf269ee113168c19722f87876677c5cb2"},
pullImageName: "k8s.gcr.io/pause@sha256:45b23dee08af5e43a7fea6c4cf9c25ccf269ee113168c19722f87876677c5cb2",
},
{
name: "pinned image labels should not get added on other image",
expectedLabel: map[string]string{labels.ImageLabelKey: labels.ImageLabelValue},
pinnedImages: map[string]string{"sandbox": "k8s.gcr.io/pause:3.9"},
pullImageName: "k8s.gcr.io/random:latest",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
criService.config.PinnedImages = tt.pinnedImages
labels := criService.getLabels(context.Background(), tt.pullImageName)
assert.Equal(t, tt.expectedLabel, labels)
})
}
}

View File

@@ -0,0 +1,79 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"fmt"
eventstypes "github.com/containerd/containerd/v2/api/events"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/pkg/tracing"
"github.com/containerd/errdefs"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// RemoveImage removes the image.
// TODO(random-liu): Update CRI to pass image reference instead of ImageSpec. (See
// kubernetes/kubernetes#46255)
// TODO(random-liu): We should change CRI to distinguish image id and image spec.
// Remove the whole image no matter the it's image id or reference. This is the
// semantic defined in CRI now.
func (c *GRPCCRIImageService) RemoveImage(ctx context.Context, r *runtime.RemoveImageRequest) (*runtime.RemoveImageResponse, error) {
span := tracing.SpanFromContext(ctx)
// TODO: Move to separate function
image, err := c.LocalResolve(r.GetImage().GetImage())
if err != nil {
if errdefs.IsNotFound(err) {
span.AddEvent(err.Error())
// return empty without error when image not found.
return &runtime.RemoveImageResponse{}, nil
}
return nil, fmt.Errorf("can not resolve %q locally: %w", r.GetImage().GetImage(), err)
}
span.SetAttributes(tracing.Attribute("image.id", image.ID))
// Remove all image references.
for i, ref := range image.References {
var opts []images.DeleteOpt
if i == len(image.References)-1 {
// Delete the last image reference synchronously to trigger garbage collection.
// This is best effort. It is possible that the image reference is deleted by
// someone else before this point.
opts = []images.DeleteOpt{images.SynchronousDelete()}
}
err = c.images.Delete(ctx, ref, opts...)
if err == nil || errdefs.IsNotFound(err) {
// Update image store to reflect the newest state in containerd.
if err := c.imageStore.Update(ctx, ref); err != nil {
return nil, fmt.Errorf("failed to update image reference %q for %q: %w", ref, image.ID, err)
}
if c.publisher != nil {
if err := c.publisher.Publish(ctx, "/images/delete", &eventstypes.ImageDelete{
Name: ref,
}); err != nil {
return nil, err
}
}
continue
}
return nil, fmt.Errorf("failed to delete image reference %q for %q: %w", ref, image.ID, err)
}
return &runtime.RemoveImageResponse{}, nil
}

View File

@@ -0,0 +1,132 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"encoding/json"
"fmt"
"strconv"
"strings"
imagestore "github.com/containerd/containerd/v2/internal/cri/store/image"
"github.com/containerd/containerd/v2/internal/cri/util"
"github.com/containerd/containerd/v2/pkg/tracing"
"github.com/containerd/errdefs"
"github.com/containerd/log"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// ImageStatus returns the status of the image, returns nil if the image isn't present.
// TODO(random-liu): We should change CRI to distinguish image id and image spec. (See
// kubernetes/kubernetes#46255)
func (c *CRIImageService) ImageStatus(ctx context.Context, r *runtime.ImageStatusRequest) (*runtime.ImageStatusResponse, error) {
span := tracing.SpanFromContext(ctx)
image, err := c.LocalResolve(r.GetImage().GetImage())
if err != nil {
if errdefs.IsNotFound(err) {
span.AddEvent(err.Error())
// return empty without error when image not found.
return &runtime.ImageStatusResponse{}, nil
}
return nil, fmt.Errorf("can not resolve %q locally: %w", r.GetImage().GetImage(), err)
}
span.SetAttributes(tracing.Attribute("image.id", image.ID))
// TODO(random-liu): [P0] Make sure corresponding snapshot exists. What if snapshot
// doesn't exist?
runtimeImage := toCRIImage(image)
info, err := c.toCRIImageInfo(ctx, &image, r.GetVerbose())
if err != nil {
return nil, fmt.Errorf("failed to generate image info: %w", err)
}
return &runtime.ImageStatusResponse{
Image: runtimeImage,
Info: info,
}, nil
}
// toCRIImage converts internal image object to CRI runtime.Image.
func toCRIImage(image imagestore.Image) *runtime.Image {
repoTags, repoDigests := util.ParseImageReferences(image.References)
runtimeImage := &runtime.Image{
Id: image.ID,
RepoTags: repoTags,
RepoDigests: repoDigests,
Size_: uint64(image.Size),
Pinned: image.Pinned,
}
uid, username := getUserFromImage(image.ImageSpec.Config.User)
if uid != nil {
runtimeImage.Uid = &runtime.Int64Value{Value: *uid}
}
runtimeImage.Username = username
return runtimeImage
}
// getUserFromImage gets uid or user name of the image user.
// If user is numeric, it will be treated as uid; or else, it is treated as user name.
func getUserFromImage(user string) (*int64, string) {
// return both empty if user is not specified in the image.
if user == "" {
return nil, ""
}
// split instances where the id may contain user:group
user = strings.Split(user, ":")[0]
// user could be either uid or user name. Try to interpret as numeric uid.
uid, err := strconv.ParseInt(user, 10, 64)
if err != nil {
// If user is non numeric, assume it's user name.
return nil, user
}
// If user is a numeric uid.
return &uid, ""
}
// TODO (mikebrow): discuss moving this struct and / or constants for info map for some or all of these fields to CRI
type verboseImageInfo struct {
ChainID string `json:"chainID"`
ImageSpec imagespec.Image `json:"imageSpec"`
}
// toCRIImageInfo converts internal image object information to CRI image status response info map.
func (c *CRIImageService) toCRIImageInfo(ctx context.Context, image *imagestore.Image, verbose bool) (map[string]string, error) {
if !verbose {
return nil, nil
}
info := make(map[string]string)
imi := &verboseImageInfo{
ChainID: image.ChainID,
ImageSpec: image.ImageSpec,
}
m, err := json.Marshal(imi)
if err == nil {
info["info"] = string(m)
} else {
log.G(ctx).WithError(err).Errorf("failed to marshal info %v", imi)
info["info"] = err.Error()
}
return info, nil
}

View File

@@ -0,0 +1,139 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"testing"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
imagestore "github.com/containerd/containerd/v2/internal/cri/store/image"
"github.com/containerd/containerd/v2/internal/cri/util"
)
func TestImageStatus(t *testing.T) {
testID := "sha256:d848ce12891bf78792cda4a23c58984033b0c397a55e93a1556202222ecc5ed4" // #nosec G101
image := imagestore.Image{
ID: testID,
ChainID: "test-chain-id",
References: []string{
"gcr.io/library/busybox:latest",
"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
},
Size: 1234,
ImageSpec: imagespec.Image{
Config: imagespec.ImageConfig{
User: "user:group",
},
},
}
expected := &runtime.Image{
Id: testID,
RepoTags: []string{"gcr.io/library/busybox:latest"},
RepoDigests: []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"},
Size_: uint64(1234),
Username: "user",
}
c, g := newTestCRIService()
t.Logf("should return nil image spec without error for non-exist image")
resp, err := c.ImageStatus(context.Background(), &runtime.ImageStatusRequest{
Image: &runtime.ImageSpec{Image: testID},
})
assert.NoError(t, err)
require.NotNil(t, resp)
assert.Nil(t, resp.GetImage())
c.imageStore, err = imagestore.NewFakeStore([]imagestore.Image{image})
assert.NoError(t, err)
t.Logf("should return correct image status for exist image")
resp, err = g.ImageStatus(context.Background(), &runtime.ImageStatusRequest{
Image: &runtime.ImageSpec{Image: testID},
})
assert.NoError(t, err)
assert.NotNil(t, resp)
assert.Equal(t, expected, resp.GetImage())
}
func TestParseImageReferences(t *testing.T) {
refs := []string{
"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
"gcr.io/library/busybox:1.2",
"sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
"arbitrary-ref",
}
expectedTags := []string{
"gcr.io/library/busybox:1.2",
}
expectedDigests := []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"}
tags, digests := util.ParseImageReferences(refs)
assert.Equal(t, expectedTags, tags)
assert.Equal(t, expectedDigests, digests)
}
// TestGetUserFromImage tests the logic of getting image uid or user name of image user.
func TestGetUserFromImage(t *testing.T) {
newI64 := func(i int64) *int64 { return &i }
for _, test := range []struct {
desc string
user string
uid *int64
name string
}{
{
desc: "no gid",
user: "0",
uid: newI64(0),
},
{
desc: "uid/gid",
user: "0:1",
uid: newI64(0),
},
{
desc: "empty user",
user: "",
},
{
desc: "multiple separators",
user: "1:2:3",
uid: newI64(1),
},
{
desc: "root username",
user: "root:root",
name: "root",
},
{
desc: "username",
user: "test:test",
name: "test",
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
actualUID, actualName := getUserFromImage(test.user)
assert.Equal(t, test.uid, actualUID)
assert.Equal(t, test.name, actualName)
})
}
}

View File

@@ -0,0 +1,83 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"time"
"github.com/containerd/containerd/v2/internal/cri/store/snapshot"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// ImageFsInfo returns information of the filesystem that is used to store images.
// TODO(windows): Usage for windows is always 0 right now. Support this for windows.
// TODO(random-liu): Handle storage consumed by content store
func (c *CRIImageService) ImageFsInfo(ctx context.Context, r *runtime.ImageFsInfoRequest) (*runtime.ImageFsInfoResponse, error) {
snapshots := c.snapshotStore.List()
snapshotterFSInfos := map[string]snapshot.Snapshot{}
for _, sn := range snapshots {
if info, ok := snapshotterFSInfos[sn.Key.Snapshotter]; ok {
// Use the oldest timestamp as the timestamp of imagefs info.
if sn.Timestamp < info.Timestamp {
info.Timestamp = sn.Timestamp
}
info.Size += sn.Size
info.Inodes += sn.Inodes
snapshotterFSInfos[sn.Key.Snapshotter] = info
} else {
snapshotterFSInfos[sn.Key.Snapshotter] = snapshot.Snapshot{
Timestamp: sn.Timestamp,
Size: sn.Size,
Inodes: sn.Inodes,
}
}
}
var imageFilesystems []*runtime.FilesystemUsage
// Currently kubelet always consumes the first entry of the returned array,
// so put the default snapshotter as the first entry for compatibility.
if info, ok := snapshotterFSInfos[c.config.Snapshotter]; ok {
imageFilesystems = append(imageFilesystems, &runtime.FilesystemUsage{
Timestamp: info.Timestamp,
FsId: &runtime.FilesystemIdentifier{Mountpoint: c.imageFSPaths[c.config.Snapshotter]},
UsedBytes: &runtime.UInt64Value{Value: info.Size},
InodesUsed: &runtime.UInt64Value{Value: info.Inodes},
})
delete(snapshotterFSInfos, c.config.Snapshotter)
} else {
imageFilesystems = append(imageFilesystems, &runtime.FilesystemUsage{
Timestamp: time.Now().UnixNano(),
FsId: &runtime.FilesystemIdentifier{Mountpoint: c.imageFSPaths[c.config.Snapshotter]},
UsedBytes: &runtime.UInt64Value{Value: 0},
InodesUsed: &runtime.UInt64Value{Value: 0},
})
}
for snapshotter, info := range snapshotterFSInfos {
imageFilesystems = append(imageFilesystems, &runtime.FilesystemUsage{
Timestamp: info.Timestamp,
FsId: &runtime.FilesystemIdentifier{Mountpoint: c.imageFSPaths[snapshotter]},
UsedBytes: &runtime.UInt64Value{Value: info.Size},
InodesUsed: &runtime.UInt64Value{Value: info.Inodes},
})
}
return &runtime.ImageFsInfoResponse{ImageFilesystems: imageFilesystems}, nil
}

View File

@@ -0,0 +1,80 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"testing"
snapshot "github.com/containerd/containerd/v2/core/snapshots"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
snapshotstore "github.com/containerd/containerd/v2/internal/cri/store/snapshot"
)
func TestImageFsInfo(t *testing.T) {
c, g := newTestCRIService()
snapshots := []snapshotstore.Snapshot{
{
Key: snapshotstore.Key{
Key: "key1",
Snapshotter: "overlayfs",
},
Kind: snapshot.KindActive,
Size: 10,
Inodes: 100,
Timestamp: 234567,
},
{
Key: snapshotstore.Key{
Key: "key2",
Snapshotter: "overlayfs",
},
Kind: snapshot.KindCommitted,
Size: 20,
Inodes: 200,
Timestamp: 123456,
},
{
Key: snapshotstore.Key{
Key: "key3",
Snapshotter: "overlayfs",
},
Kind: snapshot.KindView,
Size: 0,
Inodes: 0,
Timestamp: 345678,
},
}
expected := &runtime.FilesystemUsage{
Timestamp: 123456,
FsId: &runtime.FilesystemIdentifier{Mountpoint: testImageFSPath},
UsedBytes: &runtime.UInt64Value{Value: 30},
InodesUsed: &runtime.UInt64Value{Value: 300},
}
for _, sn := range snapshots {
c.snapshotStore.Add(sn)
}
resp, err := g.ImageFsInfo(context.Background(), &runtime.ImageFsInfoRequest{})
require.NoError(t, err)
stats := resp.GetImageFilesystems()
// stats[0] is for default snapshotter, stats[1] is for `overlayfs`
assert.Len(t, stats, 2)
assert.Equal(t, expected, stats[1])
}

View File

@@ -0,0 +1,53 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"github.com/docker/go-metrics"
prom "github.com/prometheus/client_golang/prometheus"
)
var (
imagePulls metrics.LabeledCounter
inProgressImagePulls metrics.Gauge
// image size in MB / image pull duration in seconds
imagePullThroughput prom.Histogram
)
func init() {
const (
namespace = "containerd"
subsystem = "cri_sandboxed"
)
// these CRI metrics record latencies for successful operations around a sandbox and container's lifecycle.
ns := metrics.NewNamespace(namespace, subsystem, nil)
imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status")
inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total)
imagePullThroughput = prom.NewHistogram(
prom.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "image_pulling_throughput",
Help: "image pull throughput",
Buckets: prom.DefBuckets,
},
)
ns.Add(imagePullThroughput)
metrics.Register(ns)
}

View File

@@ -0,0 +1,198 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"time"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/content"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/core/snapshots"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
imagestore "github.com/containerd/containerd/v2/internal/cri/store/image"
snapshotstore "github.com/containerd/containerd/v2/internal/cri/store/snapshot"
"github.com/containerd/containerd/v2/internal/kmutex"
"github.com/containerd/containerd/v2/pkg/events"
"github.com/containerd/log"
"github.com/containerd/platforms"
docker "github.com/distribution/reference"
imagedigest "github.com/opencontainers/go-digest"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
type imageClient interface {
ListImages(context.Context, ...string) ([]containerd.Image, error)
GetImage(context.Context, string) (containerd.Image, error)
Pull(context.Context, string, ...containerd.RemoteOpt) (containerd.Image, error)
}
type ImagePlatform struct {
Snapshotter string
Platform platforms.Platform
}
type CRIImageService struct {
// config contains all image configurations.
config criconfig.ImageConfig
// images is the lower level image store used for raw storage,
// no event publishing should currently be assumed
images images.Store
// publisher is the events publisher
publisher events.Publisher
// client is a subset of the containerd client
// and will be replaced by image store and transfer service
client imageClient
// imageFSPaths contains path to image filesystem for snapshotters.
imageFSPaths map[string]string
// runtimePlatforms are the platforms configured for a runtime.
runtimePlatforms map[string]ImagePlatform
// imageStore stores all resources associated with images.
imageStore *imagestore.Store
// snapshotStore stores information of all snapshots.
snapshotStore *snapshotstore.Store
// unpackDuplicationSuppressor is used to make sure that there is only
// one in-flight fetch request or unpack handler for a given descriptor's
// or chain ID.
unpackDuplicationSuppressor kmutex.KeyedLocker
}
type GRPCCRIImageService struct {
*CRIImageService
}
type CRIImageServiceOptions struct {
Content content.Store
Images images.Store
ImageFSPaths map[string]string
RuntimePlatforms map[string]ImagePlatform
Snapshotters map[string]snapshots.Snapshotter
Publisher events.Publisher
Client imageClient
}
// NewService creates a new CRI Image Service
//
// TODO:
// 1. Generalize the image service and merge with a single higher level image service
// 2. Update the options to remove client and imageFSPath
// - Platform configuration with Array/Map of snapshotter names + filesystem ID + platform matcher + runtime to snapshotter
// - Transfer service implementation
// - Image Service (from metadata)
// - Content store (from metadata)
// 3. Separate image cache and snapshot cache to first class plugins, make the snapshot cache much more efficient and intelligent
func NewService(config criconfig.ImageConfig, options *CRIImageServiceOptions) (*CRIImageService, error) {
svc := CRIImageService{
config: config,
images: options.Images,
client: options.Client,
imageStore: imagestore.NewStore(options.Images, options.Content, platforms.Default()),
imageFSPaths: options.ImageFSPaths,
runtimePlatforms: options.RuntimePlatforms,
snapshotStore: snapshotstore.NewStore(),
unpackDuplicationSuppressor: kmutex.New(),
}
log.L.Info("Start snapshots syncer")
snapshotsSyncer := newSnapshotsSyncer(
svc.snapshotStore,
options.Snapshotters,
time.Duration(svc.config.StatsCollectPeriod)*time.Second,
)
snapshotsSyncer.start()
return &svc, nil
}
// LocalResolve resolves image reference locally and returns corresponding image metadata. It
// returns errdefs.ErrNotFound if the reference doesn't exist.
func (c *CRIImageService) LocalResolve(refOrID string) (imagestore.Image, error) {
getImageID := func(refOrId string) string {
if _, err := imagedigest.Parse(refOrID); err == nil {
return refOrID
}
return func(ref string) string {
// ref is not image id, try to resolve it locally.
// TODO(random-liu): Handle this error better for debugging.
normalized, err := docker.ParseDockerRef(ref)
if err != nil {
return ""
}
id, err := c.imageStore.Resolve(normalized.String())
if err != nil {
return ""
}
return id
}(refOrID)
}
imageID := getImageID(refOrID)
if imageID == "" {
// Try to treat ref as imageID
imageID = refOrID
}
return c.imageStore.Get(imageID)
}
// RuntimeSnapshotter overrides the default snapshotter if Snapshotter is set for this runtime.
// See https://github.com/containerd/containerd/issues/6657
// TODO: Pass in name and get back runtime platform
func (c *CRIImageService) RuntimeSnapshotter(ctx context.Context, ociRuntime criconfig.Runtime) string {
if ociRuntime.Snapshotter == "" {
return c.config.Snapshotter
}
log.G(ctx).Debugf("Set snapshotter for runtime %s to %s", ociRuntime.Type, ociRuntime.Snapshotter)
return ociRuntime.Snapshotter
}
// GetImage gets image metadata by image id.
func (c *CRIImageService) GetImage(id string) (imagestore.Image, error) {
return c.imageStore.Get(id)
}
// GetSnapshot returns the snapshot with specified key.
func (c *CRIImageService) GetSnapshot(key, snapshotter string) (snapshotstore.Snapshot, error) {
snapshotKey := snapshotstore.Key{
Key: key,
Snapshotter: snapshotter,
}
return c.snapshotStore.Get(snapshotKey)
}
func (c *CRIImageService) ImageFSPaths() map[string]string {
return c.imageFSPaths
}
// PinnedImage is used to lookup a pinned image by name.
// Most often used to get the "sandbox" image.
func (c *CRIImageService) PinnedImage(name string) string {
return c.config.PinnedImages[name]
}
// GRPCService returns a new CRI Image Service grpc server.
func (c *CRIImageService) GRPCService() runtime.ImageServiceServer {
return &GRPCCRIImageService{c}
}

View File

@@ -0,0 +1,129 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"testing"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
imagestore "github.com/containerd/containerd/v2/internal/cri/store/image"
snapshotstore "github.com/containerd/containerd/v2/internal/cri/store/snapshot"
"github.com/containerd/errdefs"
"github.com/containerd/platforms"
"github.com/stretchr/testify/assert"
)
const (
testImageFSPath = "/test/image/fs/path"
// Use an image id as test sandbox image to avoid image name resolve.
// TODO(random-liu): Change this to image name after we have complete image
// management unit test framework.
testSandboxImage = "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113798" // #nosec G101
)
// newTestCRIService creates a fake criService for test.
func newTestCRIService() (*CRIImageService, *GRPCCRIImageService) {
service := &CRIImageService{
config: testImageConfig,
runtimePlatforms: map[string]ImagePlatform{},
imageFSPaths: map[string]string{"overlayfs": testImageFSPath},
imageStore: imagestore.NewStore(nil, nil, platforms.Default()),
snapshotStore: snapshotstore.NewStore(),
}
return service, &GRPCCRIImageService{service}
}
var testImageConfig = criconfig.ImageConfig{
PinnedImages: map[string]string{
"sandbox": testSandboxImage,
},
}
func TestLocalResolve(t *testing.T) {
image := imagestore.Image{
ID: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799",
ChainID: "test-chain-id-1",
References: []string{
"docker.io/library/busybox:latest",
"docker.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
},
Size: 10,
}
c, _ := newTestCRIService()
var err error
c.imageStore, err = imagestore.NewFakeStore([]imagestore.Image{image})
assert.NoError(t, err)
for _, ref := range []string{
"sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799",
"busybox",
"busybox:latest",
"busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
"library/busybox",
"library/busybox:latest",
"library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
"docker.io/busybox",
"docker.io/busybox:latest",
"docker.io/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
"docker.io/library/busybox",
"docker.io/library/busybox:latest",
"docker.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
} {
img, err := c.LocalResolve(ref)
assert.NoError(t, err)
assert.Equal(t, image, img)
}
img, err := c.LocalResolve("randomid")
assert.Equal(t, errdefs.IsNotFound(err), true)
assert.Equal(t, imagestore.Image{}, img)
}
func TestRuntimeSnapshotter(t *testing.T) {
defaultRuntime := criconfig.Runtime{
Snapshotter: "",
}
fooRuntime := criconfig.Runtime{
Snapshotter: "devmapper",
}
for _, test := range []struct {
desc string
runtime criconfig.Runtime
expectSnapshotter string
}{
{
desc: "should return default snapshotter when runtime.Snapshotter is not set",
runtime: defaultRuntime,
expectSnapshotter: criconfig.DefaultImageConfig().Snapshotter,
},
{
desc: "should return overridden snapshotter when runtime.Snapshotter is set",
runtime: fooRuntime,
expectSnapshotter: "devmapper",
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
cri, _ := newTestCRIService()
cri.config = criconfig.DefaultImageConfig()
assert.Equal(t, test.expectSnapshotter, cri.RuntimeSnapshotter(context.Background(), test.runtime))
})
}
}

View File

@@ -0,0 +1,130 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"fmt"
"time"
snapshot "github.com/containerd/containerd/v2/core/snapshots"
snapshotstore "github.com/containerd/containerd/v2/internal/cri/store/snapshot"
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
"github.com/containerd/errdefs"
"github.com/containerd/log"
)
// snapshotsSyncer syncs snapshot stats periodically. imagefs info and container stats
// should both use cached result here.
// TODO(random-liu): Benchmark with high workload. We may need a statsSyncer instead if
// benchmark result shows that container cpu/memory stats also need to be cached.
type snapshotsSyncer struct {
store *snapshotstore.Store
snapshotters map[string]snapshot.Snapshotter
syncPeriod time.Duration
}
// newSnapshotsSyncer creates a snapshot syncer.
func newSnapshotsSyncer(store *snapshotstore.Store, snapshotters map[string]snapshot.Snapshotter,
period time.Duration) *snapshotsSyncer {
return &snapshotsSyncer{
store: store,
snapshotters: snapshotters,
syncPeriod: period,
}
}
// start starts the snapshots syncer. No stop function is needed because
// the syncer doesn't update any persistent states, it's fine to let it
// exit with the process.
func (s *snapshotsSyncer) start() {
tick := time.NewTicker(s.syncPeriod)
go func() {
defer tick.Stop()
// TODO(random-liu): This is expensive. We should do benchmark to
// check the resource usage and optimize this.
for {
if err := s.sync(); err != nil {
log.L.WithError(err).Error("Failed to sync snapshot stats")
}
<-tick.C
}
}()
}
// sync updates all snapshots stats.
func (s *snapshotsSyncer) sync() error {
ctx := ctrdutil.NamespacedContext()
start := time.Now().UnixNano()
for key, snapshotter := range s.snapshotters {
var snapshots []snapshot.Info
// Do not call `Usage` directly in collect function, because
// `Usage` takes time, we don't want `Walk` to hold read lock
// of snapshot metadata store for too long time.
// TODO(random-liu): Set timeout for the following 2 contexts.
if err := snapshotter.Walk(ctx, func(ctx context.Context, info snapshot.Info) error {
snapshots = append(snapshots, info)
return nil
}); err != nil {
return fmt.Errorf("walk all snapshots for %q failed: %w", key, err)
}
for _, info := range snapshots {
snapshotKey := snapshotstore.Key{
Key: info.Name,
Snapshotter: key,
}
sn, err := s.store.Get(snapshotKey)
if err == nil {
// Only update timestamp for non-active snapshot.
if sn.Kind == info.Kind && sn.Kind != snapshot.KindActive {
sn.Timestamp = time.Now().UnixNano()
s.store.Add(sn)
continue
}
}
// Get newest stats if the snapshot is new or active.
sn = snapshotstore.Snapshot{
Key: snapshotstore.Key{
Key: info.Name,
Snapshotter: key,
},
Kind: info.Kind,
Timestamp: time.Now().UnixNano(),
}
usage, err := snapshotter.Usage(ctx, info.Name)
if err != nil {
if !errdefs.IsNotFound(err) {
log.L.WithError(err).Errorf("Failed to get usage for snapshot %q", info.Name)
}
continue
}
sn.Size = uint64(usage.Size)
sn.Inodes = uint64(usage.Inodes)
s.store.Add(sn)
}
}
for _, sn := range s.store.List() {
if sn.Timestamp >= start {
continue
}
// Delete the snapshot stats if it's not updated this time.
s.store.Delete(sn.Key)
}
return nil
}

View File

@@ -0,0 +1,29 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func (c *criService) ListMetricDescriptors(context.Context, *runtime.ListMetricDescriptorsRequest) (*runtime.ListMetricDescriptorsResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method ListMetricDescriptors not implemented")
}

View File

@@ -0,0 +1,29 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func (c *criService) ListPodSandboxMetrics(context.Context, *runtime.ListPodSandboxMetricsRequest) (*runtime.ListPodSandboxMetricsResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method ListPodSandboxMetrics not implemented")
}

View File

@@ -0,0 +1,76 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"github.com/docker/go-metrics"
)
var (
sandboxListTimer metrics.Timer
sandboxCreateNetworkTimer metrics.Timer
sandboxDeleteNetwork metrics.Timer
sandboxRuntimeCreateTimer metrics.LabeledTimer
sandboxRuntimeStopTimer metrics.LabeledTimer
sandboxRemoveTimer metrics.LabeledTimer
containerListTimer metrics.Timer
containerRemoveTimer metrics.LabeledTimer
containerCreateTimer metrics.LabeledTimer
containerStopTimer metrics.LabeledTimer
containerStartTimer metrics.LabeledTimer
containerEventsDroppedCount metrics.Counter
networkPluginOperations metrics.LabeledCounter
networkPluginOperationsErrors metrics.LabeledCounter
networkPluginOperationsLatency metrics.LabeledTimer
)
func init() {
// these CRI metrics record latencies for successful operations around a sandbox and container's lifecycle.
ns := metrics.NewNamespace("containerd", "cri_sandboxed", nil)
sandboxListTimer = ns.NewTimer("sandbox_list", "time to list sandboxes")
sandboxCreateNetworkTimer = ns.NewTimer("sandbox_create_network", "time to create the network for a sandbox")
sandboxDeleteNetwork = ns.NewTimer("sandbox_delete_network", "time to delete a sandbox's network")
sandboxRuntimeCreateTimer = ns.NewLabeledTimer("sandbox_runtime_create", "time to create a sandbox in the runtime", "runtime")
sandboxRuntimeStopTimer = ns.NewLabeledTimer("sandbox_runtime_stop", "time to stop a sandbox", "runtime")
sandboxRemoveTimer = ns.NewLabeledTimer("sandbox_remove", "time to remove a sandbox", "runtime")
containerListTimer = ns.NewTimer("container_list", "time to list containers")
containerRemoveTimer = ns.NewLabeledTimer("container_remove", "time to remove a container", "runtime")
containerCreateTimer = ns.NewLabeledTimer("container_create", "time to create a container", "runtime")
containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime")
containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime")
containerEventsDroppedCount = ns.NewCounter("container_events_dropped", "count container discarding event total from server start")
networkPluginOperations = ns.NewLabeledCounter("network_plugin_operations_total", "cumulative number of network plugin operations by operation type", "operation_type")
networkPluginOperationsErrors = ns.NewLabeledCounter("network_plugin_operations_errors_total", "cumulative number of network plugin operations by operation type", "operation_type")
networkPluginOperationsLatency = ns.NewLabeledTimer("network_plugin_operations_duration_seconds", "latency in seconds of network plugin operations. Broken down by operation type", "operation_type")
metrics.Register(ns)
}
// for backwards compatibility with kubelet/dockershim metrics
// https://github.com/containerd/containerd/issues/7801
const (
networkStatusOp = "get_pod_network_status"
networkSetUpOp = "set_up_pod"
networkTearDownOp = "tear_down_pod"
)

View File

@@ -0,0 +1,44 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
crilabels "github.com/containerd/containerd/v2/internal/cri/labels"
cstore "github.com/containerd/containerd/v2/internal/cri/store/container"
sstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
)
type criImplementation struct {
c *criService
}
func (i *criImplementation) Config() *criconfig.Config {
return &i.c.config
}
func (i *criImplementation) SandboxStore() *sstore.Store {
return i.c.sandboxStore
}
func (i *criImplementation) ContainerStore() *cstore.Store {
return i.c.containerStore
}
func (i *criImplementation) ContainerMetadataExtensionKey() string {
return crilabels.ContainerMetadataExtension
}

View File

@@ -0,0 +1,35 @@
//go:build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"time"
cstore "github.com/containerd/containerd/v2/internal/cri/store/container"
cri "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func (i *criImplementation) UpdateContainerResources(ctx context.Context, ctr cstore.Container, req *cri.UpdateContainerResourcesRequest, status cstore.Status) (cstore.Status, error) {
return i.c.updateContainerResources(ctx, ctr, req, status)
}
func (i *criImplementation) StopContainer(ctx context.Context, ctr cstore.Container, timeout time.Duration) error {
return i.c.stopContainer(ctx, ctr, timeout)
}

View File

@@ -0,0 +1,35 @@
//go:build !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"time"
cstore "github.com/containerd/containerd/v2/internal/cri/store/container"
cri "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func (i *criImplementation) UpdateContainerResources(ctx context.Context, ctr cstore.Container, req *cri.UpdateContainerResourcesRequest, status cstore.Status) (cstore.Status, error) {
return cstore.Status{}, nil
}
func (i *criImplementation) StopContainer(ctx context.Context, ctr cstore.Container, timeout time.Duration) error {
return nil
}

View File

@@ -0,0 +1,148 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// TODO: these are copied from container_create_linux.go and should be consolidated later.
package podsandbox
import (
"errors"
"fmt"
"strconv"
"strings"
"github.com/containerd/containerd/v2/contrib/seccomp"
"github.com/containerd/containerd/v2/pkg/oci"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
const (
// profileNamePrefix is the prefix for loading profiles on a localhost. Eg. AppArmor localhost/profileName.
profileNamePrefix = "localhost/" // TODO (mikebrow): get localhost/ & runtime/default from CRI kubernetes/kubernetes#51747
// runtimeDefault indicates that we should use or create a runtime default profile.
runtimeDefault = "runtime/default"
// dockerDefault indicates that we should use or create a docker default profile.
dockerDefault = "docker/default"
// unconfinedProfile is a string indicating one should run a pod/containerd without a security profile
unconfinedProfile = "unconfined"
)
// generateSeccompSpecOpts generates containerd SpecOpts for seccomp.
func (c *Controller) generateSeccompSpecOpts(sp *runtime.SecurityProfile, privileged, seccompEnabled bool) (oci.SpecOpts, error) {
if privileged {
// Do not set seccomp profile when container is privileged
return nil, nil
}
if !seccompEnabled {
if sp != nil {
if sp.ProfileType != runtime.SecurityProfile_Unconfined {
return nil, errors.New("seccomp is not supported")
}
}
return nil, nil
}
if sp == nil {
return nil, nil
}
if sp.ProfileType != runtime.SecurityProfile_Localhost && sp.LocalhostRef != "" {
return nil, errors.New("seccomp config invalid LocalhostRef must only be set if ProfileType is Localhost")
}
switch sp.ProfileType {
case runtime.SecurityProfile_Unconfined:
// Do not set seccomp profile.
return nil, nil
case runtime.SecurityProfile_RuntimeDefault:
return seccomp.WithDefaultProfile(), nil
case runtime.SecurityProfile_Localhost:
// trimming the localhost/ prefix just in case even though it should not
// be necessary with the new SecurityProfile struct
return seccomp.WithProfile(strings.TrimPrefix(sp.LocalhostRef, profileNamePrefix)), nil
default:
return nil, errors.New("seccomp unknown ProfileType")
}
}
func generateSeccompSecurityProfile(profilePath string, unsetProfilePath string) (*runtime.SecurityProfile, error) {
if profilePath != "" {
return generateSecurityProfile(profilePath)
}
if unsetProfilePath != "" {
return generateSecurityProfile(unsetProfilePath)
}
return nil, nil
}
func generateSecurityProfile(profilePath string) (*runtime.SecurityProfile, error) {
switch profilePath {
case runtimeDefault, dockerDefault, "":
return &runtime.SecurityProfile{
ProfileType: runtime.SecurityProfile_RuntimeDefault,
}, nil
case unconfinedProfile:
return &runtime.SecurityProfile{
ProfileType: runtime.SecurityProfile_Unconfined,
}, nil
default:
// Require and Trim default profile name prefix
if !strings.HasPrefix(profilePath, profileNamePrefix) {
return nil, fmt.Errorf("invalid profile %q", profilePath)
}
return &runtime.SecurityProfile{
ProfileType: runtime.SecurityProfile_Localhost,
LocalhostRef: strings.TrimPrefix(profilePath, profileNamePrefix),
}, nil
}
}
// generateUserString generates valid user string based on OCI Image Spec
// v1.0.0.
//
// CRI defines that the following combinations are valid:
//
// (none) -> ""
// username -> username
// username, uid -> username
// username, uid, gid -> username:gid
// username, gid -> username:gid
// uid -> uid
// uid, gid -> uid:gid
// gid -> error
//
// TODO(random-liu): Add group name support in CRI.
func generateUserString(username string, uid, gid *runtime.Int64Value) (string, error) {
var userstr, groupstr string
if uid != nil {
userstr = strconv.FormatInt(uid.GetValue(), 10)
}
if username != "" {
userstr = username
}
if gid != nil {
groupstr = strconv.FormatInt(gid.GetValue(), 10)
}
if userstr == "" {
if groupstr != "" {
return "", fmt.Errorf("user group %q is specified without user", groupstr)
}
return "", nil
}
if groupstr != "" {
userstr = userstr + ":" + groupstr
}
return userstr, nil
}

View File

@@ -0,0 +1,200 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"fmt"
"time"
"github.com/containerd/log"
"github.com/containerd/plugin"
"github.com/containerd/plugin/registry"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
eventtypes "github.com/containerd/containerd/v2/api/events"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/sandbox"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
"github.com/containerd/containerd/v2/internal/cri/constants"
"github.com/containerd/containerd/v2/internal/cri/server/podsandbox/types"
imagestore "github.com/containerd/containerd/v2/internal/cri/store/image"
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
"github.com/containerd/containerd/v2/pkg/oci"
osinterface "github.com/containerd/containerd/v2/pkg/os"
"github.com/containerd/containerd/v2/plugins"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/errdefs"
"github.com/containerd/platforms"
)
func init() {
registry.Register(&plugin.Registration{
Type: plugins.SandboxControllerPlugin,
ID: "podsandbox",
Requires: []plugin.Type{
plugins.EventPlugin,
plugins.LeasePlugin,
plugins.SandboxStorePlugin,
plugins.CRIServicePlugin,
plugins.ServicePlugin,
},
InitFn: func(ic *plugin.InitContext) (interface{}, error) {
client, err := containerd.New(
"",
containerd.WithDefaultNamespace(constants.K8sContainerdNamespace),
containerd.WithDefaultPlatform(platforms.Default()),
containerd.WithInMemoryServices(ic),
)
if err != nil {
return nil, fmt.Errorf("unable to init client for podsandbox: %w", err)
}
// Get runtime service.
criRuntimePlugin, err := ic.GetByID(plugins.CRIServicePlugin, "runtime")
if err != nil {
return nil, fmt.Errorf("unable to load CRI runtime service plugin dependency: %w", err)
}
runtimeService := criRuntimePlugin.(RuntimeService)
// Get image service.
criImagePlugin, err := ic.GetByID(plugins.CRIServicePlugin, "images")
if err != nil {
return nil, fmt.Errorf("unable to load CRI image service plugin dependency: %w", err)
}
c := Controller{
client: client,
config: runtimeService.Config(),
os: osinterface.RealOS{},
runtimeService: runtimeService,
imageService: criImagePlugin.(ImageService),
store: NewStore(),
}
return &c, nil
},
})
}
// CRIService interface contains things required by controller, but not yet refactored from criService.
// TODO: this will be removed in subsequent iterations.
type CRIService interface {
// TODO: we should implement Event backoff in Controller.
BackOffEvent(id string, event interface{})
}
// RuntimeService specifies dependencies to CRI runtime service.
type RuntimeService interface {
Config() criconfig.Config
LoadOCISpec(string) (*oci.Spec, error)
}
// ImageService specifies dependencies to CRI image service.
type ImageService interface {
LocalResolve(refOrID string) (imagestore.Image, error)
GetImage(id string) (imagestore.Image, error)
PullImage(ctx context.Context, name string, creds func(string) (string, string, error), sc *runtime.PodSandboxConfig) (string, error)
RuntimeSnapshotter(ctx context.Context, ociRuntime criconfig.Runtime) string
PinnedImage(string) string
}
type Controller struct {
// config contains all configurations.
config criconfig.Config
// client is an instance of the containerd client
client *containerd.Client
// runtimeService is a dependency to CRI runtime service.
runtimeService RuntimeService
// imageService is a dependency to CRI image service.
imageService ImageService
// os is an interface for all required os operations.
os osinterface.OS
// cri is CRI service that provides missing gaps needed by controller.
cri CRIService
store *Store
}
func (c *Controller) Init(
cri CRIService,
) {
c.cri = cri
}
var _ sandbox.Controller = (*Controller)(nil)
func (c *Controller) Platform(_ctx context.Context, _sandboxID string) (platforms.Platform, error) {
return platforms.DefaultSpec(), nil
}
func (c *Controller) Wait(ctx context.Context, sandboxID string) (sandbox.ExitStatus, error) {
podSandbox := c.store.Get(sandboxID)
if podSandbox == nil {
return sandbox.ExitStatus{}, fmt.Errorf("failed to get exit channel. %q", sandboxID)
}
exit, err := podSandbox.Wait(ctx)
if err != nil {
return sandbox.ExitStatus{}, fmt.Errorf("failed to wait pod sandbox, %w", err)
}
return sandbox.ExitStatus{
ExitStatus: exit.ExitCode(),
ExitedAt: exit.ExitTime(),
}, err
}
func (c *Controller) waitSandboxExit(ctx context.Context, p *types.PodSandbox, exitCh <-chan containerd.ExitStatus) (exitStatus uint32, exitedAt time.Time, err error) {
select {
case e := <-exitCh:
exitStatus, exitedAt, err = e.Result()
if err != nil {
log.G(ctx).WithError(err).Errorf("failed to get task exit status for %q", p.ID)
exitStatus = unknownExitCode
exitedAt = time.Now()
}
dctx := ctrdutil.NamespacedContext()
dctx, dcancel := context.WithTimeout(dctx, handleEventTimeout)
defer dcancel()
event := &eventtypes.TaskExit{ExitStatus: exitStatus, ExitedAt: protobuf.ToTimestamp(exitedAt)}
if cleanErr := handleSandboxTaskExit(dctx, p, event); cleanErr != nil {
c.cri.BackOffEvent(p.ID, e)
}
return
case <-ctx.Done():
return unknownExitCode, time.Now(), ctx.Err()
}
}
// handleSandboxTaskExit handles TaskExit event for sandbox.
func handleSandboxTaskExit(ctx context.Context, sb *types.PodSandbox, e *eventtypes.TaskExit) error {
// No stream attached to sandbox container.
task, err := sb.Container.Task(ctx, nil)
if err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to load task for sandbox: %w", err)
}
} else {
// TODO(random-liu): [P1] This may block the loop, we may want to spawn a worker
if _, err = task.Delete(ctx, WithNRISandboxDelete(sb.ID), containerd.WithProcessKill); err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to stop sandbox: %w", err)
}
}
}
return nil
}

View File

@@ -0,0 +1,91 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"testing"
"time"
"github.com/stretchr/testify/assert"
containerd "github.com/containerd/containerd/v2/client"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
"github.com/containerd/containerd/v2/internal/cri/server/podsandbox/types"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
ostesting "github.com/containerd/containerd/v2/pkg/os/testing"
)
const (
testRootDir = "/test/root"
testStateDir = "/test/state"
)
var testConfig = criconfig.Config{
RootDir: testRootDir,
StateDir: testStateDir,
RuntimeConfig: criconfig.RuntimeConfig{
TolerateMissingHugetlbController: true,
},
}
// newControllerService creates a fake criService for test.
func newControllerService() *Controller {
return &Controller{
config: testConfig,
os: ostesting.NewFakeOS(),
store: NewStore(),
}
}
func Test_Status(t *testing.T) {
sandboxID, pid, exitStatus := "1", uint32(1), uint32(0)
createdAt, exitedAt := time.Now(), time.Now()
controller := newControllerService()
sb := types.NewPodSandbox(sandboxID, sandboxstore.Status{
State: sandboxstore.StateReady,
Pid: pid,
CreatedAt: createdAt,
})
sb.Metadata = sandboxstore.Metadata{ID: sandboxID}
err := controller.store.Save(sb)
if err != nil {
t.Fatal(err)
}
s, err := controller.Status(context.Background(), sandboxID, false)
if err != nil {
t.Fatal(err)
}
assert.Equal(t, s.Pid, pid)
assert.Equal(t, s.CreatedAt, createdAt)
assert.Equal(t, s.State, sandboxstore.StateReady.String())
sb.Exit(*containerd.NewExitStatus(exitStatus, exitedAt, nil))
exit, err := controller.Wait(context.Background(), sandboxID)
if err != nil {
t.Fatal(err)
}
assert.Equal(t, exit.ExitStatus, exitStatus)
assert.Equal(t, exit.ExitedAt, exitedAt)
s, err = controller.Status(context.Background(), sandboxID, false)
if err != nil {
t.Fatal(err)
}
assert.Equal(t, s.State, sandboxstore.StateNotReady.String())
}

View File

@@ -0,0 +1,209 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"fmt"
"path"
"path/filepath"
"time"
"github.com/containerd/log"
"github.com/containerd/typeurl/v2"
docker "github.com/distribution/reference"
imagedigest "github.com/opencontainers/go-digest"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/containers"
crilabels "github.com/containerd/containerd/v2/internal/cri/labels"
imagestore "github.com/containerd/containerd/v2/internal/cri/store/image"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
clabels "github.com/containerd/containerd/v2/pkg/labels"
"github.com/containerd/containerd/v2/pkg/oci"
)
const (
// sandboxesDir contains all sandbox root. A sandbox root is the running
// directory of the sandbox, all files created for the sandbox will be
// placed under this directory.
sandboxesDir = "sandboxes"
// MetadataKey is the key used for storing metadata in the sandbox extensions
MetadataKey = "metadata"
)
const (
// unknownExitCode is the exit code when exit reason is unknown.
unknownExitCode = 255
)
const (
handleEventTimeout = 10 * time.Second
)
// getSandboxRootDir returns the root directory for managing sandbox files,
// e.g. hosts files.
func (c *Controller) getSandboxRootDir(id string) string {
return filepath.Join(c.config.RootDir, sandboxesDir, id)
}
// getVolatileSandboxRootDir returns the root directory for managing volatile sandbox files,
// e.g. named pipes.
func (c *Controller) getVolatileSandboxRootDir(id string) string {
return filepath.Join(c.config.StateDir, sandboxesDir, id)
}
// getRepoDigestAngTag returns image repoDigest and repoTag of the named image reference.
func getRepoDigestAndTag(namedRef docker.Named, digest imagedigest.Digest, schema1 bool) (string, string) {
var repoTag, repoDigest string
if _, ok := namedRef.(docker.NamedTagged); ok {
repoTag = namedRef.String()
}
if _, ok := namedRef.(docker.Canonical); ok {
repoDigest = namedRef.String()
} else if !schema1 {
// digest is not actual repo digest for schema1 image.
repoDigest = namedRef.Name() + "@" + digest.String()
}
return repoDigest, repoTag
}
// toContainerdImage converts an image object in image store to containerd image handler.
func (c *Controller) toContainerdImage(ctx context.Context, image imagestore.Image) (containerd.Image, error) {
// image should always have at least one reference.
if len(image.References) == 0 {
return nil, fmt.Errorf("invalid image with no reference %q", image.ID)
}
return c.client.GetImage(ctx, image.References[0])
}
// buildLabel builds the labels from config to be passed to containerd
func buildLabels(configLabels, imageConfigLabels map[string]string, containerType string) map[string]string {
labels := make(map[string]string)
for k, v := range imageConfigLabels {
if err := clabels.Validate(k, v); err == nil {
labels[k] = v
} else {
// In case the image label is invalid, we output a warning and skip adding it to the
// container.
log.L.WithError(err).Warnf("unable to add image label with key %s to the container", k)
}
}
// labels from the CRI request (config) will override labels in the image config
for k, v := range configLabels {
labels[k] = v
}
labels[crilabels.ContainerKindLabel] = containerType
return labels
}
// parseImageReferences parses a list of arbitrary image references and returns
// the repotags and repodigests
func parseImageReferences(refs []string) ([]string, []string) {
var tags, digests []string
for _, ref := range refs {
parsed, err := docker.ParseAnyReference(ref)
if err != nil {
continue
}
if _, ok := parsed.(docker.Canonical); ok {
digests = append(digests, parsed.String())
} else if _, ok := parsed.(docker.Tagged); ok {
tags = append(tags, parsed.String())
}
}
return tags, digests
}
// getPassthroughAnnotations filters requested pod annotations by comparing
// against permitted annotations for the given runtime.
func getPassthroughAnnotations(podAnnotations map[string]string,
runtimePodAnnotations []string) (passthroughAnnotations map[string]string) {
passthroughAnnotations = make(map[string]string)
for podAnnotationKey, podAnnotationValue := range podAnnotations {
for _, pattern := range runtimePodAnnotations {
// Use path.Match instead of filepath.Match here.
// filepath.Match treated `\\` as path separator
// on windows, which is not what we want.
if ok, _ := path.Match(pattern, podAnnotationKey); ok {
passthroughAnnotations[podAnnotationKey] = podAnnotationValue
}
}
}
return passthroughAnnotations
}
// runtimeSpec returns a default runtime spec used in cri-containerd.
func (c *Controller) runtimeSpec(id string, baseSpecFile string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) {
// GenerateSpec needs namespace.
ctx := ctrdutil.NamespacedContext()
container := &containers.Container{ID: id}
if baseSpecFile != "" {
baseSpec, err := c.runtimeService.LoadOCISpec(baseSpecFile)
if err != nil {
return nil, fmt.Errorf("can't load base OCI spec %q: %w", baseSpecFile, err)
}
spec := oci.Spec{}
if err := ctrdutil.DeepCopy(&spec, &baseSpec); err != nil {
return nil, fmt.Errorf("failed to clone OCI spec: %w", err)
}
// Fix up cgroups path
applyOpts := append([]oci.SpecOpts{oci.WithNamespacedCgroup()}, opts...)
if err := oci.ApplyOpts(ctx, nil, container, &spec, applyOpts...); err != nil {
return nil, fmt.Errorf("failed to apply OCI options: %w", err)
}
return &spec, nil
}
spec, err := oci.GenerateSpec(ctx, nil, container, opts...)
if err != nil {
return nil, fmt.Errorf("failed to generate spec: %w", err)
}
return spec, nil
}
func getMetadata(ctx context.Context, container containerd.Container) (*sandboxstore.Metadata, error) {
// Load sandbox metadata.
exts, err := container.Extensions(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get sandbox container extensions: %w", err)
}
ext, ok := exts[crilabels.SandboxMetadataExtension]
if !ok {
return nil, fmt.Errorf("metadata extension %q not found", crilabels.SandboxMetadataExtension)
}
data, err := typeurl.UnmarshalAny(ext)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal metadata extension %q: %w", ext, err)
}
meta, ok := data.(*sandboxstore.Metadata)
if !ok {
return nil, fmt.Errorf("failed to convert the extension to sandbox metadata")
}
return meta, nil
}

View File

@@ -0,0 +1,349 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"fmt"
"os"
"path"
"path/filepath"
"regexp"
"sort"
"strings"
"syscall"
"time"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/mount"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/pkg/seccomp"
"github.com/containerd/containerd/v2/pkg/seutil"
"github.com/containerd/log"
"github.com/moby/sys/mountinfo"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
"golang.org/x/sys/unix"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
const (
// defaultSandboxOOMAdj is default omm adj for sandbox container. (kubernetes#47938).
defaultSandboxOOMAdj = -998
// defaultShmSize is the default size of the sandbox shm.
defaultShmSize = int64(1024 * 1024 * 64)
// relativeRootfsPath is the rootfs path relative to bundle path.
relativeRootfsPath = "rootfs"
// devShm is the default path of /dev/shm.
devShm = "/dev/shm"
// etcHosts is the default path of /etc/hosts file.
etcHosts = "/etc/hosts"
// resolvConfPath is the abs path of resolv.conf on host or container.
resolvConfPath = "/etc/resolv.conf"
)
// getCgroupsPath generates container cgroups path.
func getCgroupsPath(cgroupsParent, id string) string {
base := path.Base(cgroupsParent)
if strings.HasSuffix(base, ".slice") {
// For a.slice/b.slice/c.slice, base is c.slice.
// runc systemd cgroup path format is "slice:prefix:name".
return strings.Join([]string{base, "cri-containerd", id}, ":")
}
return filepath.Join(cgroupsParent, id)
}
// getSandboxHostname returns the hostname file path inside the sandbox root directory.
func (c *Controller) getSandboxHostname(id string) string {
return filepath.Join(c.getSandboxRootDir(id), "hostname")
}
// getSandboxHosts returns the hosts file path inside the sandbox root directory.
func (c *Controller) getSandboxHosts(id string) string {
return filepath.Join(c.getSandboxRootDir(id), "hosts")
}
// getResolvPath returns resolv.conf filepath for specified sandbox.
func (c *Controller) getResolvPath(id string) string {
return filepath.Join(c.getSandboxRootDir(id), "resolv.conf")
}
// getSandboxDevShm returns the shm file path inside the sandbox root directory.
func (c *Controller) getSandboxDevShm(id string) string {
return filepath.Join(c.getVolatileSandboxRootDir(id), "shm")
}
func toLabel(selinuxOptions *runtime.SELinuxOption) ([]string, error) {
var labels []string
if selinuxOptions == nil {
return nil, nil
}
if err := checkSelinuxLevel(selinuxOptions.Level); err != nil {
return nil, err
}
if selinuxOptions.User != "" {
labels = append(labels, "user:"+selinuxOptions.User)
}
if selinuxOptions.Role != "" {
labels = append(labels, "role:"+selinuxOptions.Role)
}
if selinuxOptions.Type != "" {
labels = append(labels, "type:"+selinuxOptions.Type)
}
if selinuxOptions.Level != "" {
labels = append(labels, "level:"+selinuxOptions.Level)
}
return labels, nil
}
func initLabelsFromOpt(selinuxOpts *runtime.SELinuxOption) (string, string, error) {
labels, err := toLabel(selinuxOpts)
if err != nil {
return "", "", err
}
return label.InitLabels(labels)
}
func checkSelinuxLevel(level string) error {
if len(level) == 0 {
return nil
}
matched, err := regexp.MatchString(`^s\d(-s\d)??(:c\d{1,4}(\.c\d{1,4})?(,c\d{1,4}(\.c\d{1,4})?)*)?$`, level)
if err != nil {
return fmt.Errorf("the format of 'level' %q is not correct: %w", level, err)
}
if !matched {
return fmt.Errorf("the format of 'level' %q is not correct", level)
}
return nil
}
func (c *Controller) seccompEnabled() bool {
return seccomp.IsEnabled()
}
// unmountRecursive unmounts the target and all mounts underneath, starting with
// the deepest mount first.
func unmountRecursive(ctx context.Context, target string) error {
target, err := mount.CanonicalizePath(target)
if err != nil {
return err
}
toUnmount, err := mountinfo.GetMounts(mountinfo.PrefixFilter(target))
if err != nil {
return err
}
// Make the deepest mount be first
sort.Slice(toUnmount, func(i, j int) bool {
return len(toUnmount[i].Mountpoint) > len(toUnmount[j].Mountpoint)
})
for i, m := range toUnmount {
if err := mount.UnmountAll(m.Mountpoint, unix.MNT_DETACH); err != nil {
if i == len(toUnmount)-1 { // last mount
return err
}
// This is some submount, we can ignore this error for now, the final unmount will fail if this is a real problem
log.G(ctx).WithError(err).Debugf("failed to unmount submount %s", m.Mountpoint)
}
}
return nil
}
// ensureRemoveAll wraps `os.RemoveAll` to check for specific errors that can
// often be remedied.
// Only use `ensureRemoveAll` if you really want to make every effort to remove
// a directory.
//
// Because of the way `os.Remove` (and by extension `os.RemoveAll`) works, there
// can be a race between reading directory entries and then actually attempting
// to remove everything in the directory.
// These types of errors do not need to be returned since it's ok for the dir to
// be gone we can just retry the remove operation.
//
// This should not return a `os.ErrNotExist` kind of error under any circumstances
func ensureRemoveAll(ctx context.Context, dir string) error {
notExistErr := make(map[string]bool)
// track retries
exitOnErr := make(map[string]int)
maxRetry := 50
// Attempt to unmount anything beneath this dir first.
if err := unmountRecursive(ctx, dir); err != nil {
log.G(ctx).WithError(err).Debugf("failed to do initial unmount of %s", dir)
}
for {
err := os.RemoveAll(dir)
if err == nil {
return nil
}
pe, ok := err.(*os.PathError)
if !ok {
return err
}
if os.IsNotExist(err) {
if notExistErr[pe.Path] {
return err
}
notExistErr[pe.Path] = true
// There is a race where some subdir can be removed but after the
// parent dir entries have been read.
// So the path could be from `os.Remove(subdir)`
// If the reported non-existent path is not the passed in `dir` we
// should just retry, but otherwise return with no error.
if pe.Path == dir {
return nil
}
continue
}
if pe.Err != syscall.EBUSY {
return err
}
if e := mount.Unmount(pe.Path, unix.MNT_DETACH); e != nil {
return fmt.Errorf("error while removing %s: %w", dir, e)
}
if exitOnErr[pe.Path] == maxRetry {
return err
}
exitOnErr[pe.Path]++
time.Sleep(100 * time.Millisecond)
}
}
var vmbasedRuntimes = []string{
"io.containerd.kata",
}
func isVMBasedRuntime(runtimeType string) bool {
for _, rt := range vmbasedRuntimes {
if strings.Contains(runtimeType, rt) {
return true
}
}
return false
}
func modifyProcessLabel(runtimeType string, spec *runtimespec.Spec) error {
if !isVMBasedRuntime(runtimeType) {
return nil
}
l, err := seutil.ChangeToKVM(spec.Process.SelinuxLabel)
if err != nil {
return fmt.Errorf("failed to get selinux kvm label: %w", err)
}
spec.Process.SelinuxLabel = l
return nil
}
func parseUsernsIDMap(runtimeIDMap []*runtime.IDMapping) ([]runtimespec.LinuxIDMapping, error) {
var m []runtimespec.LinuxIDMapping
if len(runtimeIDMap) == 0 {
return m, nil
}
if len(runtimeIDMap) > 1 {
// We only accept 1 line, because containerd.WithRemappedSnapshot() only supports that.
return m, fmt.Errorf("only one mapping line supported, got %v mapping lines", len(runtimeIDMap))
}
// We know len is 1 now.
if runtimeIDMap[0] == nil {
return m, nil
}
uidMap := *runtimeIDMap[0]
if uidMap.Length < 1 {
return m, fmt.Errorf("invalid mapping length: %v", uidMap.Length)
}
m = []runtimespec.LinuxIDMapping{
{
ContainerID: uidMap.ContainerId,
HostID: uidMap.HostId,
Size: uidMap.Length,
},
}
return m, nil
}
func parseUsernsIDs(userns *runtime.UserNamespace) (uids, gids []runtimespec.LinuxIDMapping, retErr error) {
if userns == nil {
// If userns is not set, the kubelet doesn't support this option
// and we should just fallback to no userns. This is completely
// valid.
return nil, nil, nil
}
uids, err := parseUsernsIDMap(userns.GetUids())
if err != nil {
return nil, nil, fmt.Errorf("UID mapping: %w", err)
}
gids, err = parseUsernsIDMap(userns.GetGids())
if err != nil {
return nil, nil, fmt.Errorf("GID mapping: %w", err)
}
switch mode := userns.GetMode(); mode {
case runtime.NamespaceMode_NODE:
if len(uids) != 0 || len(gids) != 0 {
return nil, nil, fmt.Errorf("can't use user namespace mode %q with mappings. Got %v UID mappings and %v GID mappings", mode, len(uids), len(gids))
}
case runtime.NamespaceMode_POD:
// This is valid, we will handle it in WithPodNamespaces().
if len(uids) == 0 || len(gids) == 0 {
return nil, nil, fmt.Errorf("can't use user namespace mode %q without UID and GID mappings", mode)
}
default:
return nil, nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
}
return uids, gids, nil
}
func snapshotterRemapOpts(nsOpts *runtime.NamespaceOption) ([]snapshots.Opt, error) {
snapshotOpt := []snapshots.Opt{}
usernsOpts := nsOpts.GetUsernsOptions()
if usernsOpts == nil {
return snapshotOpt, nil
}
uids, gids, err := parseUsernsIDs(usernsOpts)
if err != nil {
return nil, fmt.Errorf("user namespace configuration: %w", err)
}
if usernsOpts.GetMode() == runtime.NamespaceMode_POD {
snapshotOpt = append(snapshotOpt, containerd.WithRemapperLabels(0, uids[0].HostID, 0, gids[0].HostID, uids[0].Size))
}
return snapshotOpt, nil
}

View File

@@ -0,0 +1,107 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"os"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/assert"
"golang.org/x/sys/unix"
)
func TestGetCgroupsPath(t *testing.T) {
testID := "test-id"
for _, test := range []struct {
desc string
cgroupsParent string
expected string
}{
{
desc: "should support regular cgroup path",
cgroupsParent: "/a/b",
expected: "/a/b/test-id",
},
{
desc: "should support systemd cgroup path",
cgroupsParent: "/a.slice/b.slice",
expected: "b.slice:cri-containerd:test-id",
},
{
desc: "should support tailing slash for regular cgroup path",
cgroupsParent: "/a/b/",
expected: "/a/b/test-id",
},
{
desc: "should support tailing slash for systemd cgroup path",
cgroupsParent: "/a.slice/b.slice/",
expected: "b.slice:cri-containerd:test-id",
},
{
desc: "should treat root cgroup as regular cgroup path",
cgroupsParent: "/",
expected: "/test-id",
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
got := getCgroupsPath(test.cgroupsParent, testID)
assert.Equal(t, test.expected, got)
})
}
}
func TestEnsureRemoveAllWithMount(t *testing.T) {
if os.Getuid() != 0 {
t.Skip("skipping test that requires root")
}
var err error
dir1 := t.TempDir()
dir2 := t.TempDir()
bindDir := filepath.Join(dir1, "bind")
if err := os.MkdirAll(bindDir, 0755); err != nil {
t.Fatal(err)
}
if err := unix.Mount(dir2, bindDir, "none", unix.MS_BIND, ""); err != nil {
t.Fatal(err)
}
done := make(chan struct{})
go func() {
err = ensureRemoveAll(context.Background(), dir1)
close(done)
}()
select {
case <-done:
if err != nil {
t.Fatal(err)
}
case <-time.After(5 * time.Second):
t.Fatal("timeout waiting for EnsureRemoveAll to finish")
}
if _, err := os.Stat(dir1); !os.IsNotExist(err) {
t.Fatalf("expected %q to not exist", dir1)
}
}

View File

@@ -0,0 +1,38 @@
//go:build !windows && !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"os"
"github.com/opencontainers/runtime-spec/specs-go"
)
// ensureRemoveAll wraps `os.RemoveAll` to check for specific errors that can
// often be remedied.
// Only use `ensureRemoveAll` if you really want to make every effort to remove
// a directory.
func ensureRemoveAll(ctx context.Context, dir string) error {
return os.RemoveAll(dir)
}
func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
return nil
}

View File

@@ -0,0 +1,180 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"testing"
"github.com/opencontainers/selinux/go-selinux"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func TestInitSelinuxOpts(t *testing.T) {
if !selinux.GetEnabled() {
t.Skip("selinux is not enabled")
}
for _, test := range []struct {
desc string
selinuxOpt *runtime.SELinuxOption
processLabel string
mountLabel string
expectErr bool
}{
{
desc: "Should return empty strings for processLabel and mountLabel when selinuxOpt is nil",
selinuxOpt: nil,
processLabel: ".*:c[0-9]{1,3},c[0-9]{1,3}",
mountLabel: ".*:c[0-9]{1,3},c[0-9]{1,3}",
},
{
desc: "Should overlay fields on processLabel when selinuxOpt has been initialized partially",
selinuxOpt: &runtime.SELinuxOption{
User: "",
Role: "user_r",
Type: "",
Level: "s0:c1,c2",
},
processLabel: "system_u:user_r:(container_file_t|svirt_lxc_net_t):s0:c1,c2",
mountLabel: "system_u:object_r:(container_file_t|svirt_sandbox_file_t):s0:c1,c2",
},
{
desc: "Should be resolved correctly when selinuxOpt has been initialized completely",
selinuxOpt: &runtime.SELinuxOption{
User: "user_u",
Role: "user_r",
Type: "user_t",
Level: "s0:c1,c2",
},
processLabel: "user_u:user_r:user_t:s0:c1,c2",
mountLabel: "user_u:object_r:(container_file_t|svirt_sandbox_file_t):s0:c1,c2",
},
{
desc: "Should be resolved correctly when selinuxOpt has been initialized with level=''",
selinuxOpt: &runtime.SELinuxOption{
User: "user_u",
Role: "user_r",
Type: "user_t",
Level: "",
},
processLabel: "user_u:user_r:user_t:s0:c[0-9]{1,3},c[0-9]{1,3}",
mountLabel: "user_u:object_r:(container_file_t|svirt_sandbox_file_t):s0",
},
{
desc: "Should return error when the format of 'level' is not correct",
selinuxOpt: &runtime.SELinuxOption{
User: "user_u",
Role: "user_r",
Type: "user_t",
Level: "s0,c1,c2",
},
expectErr: true,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
processLabel, mountLabel, err := initLabelsFromOpt(test.selinuxOpt)
if test.expectErr {
assert.Error(t, err)
} else {
assert.Regexp(t, test.processLabel, processLabel)
assert.Regexp(t, test.mountLabel, mountLabel)
}
})
}
}
func TestCheckSelinuxLevel(t *testing.T) {
for _, test := range []struct {
desc string
level string
expectNoMatch bool
}{
{
desc: "s0",
level: "s0",
},
{
desc: "s0-s0",
level: "s0-s0",
},
{
desc: "s0:c0",
level: "s0:c0",
},
{
desc: "s0:c0.c3",
level: "s0:c0.c3",
},
{
desc: "s0:c0,c3",
level: "s0:c0,c3",
},
{
desc: "s0-s0:c0,c3",
level: "s0-s0:c0,c3",
},
{
desc: "s0-s0:c0,c3.c6",
level: "s0-s0:c0,c3.c6",
},
{
desc: "s0-s0:c0,c3.c6,c8.c10",
level: "s0-s0:c0,c3.c6,c8.c10",
},
{
desc: "s0-s0:c0,c3.c6,c8,c10",
level: "s0-s0:c0,c3.c6",
},
{
desc: "s0,c0,c3",
level: "s0,c0,c3",
expectNoMatch: true,
},
{
desc: "s0:c0.c3.c6",
level: "s0:c0.c3.c6",
expectNoMatch: true,
},
{
desc: "s0-s0,c0,c3",
level: "s0-s0,c0,c3",
expectNoMatch: true,
},
{
desc: "s0-s0:c0.c3.c6",
level: "s0-s0:c0.c3.c6",
expectNoMatch: true,
},
{
desc: "s0-s0:c0,c3.c6.c8",
level: "s0-s0:c0,c3.c6.c8",
expectNoMatch: true,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
err := checkSelinuxLevel(test.level)
if test.expectNoMatch {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
})
}
}

View File

@@ -0,0 +1,336 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"os"
"strings"
"testing"
crilabels "github.com/containerd/containerd/v2/internal/cri/labels"
"github.com/containerd/containerd/v2/pkg/oci"
docker "github.com/distribution/reference"
imagedigest "github.com/opencontainers/go-digest"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
)
func TestGetRepoDigestAndTag(t *testing.T) {
digest := imagedigest.Digest("sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582")
for _, test := range []struct {
desc string
ref string
schema1 bool
expectedRepoDigest string
expectedRepoTag string
}{
{
desc: "repo tag should be empty if original ref has no tag",
ref: "gcr.io/library/busybox@" + digest.String(),
expectedRepoDigest: "gcr.io/library/busybox@" + digest.String(),
},
{
desc: "repo tag should not be empty if original ref has tag",
ref: "gcr.io/library/busybox:latest",
expectedRepoDigest: "gcr.io/library/busybox@" + digest.String(),
expectedRepoTag: "gcr.io/library/busybox:latest",
},
{
desc: "repo digest should be empty if original ref is schema1 and has no digest",
ref: "gcr.io/library/busybox:latest",
schema1: true,
expectedRepoDigest: "",
expectedRepoTag: "gcr.io/library/busybox:latest",
},
{
desc: "repo digest should not be empty if original ref is schema1 but has digest",
ref: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59594",
schema1: true,
expectedRepoDigest: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59594",
expectedRepoTag: "",
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
named, err := docker.ParseDockerRef(test.ref)
assert.NoError(t, err)
repoDigest, repoTag := getRepoDigestAndTag(named, digest, test.schema1)
assert.Equal(t, test.expectedRepoDigest, repoDigest)
assert.Equal(t, test.expectedRepoTag, repoTag)
})
}
}
func TestBuildLabels(t *testing.T) {
imageConfigLabels := map[string]string{
"a": "z",
"d": "y",
"long-label": strings.Repeat("example", 10000),
}
configLabels := map[string]string{
"a": "b",
"c": "d",
}
newLabels := buildLabels(configLabels, imageConfigLabels, crilabels.ContainerKindSandbox)
assert.Len(t, newLabels, 4)
assert.Equal(t, "b", newLabels["a"])
assert.Equal(t, "d", newLabels["c"])
assert.Equal(t, "y", newLabels["d"])
assert.Equal(t, crilabels.ContainerKindSandbox, newLabels[crilabels.ContainerKindLabel])
assert.NotContains(t, newLabels, "long-label")
newLabels["a"] = "e"
assert.Empty(t, configLabels[crilabels.ContainerKindLabel], "should not add new labels into original label")
assert.Equal(t, "b", configLabels["a"], "change in new labels should not affect original label")
}
func TestParseImageReferences(t *testing.T) {
refs := []string{
"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
"gcr.io/library/busybox:1.2",
"sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
"arbitrary-ref",
}
expectedTags := []string{
"gcr.io/library/busybox:1.2",
}
expectedDigests := []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"}
tags, digests := parseImageReferences(refs)
assert.Equal(t, expectedTags, tags)
assert.Equal(t, expectedDigests, digests)
}
func TestEnvDeduplication(t *testing.T) {
for _, test := range []struct {
desc string
existing []string
kv [][2]string
expected []string
}{
{
desc: "single env",
kv: [][2]string{
{"a", "b"},
},
expected: []string{"a=b"},
},
{
desc: "multiple envs",
kv: [][2]string{
{"a", "b"},
{"c", "d"},
{"e", "f"},
},
expected: []string{
"a=b",
"c=d",
"e=f",
},
},
{
desc: "env override",
kv: [][2]string{
{"k1", "v1"},
{"k2", "v2"},
{"k3", "v3"},
{"k3", "v4"},
{"k1", "v5"},
{"k4", "v6"},
},
expected: []string{
"k1=v5",
"k2=v2",
"k3=v4",
"k4=v6",
},
},
{
desc: "existing env",
existing: []string{
"k1=v1",
"k2=v2",
"k3=v3",
},
kv: [][2]string{
{"k3", "v4"},
{"k2", "v5"},
{"k4", "v6"},
},
expected: []string{
"k1=v1",
"k2=v5",
"k3=v4",
"k4=v6",
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
var spec runtimespec.Spec
if len(test.existing) > 0 {
spec.Process = &runtimespec.Process{
Env: test.existing,
}
}
for _, kv := range test.kv {
oci.WithEnv([]string{kv[0] + "=" + kv[1]})(context.Background(), nil, nil, &spec)
}
assert.Equal(t, test.expected, spec.Process.Env)
})
}
}
func TestPassThroughAnnotationsFilter(t *testing.T) {
for _, test := range []struct {
desc string
podAnnotations map[string]string
runtimePodAnnotations []string
passthroughAnnotations map[string]string
}{
{
desc: "should support direct match",
podAnnotations: map[string]string{"c": "d", "d": "e"},
runtimePodAnnotations: []string{"c"},
passthroughAnnotations: map[string]string{"c": "d"},
},
{
desc: "should support wildcard match",
podAnnotations: map[string]string{
"t.f": "j",
"z.g": "o",
"z": "o",
"y.ca": "b",
"y": "b",
},
runtimePodAnnotations: []string{"*.f", "z*g", "y.c*"},
passthroughAnnotations: map[string]string{
"t.f": "j",
"z.g": "o",
"y.ca": "b",
},
},
{
desc: "should support wildcard match all",
podAnnotations: map[string]string{
"t.f": "j",
"z.g": "o",
"z": "o",
"y.ca": "b",
"y": "b",
},
runtimePodAnnotations: []string{"*"},
passthroughAnnotations: map[string]string{
"t.f": "j",
"z.g": "o",
"z": "o",
"y.ca": "b",
"y": "b",
},
},
{
desc: "should support match including path separator",
podAnnotations: map[string]string{
"matchend.com/end": "1",
"matchend.com/end1": "2",
"matchend.com/1end": "3",
"matchmid.com/mid": "4",
"matchmid.com/mi1d": "5",
"matchmid.com/mid1": "6",
"matchhead.com/head": "7",
"matchhead.com/1head": "8",
"matchhead.com/head1": "9",
"matchall.com/abc": "10",
"matchall.com/def": "11",
"end/matchend": "12",
"end1/matchend": "13",
"1end/matchend": "14",
"mid/matchmid": "15",
"mi1d/matchmid": "16",
"mid1/matchmid": "17",
"head/matchhead": "18",
"1head/matchhead": "19",
"head1/matchhead": "20",
"abc/matchall": "21",
"def/matchall": "22",
"match1/match2": "23",
"nomatch/nomatch": "24",
},
runtimePodAnnotations: []string{
"matchend.com/end*",
"matchmid.com/mi*d",
"matchhead.com/*head",
"matchall.com/*",
"end*/matchend",
"mi*d/matchmid",
"*head/matchhead",
"*/matchall",
"match*/match*",
},
passthroughAnnotations: map[string]string{
"matchend.com/end": "1",
"matchend.com/end1": "2",
"matchmid.com/mid": "4",
"matchmid.com/mi1d": "5",
"matchhead.com/head": "7",
"matchhead.com/1head": "8",
"matchall.com/abc": "10",
"matchall.com/def": "11",
"end/matchend": "12",
"end1/matchend": "13",
"mid/matchmid": "15",
"mi1d/matchmid": "16",
"head/matchhead": "18",
"1head/matchhead": "19",
"abc/matchall": "21",
"def/matchall": "22",
"match1/match2": "23",
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
passthroughAnnotations := getPassthroughAnnotations(test.podAnnotations, test.runtimePodAnnotations)
assert.Equal(t, test.passthroughAnnotations, passthroughAnnotations)
})
}
}
func TestEnsureRemoveAllNotExist(t *testing.T) {
// should never return an error for a non-existent path
if err := ensureRemoveAll(context.Background(), "/non/existent/path"); err != nil {
t.Fatal(err)
}
}
func TestEnsureRemoveAllWithDir(t *testing.T) {
dir := t.TempDir()
if err := ensureRemoveAll(context.Background(), dir); err != nil {
t.Fatal(err)
}
}
func TestEnsureRemoveAllWithFile(t *testing.T) {
tmp, err := os.CreateTemp("", "test-ensure-removeall-with-dir")
if err != nil {
t.Fatal(err)
}
tmp.Close()
if err := ensureRemoveAll(context.Background(), tmp.Name()); err != nil {
t.Fatal(err)
}
}

View File

@@ -0,0 +1,33 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"os"
"github.com/opencontainers/runtime-spec/specs-go"
)
// ensureRemoveAll is a wrapper for os.RemoveAll on Windows.
func ensureRemoveAll(_ context.Context, dir string) error {
return os.RemoveAll(dir)
}
func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
return nil
}

View File

@@ -0,0 +1,51 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/log"
"github.com/containerd/nri"
v1 "github.com/containerd/nri/types/v1"
)
// WithNRISandboxDelete calls delete for a sandbox'd task
func WithNRISandboxDelete(sandboxID string) containerd.ProcessDeleteOpts {
return func(ctx context.Context, p containerd.Process) error {
task, ok := p.(containerd.Task)
if !ok {
return nil
}
nric, err := nri.New()
if err != nil {
log.G(ctx).WithError(err).Error("unable to create nri client")
return nil
}
if nric == nil {
return nil
}
sb := &nri.Sandbox{
ID: sandboxID,
}
if _, err := nric.InvokeWithSandbox(ctx, task, v1.Delete, sb); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to delete nri for %q", task.ID())
}
return nil
}
}

View File

@@ -0,0 +1,179 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"fmt"
goruntime "runtime"
"time"
"github.com/containerd/log"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerd "github.com/containerd/containerd/v2/client"
sandbox2 "github.com/containerd/containerd/v2/core/sandbox"
"github.com/containerd/containerd/v2/internal/cri/server/podsandbox/types"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
"github.com/containerd/containerd/v2/pkg/netns"
"github.com/containerd/errdefs"
)
// loadContainerTimeout is the default timeout for loading a container/sandbox.
// One container/sandbox hangs (e.g. containerd#2438) should not affect other
// containers/sandboxes.
// Most CRI container/sandbox related operations are per container, the ones
// which handle multiple containers at a time are:
// * ListPodSandboxes: Don't talk with containerd services.
// * ListContainers: Don't talk with containerd services.
// * ListContainerStats: Not in critical code path, a default timeout will
// be applied at CRI level.
// * Recovery logic: We should set a time for each container/sandbox recovery.
// * Event monitor: We should set a timeout for each container/sandbox event handling.
const loadContainerTimeout = 10 * time.Second
func (c *Controller) RecoverContainer(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) {
ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout)
defer cancel()
var sandbox sandboxstore.Sandbox
meta, err := getMetadata(ctx, cntr)
if err != nil {
return sandbox, err
}
// Load sandbox created timestamp.
info, err := cntr.Info(ctx)
if err != nil {
return sandbox, fmt.Errorf("failed to get sandbox container info: %w", err)
}
s, ch, err := func() (sandboxstore.Status, <-chan containerd.ExitStatus, error) {
status := sandboxstore.Status{
State: sandboxstore.StateUnknown,
}
var channel <-chan containerd.ExitStatus
status.CreatedAt = info.CreatedAt
// Load sandbox state.
t, err := cntr.Task(ctx, nil)
if err != nil && !errdefs.IsNotFound(err) {
return status, channel, fmt.Errorf("failed to load task: %w", err)
}
var taskStatus containerd.Status
var notFound bool
if errdefs.IsNotFound(err) {
// Task is not found.
notFound = true
} else {
// Task is found. Get task status.
taskStatus, err = t.Status(ctx)
if err != nil {
// It's still possible that task is deleted during this window.
if !errdefs.IsNotFound(err) {
return status, channel, fmt.Errorf("failed to get task status: %w", err)
}
notFound = true
}
}
if notFound {
// Task does not exist, set sandbox state as NOTREADY.
status.State = sandboxstore.StateNotReady
} else {
if taskStatus.Status == containerd.Running {
status.State = sandboxstore.StateReady
status.Pid = t.Pid()
exitCh, err := t.Wait(ctrdutil.NamespacedContext())
if err != nil {
return status, channel, fmt.Errorf("failed to wait for sandbox container task: %w", err)
}
channel = exitCh
} else {
// Task is not running. Delete the task and set sandbox state as NOTREADY.
if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
return status, channel, fmt.Errorf("failed to delete task: %w", err)
}
status.State = sandboxstore.StateNotReady
}
}
return status, channel, nil
}()
if err != nil {
log.G(ctx).WithError(err).Errorf("Failed to load sandbox status for %q", cntr.ID())
}
// save it to cache in the podsandbox controller
podSandbox := types.NewPodSandbox(cntr.ID(), s)
podSandbox.Container = cntr
if meta != nil {
podSandbox.Metadata = *meta
}
podSandbox.Runtime = sandbox2.RuntimeOpts{
Name: info.Runtime.Name,
Options: info.Runtime.Options,
}
if ch != nil {
go func() {
code, exitTime, err := c.waitSandboxExit(ctrdutil.NamespacedContext(), podSandbox, ch)
podSandbox.Exit(*containerd.NewExitStatus(code, exitTime, err))
}()
}
if err := c.store.Save(podSandbox); err != nil {
return sandbox, fmt.Errorf("failed to save pod sandbox container in mem store: %w", err)
}
sandbox = sandboxstore.NewSandbox(*meta, s)
sandbox.Container = cntr
// Load network namespace.
sandbox.NetNS = getNetNS(meta)
// It doesn't matter whether task is running or not. If it is running, sandbox
// status will be `READY`; if it is not running, sandbox status will be `NOT_READY`,
// kubelet will stop the sandbox which will properly cleanup everything.
return sandbox, nil
}
func getNetNS(meta *sandboxstore.Metadata) *netns.NetNS {
// Don't need to load netns for host network sandbox.
if hostNetwork(meta.Config) {
return nil
}
return netns.LoadNetNS(meta.NetNSPath)
}
// hostNetwork handles checking if host networking was requested.
// TODO: Copy pasted from sbserver to handle container sandbox events in podsandbox/ package, needs refactoring.
func hostNetwork(config *runtime.PodSandboxConfig) bool {
var hostNet bool
switch goruntime.GOOS {
case "windows":
// Windows HostProcess pods can only run on the host network
hostNet = config.GetWindows().GetSecurityContext().GetHostProcess()
case "darwin":
// No CNI on Darwin yet.
hostNet = true
default:
// Even on other platforms, the logic containerd uses is to check if NamespaceMode == NODE.
// So this handles Linux, as well as any other platforms not governed by the cases above
// that have special quirks.
hostNet = config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE
}
return hostNet
}

View File

@@ -0,0 +1,125 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"fmt"
apitasks "github.com/containerd/containerd/v2/api/services/tasks/v1"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/errdefs"
"github.com/containerd/log"
)
func (c *Controller) Shutdown(ctx context.Context, sandboxID string) error {
sandbox := c.store.Get(sandboxID)
if sandbox == nil {
// Do not return error if the id doesn't exist.
log.G(ctx).Tracef("Sandbox controller Delete called for sandbox %q that does not exist", sandboxID)
return nil
}
// Cleanup the sandbox root directories.
sandboxRootDir := c.getSandboxRootDir(sandboxID)
if err := ensureRemoveAll(ctx, sandboxRootDir); err != nil {
return fmt.Errorf("failed to remove sandbox root directory %q: %w", sandboxRootDir, err)
}
volatileSandboxRootDir := c.getVolatileSandboxRootDir(sandboxID)
if err := ensureRemoveAll(ctx, volatileSandboxRootDir); err != nil {
return fmt.Errorf("failed to remove volatile sandbox root directory %q: %w",
volatileSandboxRootDir, err)
}
// Delete sandbox container.
if sandbox.Container != nil {
if err := c.cleanupSandboxTask(ctx, sandbox.Container); err != nil {
return fmt.Errorf("failed to delete sandbox task %q: %w", sandboxID, err)
}
if err := sandbox.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to delete sandbox container %q: %w", sandboxID, err)
}
log.G(ctx).Tracef("Sandbox controller Delete called for sandbox container %q that does not exist", sandboxID)
}
}
c.store.Remove(sandboxID)
return nil
}
func (c *Controller) cleanupSandboxTask(ctx context.Context, sbCntr containerd.Container) error {
task, err := sbCntr.Task(ctx, nil)
if err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to load task for sandbox: %w", err)
}
} else {
if _, err = task.Delete(ctx, containerd.WithProcessKill); err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to stop sandbox: %w", err)
}
}
}
// NOTE: Both sb.Container.Task and task.Delete interface always ensures
// that the status of target task. However, the interfaces return
// ErrNotFound, which doesn't mean that the shim instance doesn't exist.
//
// There are two caches for task in containerd:
//
// 1. io.containerd.service.v1.tasks-service
// 2. io.containerd.runtime.v2.task
//
// First one is to maintain the shim connection and shutdown the shim
// in Delete API. And the second one is to maintain the lifecycle of
// task in shim server.
//
// So, if the shim instance is running and task has been deleted in shim
// server, the sb.Container.Task and task.Delete will receive the
// ErrNotFound. If we don't delete the shim instance in io.containerd.service.v1.tasks-service,
// shim will be leaky.
//
// Based on containerd/containerd#7496 issue, when host is under IO
// pressure, the umount2 syscall will take more than 10 seconds so that
// the CRI plugin will cancel this task.Delete call. However, the shim
// server isn't aware about this. After return from umount2 syscall, the
// shim server continue delete the task record. And then CRI plugin
// retries to delete task and retrieves ErrNotFound and marks it as
// stopped. Therefore, The shim is leaky.
//
// It's hard to handle the connection lost or request canceled cases in
// shim server. We should call Delete API to io.containerd.service.v1.tasks-service
// to ensure that shim instance is shutdown.
//
// REF:
// 1. https://github.com/containerd/containerd/issues/7496#issuecomment-1671100968
// 2. https://github.com/containerd/containerd/issues/8931
if errdefs.IsNotFound(err) {
_, err = c.client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: sbCntr.ID()})
if err != nil {
err = errdefs.FromGRPC(err)
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to cleanup sandbox %s in task-service: %w", sbCntr.ID(), err)
}
}
log.G(ctx).Infof("Ensure that sandbox %s in task-service has been cleanup successfully", sbCntr.ID())
}
return nil
}

View File

@@ -0,0 +1,316 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"errors"
"fmt"
"github.com/containerd/log"
"github.com/containerd/nri"
v1 "github.com/containerd/nri/types/v1"
"github.com/containerd/typeurl/v2"
"github.com/davecgh/go-spew/spew"
"github.com/opencontainers/selinux/go-selinux"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/sandbox"
"github.com/containerd/containerd/v2/core/snapshots"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
crilabels "github.com/containerd/containerd/v2/internal/cri/labels"
customopts "github.com/containerd/containerd/v2/internal/cri/opts"
"github.com/containerd/containerd/v2/internal/cri/server/podsandbox/types"
imagestore "github.com/containerd/containerd/v2/internal/cri/store/image"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
containerdio "github.com/containerd/containerd/v2/pkg/cio"
"github.com/containerd/errdefs"
)
func init() {
typeurl.Register(&sandboxstore.Metadata{},
"github.com/containerd/cri/pkg/store/sandbox", "Metadata")
}
type CleanupErr struct {
error
}
// Start creates resources required for the sandbox and starts the sandbox. If an error occurs, Start attempts to tear
// down the created resources. If an error occurs while tearing down resources, a zero-valued response is returned
// alongside the error. If the teardown was successful, a nil response is returned with the error.
// TODO(samuelkarp) Determine whether this error indication is reasonable to retain once controller.Delete is implemented.
func (c *Controller) Start(ctx context.Context, id string) (cin sandbox.ControllerInstance, retErr error) {
var cleanupErr error
defer func() {
if retErr != nil && cleanupErr != nil {
log.G(ctx).WithField("id", id).WithError(cleanupErr).Errorf("failed to fully teardown sandbox resources after earlier error: %s", retErr)
retErr = errors.Join(retErr, CleanupErr{cleanupErr})
}
}()
podSandbox := c.store.Get(id)
if podSandbox == nil {
return cin, fmt.Errorf("unable to find pod sandbox with id %q: %w", id, errdefs.ErrNotFound)
}
metadata := podSandbox.Metadata
var (
config = metadata.Config
labels = map[string]string{}
)
sandboxImage := c.imageService.PinnedImage("sandbox")
if sandboxImage == "" {
sandboxImage = criconfig.DefaultSandboxImage
}
// Ensure sandbox container image snapshot.
image, err := c.ensureImageExists(ctx, sandboxImage, config)
if err != nil {
return cin, fmt.Errorf("failed to get sandbox image %q: %w", sandboxImage, err)
}
containerdImage, err := c.toContainerdImage(ctx, *image)
if err != nil {
return cin, fmt.Errorf("failed to get image from containerd %q: %w", image.ID, err)
}
ociRuntime, err := c.config.GetSandboxRuntime(config, metadata.RuntimeHandler)
if err != nil {
return cin, fmt.Errorf("failed to get sandbox runtime: %w", err)
}
log.G(ctx).WithField("podsandboxid", id).Debugf("use OCI runtime %+v", ociRuntime)
labels["oci_runtime_type"] = ociRuntime.Type
// Create sandbox container.
// NOTE: sandboxContainerSpec SHOULD NOT have side
// effect, e.g. accessing/creating files, so that we can test
// it safely.
spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, metadata.NetNSPath, ociRuntime.PodAnnotations)
if err != nil {
return cin, fmt.Errorf("failed to generate sandbox container spec: %w", err)
}
log.G(ctx).WithField("podsandboxid", id).Debugf("sandbox container spec: %#+v", spew.NewFormatter(spec))
metadata.ProcessLabel = spec.Process.SelinuxLabel
defer func() {
if retErr != nil {
selinux.ReleaseLabel(metadata.ProcessLabel)
}
}()
labels["selinux_label"] = metadata.ProcessLabel
// handle any KVM based runtime
if err := modifyProcessLabel(ociRuntime.Type, spec); err != nil {
return cin, err
}
if config.GetLinux().GetSecurityContext().GetPrivileged() {
// If privileged don't set selinux label, but we still record the MCS label so that
// the unused label can be freed later.
spec.Process.SelinuxLabel = ""
}
// Generate spec options that will be applied to the spec later.
specOpts, err := c.sandboxContainerSpecOpts(config, &image.ImageSpec.Config)
if err != nil {
return cin, fmt.Errorf("failed to generate sandbox container spec options: %w", err)
}
sandboxLabels := buildLabels(config.Labels, image.ImageSpec.Config.Labels, crilabels.ContainerKindSandbox)
snapshotterOpt := []snapshots.Opt{snapshots.WithLabels(snapshots.FilterInheritedLabels(config.Annotations))}
extraSOpts, err := sandboxSnapshotterOpts(config)
if err != nil {
return cin, err
}
snapshotterOpt = append(snapshotterOpt, extraSOpts...)
opts := []containerd.NewContainerOpts{
containerd.WithSnapshotter(c.imageService.RuntimeSnapshotter(ctx, ociRuntime)),
customopts.WithNewSnapshot(id, containerdImage, snapshotterOpt...),
containerd.WithSpec(spec, specOpts...),
containerd.WithContainerLabels(sandboxLabels),
containerd.WithContainerExtension(crilabels.SandboxMetadataExtension, &metadata),
containerd.WithRuntime(ociRuntime.Type, podSandbox.Runtime.Options),
}
container, err := c.client.NewContainer(ctx, id, opts...)
if err != nil {
return cin, fmt.Errorf("failed to create containerd container: %w", err)
}
podSandbox.Container = container
defer func() {
if retErr != nil && cleanupErr == nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
if cleanupErr = container.Delete(deferCtx, containerd.WithSnapshotCleanup); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to delete containerd container %q", id)
}
podSandbox.Container = nil
}
}()
// Create sandbox container root directories.
sandboxRootDir := c.getSandboxRootDir(id)
if err := c.os.MkdirAll(sandboxRootDir, 0755); err != nil {
return cin, fmt.Errorf("failed to create sandbox root directory %q: %w",
sandboxRootDir, err)
}
defer func() {
if retErr != nil && cleanupErr == nil {
// Cleanup the sandbox root directory.
if cleanupErr = c.os.RemoveAll(sandboxRootDir); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove sandbox root directory %q",
sandboxRootDir)
}
}
}()
volatileSandboxRootDir := c.getVolatileSandboxRootDir(id)
if err := c.os.MkdirAll(volatileSandboxRootDir, 0755); err != nil {
return cin, fmt.Errorf("failed to create volatile sandbox root directory %q: %w",
volatileSandboxRootDir, err)
}
defer func() {
if retErr != nil && cleanupErr == nil {
// Cleanup the volatile sandbox root directory.
if cleanupErr = c.os.RemoveAll(volatileSandboxRootDir); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove volatile sandbox root directory %q",
volatileSandboxRootDir)
}
}
}()
// Setup files required for the sandbox.
if err = c.setupSandboxFiles(id, config); err != nil {
return cin, fmt.Errorf("failed to setup sandbox files: %w", err)
}
defer func() {
if retErr != nil && cleanupErr == nil {
if cleanupErr = c.cleanupSandboxFiles(id, config); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to cleanup sandbox files in %q",
sandboxRootDir)
}
}
}()
// Update sandbox created timestamp.
info, err := container.Info(ctx)
if err != nil {
return cin, fmt.Errorf("failed to get sandbox container info: %w", err)
}
podSandbox.CreatedAt = info.CreatedAt
// Create sandbox task in containerd.
log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).", id, metadata.Name)
var taskOpts []containerd.NewTaskOpts
if ociRuntime.Path != "" {
taskOpts = append(taskOpts, containerd.WithRuntimePath(ociRuntime.Path))
}
// We don't need stdio for sandbox container.
task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...)
if err != nil {
return cin, fmt.Errorf("failed to create containerd task: %w", err)
}
defer func() {
if retErr != nil && cleanupErr == nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Cleanup the sandbox container if an error is returned.
if _, err := task.Delete(deferCtx, WithNRISandboxDelete(id), containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
log.G(ctx).WithError(err).Errorf("Failed to delete sandbox container %q", id)
cleanupErr = err
}
}
}()
podSandbox.Pid = task.Pid()
// wait is a long running background request, no timeout needed.
exitCh, err := task.Wait(ctrdutil.NamespacedContext())
if err != nil {
return cin, fmt.Errorf("failed to wait for sandbox container task: %w", err)
}
nric, err := nri.New()
if err != nil {
return cin, fmt.Errorf("unable to create nri client: %w", err)
}
if nric != nil {
nriSB := &nri.Sandbox{
ID: id,
Labels: config.Labels,
}
if _, err := nric.InvokeWithSandbox(ctx, task, v1.Create, nriSB); err != nil {
return cin, fmt.Errorf("nri invoke: %w", err)
}
}
if err := task.Start(ctx); err != nil {
return cin, fmt.Errorf("failed to start sandbox container task %q: %w", id, err)
}
podSandbox.State = sandboxstore.StateReady
cin.SandboxID = id
cin.Pid = task.Pid()
cin.CreatedAt = info.CreatedAt
cin.Labels = labels
go func() {
code, exitTime, err := c.waitSandboxExit(ctrdutil.NamespacedContext(), podSandbox, exitCh)
podSandbox.Exit(*containerd.NewExitStatus(code, exitTime, err))
}()
return
}
func (c *Controller) Create(_ctx context.Context, info sandbox.Sandbox, opts ...sandbox.CreateOpt) error {
metadata := sandboxstore.Metadata{}
if err := info.GetExtension(MetadataKey, &metadata); err != nil {
return fmt.Errorf("failed to get sandbox %q metadata: %w", info.ID, err)
}
podSandbox := types.NewPodSandbox(info.ID, sandboxstore.Status{State: sandboxstore.StateUnknown})
podSandbox.Metadata = metadata
podSandbox.Runtime = info.Runtime
return c.store.Save(podSandbox)
}
func (c *Controller) ensureImageExists(ctx context.Context, ref string, config *runtime.PodSandboxConfig) (*imagestore.Image, error) {
image, err := c.imageService.LocalResolve(ref)
if err != nil && !errdefs.IsNotFound(err) {
return nil, fmt.Errorf("failed to get image %q: %w", ref, err)
}
if err == nil {
return &image, nil
}
// Pull image to ensure the image exists
// TODO: Cleaner interface
imageID, err := c.imageService.PullImage(ctx, ref, nil, config)
if err != nil {
return nil, fmt.Errorf("failed to pull image %q: %w", ref, err)
}
newImage, err := c.imageService.GetImage(imageID)
if err != nil {
// It's still possible that someone removed the image right after it is pulled.
return nil, fmt.Errorf("failed to get image %q after pulling: %w", imageID, err)
}
return &newImage, nil
}

View File

@@ -0,0 +1,348 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"fmt"
"os"
"strconv"
"strings"
"github.com/containerd/containerd/v2/pkg/oci"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux"
"golang.org/x/sys/unix"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/internal/cri/annotations"
customopts "github.com/containerd/containerd/v2/internal/cri/opts"
"github.com/containerd/containerd/v2/pkg/userns"
)
func (c *Controller) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig,
imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (_ *runtimespec.Spec, retErr error) {
// Creates a spec Generator with the default spec.
// TODO(random-liu): [P1] Compare the default settings with docker and containerd default.
specOpts := []oci.SpecOpts{
oci.WithoutRunMount,
customopts.WithoutDefaultSecuritySettings,
customopts.WithRelativeRoot(relativeRootfsPath),
oci.WithEnv(imageConfig.Env),
oci.WithRootFSReadonly(),
oci.WithHostname(config.GetHostname()),
}
if imageConfig.WorkingDir != "" {
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
}
if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 {
// Pause image must have entrypoint or cmd.
return nil, fmt.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig)
}
specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...))
// Set cgroups parent.
if c.config.DisableCgroup {
specOpts = append(specOpts, customopts.WithDisabledCgroups)
} else {
if config.GetLinux().GetCgroupParent() != "" {
cgroupsPath := getCgroupsPath(config.GetLinux().GetCgroupParent(), id)
specOpts = append(specOpts, oci.WithCgroup(cgroupsPath))
}
}
// When cgroup parent is not set, containerd-shim will create container in a child cgroup
// of the cgroup itself is in.
// TODO(random-liu): [P2] Set default cgroup path if cgroup parent is not specified.
// Set namespace options.
var (
securityContext = config.GetLinux().GetSecurityContext()
nsOptions = securityContext.GetNamespaceOptions()
)
if nsOptions.GetNetwork() == runtime.NamespaceMode_NODE {
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.NetworkNamespace))
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UTSNamespace))
} else {
specOpts = append(specOpts, oci.WithLinuxNamespace(
runtimespec.LinuxNamespace{
Type: runtimespec.NetworkNamespace,
Path: nsPath,
}))
}
if nsOptions.GetPid() == runtime.NamespaceMode_NODE {
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.PIDNamespace))
}
if nsOptions.GetIpc() == runtime.NamespaceMode_NODE {
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace))
}
usernsOpts := nsOptions.GetUsernsOptions()
uids, gids, err := parseUsernsIDs(usernsOpts)
var usernsEnabled bool
if err != nil {
return nil, fmt.Errorf("user namespace configuration: %w", err)
}
if usernsOpts != nil {
switch mode := usernsOpts.GetMode(); mode {
case runtime.NamespaceMode_NODE:
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UserNamespace))
case runtime.NamespaceMode_POD:
specOpts = append(specOpts, oci.WithUserNamespace(uids, gids))
usernsEnabled = true
default:
return nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
}
}
// It's fine to generate the spec before the sandbox /dev/shm
// is actually created.
sandboxDevShm := c.getSandboxDevShm(id)
if nsOptions.GetIpc() == runtime.NamespaceMode_NODE {
sandboxDevShm = devShm
}
// Remove the default /dev/shm mount from defaultMounts, it is added in oci/mounts.go.
specOpts = append(specOpts, oci.WithoutMounts(devShm))
// When user-namespace is enabled, the `nosuid, nodev, noexec` flags are
// required, otherwise the remount will fail with EPERM. Just use them
// unconditionally, they are nice to have anyways.
specOpts = append(specOpts, oci.WithMounts([]runtimespec.Mount{
{
Source: sandboxDevShm,
Destination: devShm,
Type: "bind",
Options: []string{"rbind", "ro", "nosuid", "nodev", "noexec"},
},
// Add resolv.conf for katacontainers to setup the DNS of pod VM properly.
{
Source: c.getResolvPath(id),
Destination: resolvConfPath,
Type: "bind",
Options: []string{"rbind", "ro", "nosuid", "nodev", "noexec"},
},
}))
processLabel, mountLabel, err := initLabelsFromOpt(securityContext.GetSelinuxOptions())
if err != nil {
return nil, fmt.Errorf("failed to init selinux options %+v: %w", securityContext.GetSelinuxOptions(), err)
}
defer func() {
if retErr != nil {
selinux.ReleaseLabel(processLabel)
}
}()
supplementalGroups := securityContext.GetSupplementalGroups()
specOpts = append(specOpts,
customopts.WithSelinuxLabels(processLabel, mountLabel),
customopts.WithSupplementalGroups(supplementalGroups),
)
// Add sysctls
sysctls := config.GetLinux().GetSysctls()
if sysctls == nil {
sysctls = make(map[string]string)
}
_, ipUnprivilegedPortStart := sysctls["net.ipv4.ip_unprivileged_port_start"]
_, pingGroupRange := sysctls["net.ipv4.ping_group_range"]
if nsOptions.GetNetwork() != runtime.NamespaceMode_NODE {
if c.config.EnableUnprivilegedPorts && !ipUnprivilegedPortStart {
sysctls["net.ipv4.ip_unprivileged_port_start"] = "0"
}
if c.config.EnableUnprivilegedICMP && !pingGroupRange && !userns.RunningInUserNS() && !usernsEnabled {
sysctls["net.ipv4.ping_group_range"] = "0 2147483647"
}
}
specOpts = append(specOpts, customopts.WithSysctls(sysctls))
// Note: LinuxSandboxSecurityContext does not currently provide an apparmor profile
if !c.config.DisableCgroup {
specOpts = append(specOpts, customopts.WithDefaultSandboxShares)
}
if res := config.GetLinux().GetResources(); res != nil {
specOpts = append(specOpts,
customopts.WithAnnotation(annotations.SandboxCPUPeriod, strconv.FormatInt(res.CpuPeriod, 10)),
customopts.WithAnnotation(annotations.SandboxCPUQuota, strconv.FormatInt(res.CpuQuota, 10)),
customopts.WithAnnotation(annotations.SandboxCPUShares, strconv.FormatInt(res.CpuShares, 10)),
customopts.WithAnnotation(annotations.SandboxMem, strconv.FormatInt(res.MemoryLimitInBytes, 10)))
}
specOpts = append(specOpts, customopts.WithPodOOMScoreAdj(int(defaultSandboxOOMAdj), c.config.RestrictOOMScoreAdj))
for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
runtimePodAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
}
specOpts = append(specOpts, annotations.DefaultCRIAnnotations(id, "", "", config, true)...)
return c.runtimeSpec(id, "", specOpts...)
}
// sandboxContainerSpecOpts generates OCI spec options for
// the sandbox container.
func (c *Controller) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
var (
securityContext = config.GetLinux().GetSecurityContext()
specOpts []oci.SpecOpts
err error
)
ssp := securityContext.GetSeccomp()
if ssp == nil {
ssp, err = generateSeccompSecurityProfile(
securityContext.GetSeccompProfilePath(), //nolint:staticcheck // Deprecated but we don't want to remove yet
c.config.UnsetSeccompProfile)
if err != nil {
return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err)
}
}
seccompSpecOpts, err := c.generateSeccompSpecOpts(
ssp,
securityContext.GetPrivileged(),
c.seccompEnabled())
if err != nil {
return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err)
}
if seccompSpecOpts != nil {
specOpts = append(specOpts, seccompSpecOpts)
}
userstr, err := generateUserString(
"",
securityContext.GetRunAsUser(),
securityContext.GetRunAsGroup(),
)
if err != nil {
return nil, fmt.Errorf("failed to generate user string: %w", err)
}
if userstr == "" {
// Lastly, since no user override was passed via CRI try to set via OCI
// Image
userstr = imageConfig.User
}
if userstr != "" {
specOpts = append(specOpts, oci.WithUser(userstr))
}
return specOpts, nil
}
// setupSandboxFiles sets up necessary sandbox files including /dev/shm, /etc/hosts,
// /etc/resolv.conf and /etc/hostname.
func (c *Controller) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
sandboxEtcHostname := c.getSandboxHostname(id)
hostname := config.GetHostname()
if hostname == "" {
var err error
hostname, err = c.os.Hostname()
if err != nil {
return fmt.Errorf("failed to get hostname: %w", err)
}
}
if err := c.os.WriteFile(sandboxEtcHostname, []byte(hostname+"\n"), 0644); err != nil {
return fmt.Errorf("failed to write hostname to %q: %w", sandboxEtcHostname, err)
}
// TODO(random-liu): Consider whether we should maintain /etc/hosts and /etc/resolv.conf in kubelet.
sandboxEtcHosts := c.getSandboxHosts(id)
if err := c.os.CopyFile(etcHosts, sandboxEtcHosts, 0644); err != nil {
return fmt.Errorf("failed to generate sandbox hosts file %q: %w", sandboxEtcHosts, err)
}
// Set DNS options. Maintain a resolv.conf for the sandbox.
resolvPath := c.getResolvPath(id)
if dnsConfig := config.GetDnsConfig(); dnsConfig != nil {
resolvContent, err := parseDNSOptions(dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options)
if err != nil {
return fmt.Errorf("failed to parse sandbox DNSConfig %+v: %w", dnsConfig, err)
}
if err := c.os.WriteFile(resolvPath, []byte(resolvContent), 0644); err != nil {
return fmt.Errorf("failed to write resolv content to %q: %w", resolvPath, err)
}
} else {
// The DnsConfig was nil - we interpret that to mean "use the global
// default", which is dubious but backwards-compatible.
if err := c.os.CopyFile(resolvConfPath, resolvPath, 0644); err != nil {
return fmt.Errorf("failed to copy host's resolv.conf to %q: %w", resolvPath, err)
}
}
// Setup sandbox /dev/shm.
if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE {
if _, err := c.os.Stat(devShm); err != nil {
return fmt.Errorf("host %q is not available for host ipc: %w", devShm, err)
}
} else {
sandboxDevShm := c.getSandboxDevShm(id)
if err := c.os.MkdirAll(sandboxDevShm, 0700); err != nil {
return fmt.Errorf("failed to create sandbox shm: %w", err)
}
shmproperty := fmt.Sprintf("mode=1777,size=%d", defaultShmSize)
if err := c.os.Mount("shm", sandboxDevShm, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), shmproperty); err != nil {
return fmt.Errorf("failed to mount sandbox shm: %w", err)
}
}
return nil
}
// parseDNSOptions parse DNS options into resolv.conf format content,
// if none option is specified, will return empty with no error.
func parseDNSOptions(servers, searches, options []string) (string, error) {
resolvContent := ""
if len(searches) > 0 {
resolvContent += fmt.Sprintf("search %s\n", strings.Join(searches, " "))
}
if len(servers) > 0 {
resolvContent += fmt.Sprintf("nameserver %s\n", strings.Join(servers, "\nnameserver "))
}
if len(options) > 0 {
resolvContent += fmt.Sprintf("options %s\n", strings.Join(options, " "))
}
return resolvContent, nil
}
// cleanupSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to
// remove these files. Unmount should *NOT* return error if the mount point is already unmounted.
func (c *Controller) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() != runtime.NamespaceMode_NODE {
path, err := c.os.FollowSymlinkInScope(c.getSandboxDevShm(id), "/")
if err != nil {
return fmt.Errorf("failed to follow symlink: %w", err)
}
if err := c.os.Unmount(path); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to unmount %q: %w", path, err)
}
}
return nil
}
// sandboxSnapshotterOpts generates any platform specific snapshotter options
// for a sandbox container.
func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) {
nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions()
return snapshotterRemapOpts(nsOpts)
}

View File

@@ -0,0 +1,773 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"os"
"path/filepath"
"strconv"
"testing"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
v1 "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/internal/cri/annotations"
"github.com/containerd/containerd/v2/internal/cri/opts"
ostesting "github.com/containerd/containerd/v2/pkg/os/testing"
)
func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) {
config := &runtime.PodSandboxConfig{
Metadata: &runtime.PodSandboxMetadata{
Name: "test-name",
Uid: "test-uid",
Namespace: "test-ns",
Attempt: 1,
},
Hostname: "test-hostname",
LogDirectory: "test-log-directory",
Labels: map[string]string{"a": "b"},
Annotations: map[string]string{"c": "d"},
Linux: &runtime.LinuxPodSandboxConfig{
CgroupParent: "/test/cgroup/parent",
},
}
imageConfig := &imagespec.ImageConfig{
Env: []string{"a=b", "c=d"},
Entrypoint: []string{"/pause"},
Cmd: []string{"forever"},
WorkingDir: "/workspace",
}
specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) {
assert.Equal(t, "test-hostname", spec.Hostname)
assert.Equal(t, getCgroupsPath("/test/cgroup/parent", id), spec.Linux.CgroupsPath)
assert.Equal(t, relativeRootfsPath, spec.Root.Path)
assert.Equal(t, true, spec.Root.Readonly)
assert.Contains(t, spec.Process.Env, "a=b", "c=d")
assert.Equal(t, []string{"/pause", "forever"}, spec.Process.Args)
assert.Equal(t, "/workspace", spec.Process.Cwd)
assert.EqualValues(t, *spec.Linux.Resources.CPU.Shares, opts.DefaultSandboxCPUshares)
assert.EqualValues(t, *spec.Process.OOMScoreAdj, defaultSandboxOOMAdj)
t.Logf("Check PodSandbox annotations")
assert.Contains(t, spec.Annotations, annotations.SandboxID)
assert.EqualValues(t, spec.Annotations[annotations.SandboxID], id)
assert.Contains(t, spec.Annotations, annotations.ContainerType)
assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeSandbox)
assert.Contains(t, spec.Annotations, annotations.SandboxNamespace)
assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-ns")
assert.Contains(t, spec.Annotations, annotations.SandboxUID)
assert.EqualValues(t, spec.Annotations[annotations.SandboxUID], "test-uid")
assert.Contains(t, spec.Annotations, annotations.SandboxName)
assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-name")
assert.Contains(t, spec.Annotations, annotations.SandboxLogDir)
assert.EqualValues(t, spec.Annotations[annotations.SandboxLogDir], "test-log-directory")
if selinux.GetEnabled() {
assert.NotEqual(t, "", spec.Process.SelinuxLabel)
assert.NotEqual(t, "", spec.Linux.MountLabel)
}
assert.Contains(t, spec.Mounts, runtimespec.Mount{
Source: "/test/root/sandboxes/test-id/resolv.conf",
Destination: resolvConfPath,
Type: "bind",
Options: []string{"rbind", "ro", "nosuid", "nodev", "noexec"},
})
}
return config, imageConfig, specCheck
}
func TestLinuxSandboxContainerSpec(t *testing.T) {
testID := "test-id"
nsPath := "test-cni"
idMap := runtime.IDMapping{
HostId: 1000,
ContainerId: 1000,
Length: 10,
}
expIDMap := runtimespec.LinuxIDMapping{
HostID: 1000,
ContainerID: 1000,
Size: 10,
}
for _, test := range []struct {
desc string
configChange func(*runtime.PodSandboxConfig)
specCheck func(*testing.T, *runtimespec.Spec)
expectErr bool
}{
{
desc: "spec should reflect original config",
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
// runtime spec should have expected namespaces enabled by default.
require.NotNil(t, spec.Linux)
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.NetworkNamespace,
Path: nsPath,
})
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.UTSNamespace,
})
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.PIDNamespace,
})
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.IPCNamespace,
})
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
},
},
{
desc: "spec shouldn't have ping_group_range if userns are in use",
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
Uids: []*runtime.IDMapping{&idMap},
Gids: []*runtime.IDMapping{&idMap},
},
},
}
},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
require.NotNil(t, spec.Linux)
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
})
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
},
},
{
desc: "host namespace",
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
Network: runtime.NamespaceMode_NODE,
Pid: runtime.NamespaceMode_NODE,
Ipc: runtime.NamespaceMode_NODE,
},
}
},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
// runtime spec should disable expected namespaces in host mode.
require.NotNil(t, spec.Linux)
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.NetworkNamespace,
})
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.UTSNamespace,
})
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.PIDNamespace,
})
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.IPCNamespace,
})
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
},
},
{
desc: "user namespace",
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
Uids: []*runtime.IDMapping{&idMap},
Gids: []*runtime.IDMapping{&idMap},
},
},
}
},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
require.NotNil(t, spec.Linux)
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
})
require.Equal(t, spec.Linux.UIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
require.Equal(t, spec.Linux.GIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
},
},
{
desc: "user namespace mode node and mappings",
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap},
Gids: []*runtime.IDMapping{&idMap},
},
},
}
},
expectErr: true,
},
{
desc: "user namespace with several mappings",
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap, &idMap},
Gids: []*runtime.IDMapping{&idMap, &idMap},
},
},
}
},
expectErr: true,
},
{
desc: "user namespace with uneven mappings",
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap, &idMap},
Gids: []*runtime.IDMapping{&idMap},
},
},
}
},
expectErr: true,
},
{
desc: "user namespace mode container",
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_CONTAINER,
},
},
}
},
expectErr: true,
},
{
desc: "user namespace mode target",
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_TARGET,
},
},
}
},
expectErr: true,
},
{
desc: "user namespace unknown mode",
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode(100),
},
},
}
},
expectErr: true,
},
{
desc: "should set supplemental groups correctly",
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
SupplementalGroups: []int64{1111, 2222},
}
},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
require.NotNil(t, spec.Process)
assert.Contains(t, spec.Process.User.AdditionalGids, uint32(1111))
assert.Contains(t, spec.Process.User.AdditionalGids, uint32(2222))
},
},
{
desc: "should overwrite default sysctls",
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.Sysctls = map[string]string{
"net.ipv4.ip_unprivileged_port_start": "500",
"net.ipv4.ping_group_range": "1 1000",
}
},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
require.NotNil(t, spec.Process)
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "500")
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "1 1000")
},
},
{
desc: "sandbox sizing annotations should be set if LinuxContainerResources were provided",
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.Resources = &v1.LinuxContainerResources{
CpuPeriod: 100,
CpuQuota: 200,
CpuShares: 5000,
MemoryLimitInBytes: 1024,
}
},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
value, ok := spec.Annotations[annotations.SandboxCPUPeriod]
assert.True(t, ok)
assert.EqualValues(t, strconv.FormatInt(100, 10), value)
assert.EqualValues(t, "100", value)
value, ok = spec.Annotations[annotations.SandboxCPUQuota]
assert.True(t, ok)
assert.EqualValues(t, "200", value)
value, ok = spec.Annotations[annotations.SandboxCPUShares]
assert.True(t, ok)
assert.EqualValues(t, "5000", value)
value, ok = spec.Annotations[annotations.SandboxMem]
assert.True(t, ok)
assert.EqualValues(t, "1024", value)
},
},
{
desc: "sandbox sizing annotations should not be set if LinuxContainerResources were not provided",
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
_, ok := spec.Annotations[annotations.SandboxCPUPeriod]
assert.False(t, ok)
_, ok = spec.Annotations[annotations.SandboxCPUQuota]
assert.False(t, ok)
_, ok = spec.Annotations[annotations.SandboxCPUShares]
assert.False(t, ok)
_, ok = spec.Annotations[annotations.SandboxMem]
assert.False(t, ok)
},
},
{
desc: "sandbox sizing annotations are zero if the resources are set to 0",
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.Resources = &v1.LinuxContainerResources{}
},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
value, ok := spec.Annotations[annotations.SandboxCPUPeriod]
assert.True(t, ok)
assert.EqualValues(t, "0", value)
value, ok = spec.Annotations[annotations.SandboxCPUQuota]
assert.True(t, ok)
assert.EqualValues(t, "0", value)
value, ok = spec.Annotations[annotations.SandboxCPUShares]
assert.True(t, ok)
assert.EqualValues(t, "0", value)
value, ok = spec.Annotations[annotations.SandboxMem]
assert.True(t, ok)
assert.EqualValues(t, "0", value)
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
c := newControllerService()
c.config.EnableUnprivilegedICMP = true
c.config.EnableUnprivilegedPorts = true
config, imageConfig, specCheck := getRunPodSandboxTestData()
if test.configChange != nil {
test.configChange(config)
}
spec, err := c.sandboxContainerSpec(testID, config, imageConfig, nsPath, nil)
if test.expectErr {
assert.Error(t, err)
assert.Nil(t, spec)
return
}
assert.NoError(t, err)
assert.NotNil(t, spec)
specCheck(t, testID, spec)
if test.specCheck != nil {
test.specCheck(t, spec)
}
})
}
}
func TestSetupSandboxFiles(t *testing.T) {
const (
testID = "test-id"
realhostname = "test-real-hostname"
)
for _, test := range []struct {
desc string
dnsConfig *runtime.DNSConfig
hostname string
ipcMode runtime.NamespaceMode
expectedCalls []ostesting.CalledDetail
}{
{
desc: "should check host /dev/shm existence when ipc mode is NODE",
ipcMode: runtime.NamespaceMode_NODE,
expectedCalls: []ostesting.CalledDetail{
{
Name: "Hostname",
},
{
Name: "WriteFile",
Arguments: []interface{}{
filepath.Join(testRootDir, sandboxesDir, testID, "hostname"),
[]byte(realhostname + "\n"),
os.FileMode(0644),
},
},
{
Name: "CopyFile",
Arguments: []interface{}{
"/etc/hosts",
filepath.Join(testRootDir, sandboxesDir, testID, "hosts"),
os.FileMode(0644),
},
},
{
Name: "CopyFile",
Arguments: []interface{}{
"/etc/resolv.conf",
filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"),
os.FileMode(0644),
},
},
{
Name: "Stat",
Arguments: []interface{}{"/dev/shm"},
},
},
},
{
desc: "should create new /etc/resolv.conf if DNSOptions is set",
dnsConfig: &runtime.DNSConfig{
Servers: []string{"8.8.8.8"},
Searches: []string{"114.114.114.114"},
Options: []string{"timeout:1"},
},
ipcMode: runtime.NamespaceMode_NODE,
expectedCalls: []ostesting.CalledDetail{
{
Name: "Hostname",
},
{
Name: "WriteFile",
Arguments: []interface{}{
filepath.Join(testRootDir, sandboxesDir, testID, "hostname"),
[]byte(realhostname + "\n"),
os.FileMode(0644),
},
},
{
Name: "CopyFile",
Arguments: []interface{}{
"/etc/hosts",
filepath.Join(testRootDir, sandboxesDir, testID, "hosts"),
os.FileMode(0644),
},
},
{
Name: "WriteFile",
Arguments: []interface{}{
filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"),
[]byte(`search 114.114.114.114
nameserver 8.8.8.8
options timeout:1
`), os.FileMode(0644),
},
},
{
Name: "Stat",
Arguments: []interface{}{"/dev/shm"},
},
},
},
{
desc: "should create empty /etc/resolv.conf if DNSOptions is empty",
dnsConfig: &runtime.DNSConfig{},
ipcMode: runtime.NamespaceMode_NODE,
expectedCalls: []ostesting.CalledDetail{
{
Name: "Hostname",
},
{
Name: "WriteFile",
Arguments: []interface{}{
filepath.Join(testRootDir, sandboxesDir, testID, "hostname"),
[]byte(realhostname + "\n"),
os.FileMode(0644),
},
},
{
Name: "CopyFile",
Arguments: []interface{}{
"/etc/hosts",
filepath.Join(testRootDir, sandboxesDir, testID, "hosts"),
os.FileMode(0644),
},
},
{
Name: "WriteFile",
Arguments: []interface{}{
filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"),
[]byte{},
os.FileMode(0644),
},
},
{
Name: "Stat",
Arguments: []interface{}{"/dev/shm"},
},
},
},
{
desc: "should copy host /etc/resolv.conf if DNSOptions is not set",
dnsConfig: nil,
ipcMode: runtime.NamespaceMode_NODE,
expectedCalls: []ostesting.CalledDetail{
{
Name: "Hostname",
},
{
Name: "WriteFile",
Arguments: []interface{}{
filepath.Join(testRootDir, sandboxesDir, testID, "hostname"),
[]byte(realhostname + "\n"),
os.FileMode(0644),
},
},
{
Name: "CopyFile",
Arguments: []interface{}{
"/etc/hosts",
filepath.Join(testRootDir, sandboxesDir, testID, "hosts"),
os.FileMode(0644),
},
},
{
Name: "CopyFile",
Arguments: []interface{}{
filepath.Join("/etc/resolv.conf"),
filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"),
os.FileMode(0644),
},
},
{
Name: "Stat",
Arguments: []interface{}{"/dev/shm"},
},
},
},
{
desc: "should create sandbox shm when ipc namespace mode is not NODE",
ipcMode: runtime.NamespaceMode_POD,
expectedCalls: []ostesting.CalledDetail{
{
Name: "Hostname",
},
{
Name: "WriteFile",
Arguments: []interface{}{
filepath.Join(testRootDir, sandboxesDir, testID, "hostname"),
[]byte(realhostname + "\n"),
os.FileMode(0644),
},
},
{
Name: "CopyFile",
Arguments: []interface{}{
"/etc/hosts",
filepath.Join(testRootDir, sandboxesDir, testID, "hosts"),
os.FileMode(0644),
},
},
{
Name: "CopyFile",
Arguments: []interface{}{
"/etc/resolv.conf",
filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"),
os.FileMode(0644),
},
},
{
Name: "MkdirAll",
Arguments: []interface{}{
filepath.Join(testStateDir, sandboxesDir, testID, "shm"),
os.FileMode(0700),
},
},
{
Name: "Mount",
// Ignore arguments which are too complex to check.
},
},
},
{
desc: "should create /etc/hostname when hostname is set",
hostname: "test-hostname",
ipcMode: runtime.NamespaceMode_NODE,
expectedCalls: []ostesting.CalledDetail{
{
Name: "WriteFile",
Arguments: []interface{}{
filepath.Join(testRootDir, sandboxesDir, testID, "hostname"),
[]byte("test-hostname\n"),
os.FileMode(0644),
},
},
{
Name: "CopyFile",
Arguments: []interface{}{
"/etc/hosts",
filepath.Join(testRootDir, sandboxesDir, testID, "hosts"),
os.FileMode(0644),
},
},
{
Name: "CopyFile",
Arguments: []interface{}{
"/etc/resolv.conf",
filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"),
os.FileMode(0644),
},
},
{
Name: "Stat",
Arguments: []interface{}{"/dev/shm"},
},
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
c := newControllerService()
c.os.(*ostesting.FakeOS).HostnameFn = func() (string, error) {
return realhostname, nil
}
cfg := &runtime.PodSandboxConfig{
Hostname: test.hostname,
DnsConfig: test.dnsConfig,
Linux: &runtime.LinuxPodSandboxConfig{
SecurityContext: &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
Ipc: test.ipcMode,
},
},
},
}
c.setupSandboxFiles(testID, cfg)
calls := c.os.(*ostesting.FakeOS).GetCalls()
assert.Len(t, calls, len(test.expectedCalls))
for i, expected := range test.expectedCalls {
if expected.Arguments == nil {
// Ignore arguments.
expected.Arguments = calls[i].Arguments
}
assert.Equal(t, expected, calls[i])
}
})
}
}
func TestParseDNSOption(t *testing.T) {
for _, test := range []struct {
desc string
servers []string
searches []string
options []string
expectedContent string
expectErr bool
}{
{
desc: "empty dns options should return empty content",
},
{
desc: "non-empty dns options should return correct content",
servers: []string{"8.8.8.8", "server.google.com"},
searches: []string{"114.114.114.114"},
options: []string{"timeout:1"},
expectedContent: `search 114.114.114.114
nameserver 8.8.8.8
nameserver server.google.com
options timeout:1
`,
},
{
desc: "expanded dns config should return correct content on modern libc (e.g. glibc 2.26 and above)",
servers: []string{"8.8.8.8", "server.google.com"},
searches: []string{
"server0.google.com",
"server1.google.com",
"server2.google.com",
"server3.google.com",
"server4.google.com",
"server5.google.com",
"server6.google.com",
},
options: []string{"timeout:1"},
expectedContent: `search server0.google.com server1.google.com server2.google.com server3.google.com server4.google.com server5.google.com server6.google.com
nameserver 8.8.8.8
nameserver server.google.com
options timeout:1
`,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
resolvContent, err := parseDNSOptions(test.servers, test.searches, test.options)
if test.expectErr {
assert.Error(t, err)
return
}
assert.NoError(t, err)
assert.Equal(t, resolvContent, test.expectedContent)
})
}
}
func TestSandboxDisableCgroup(t *testing.T) {
config, imageConfig, _ := getRunPodSandboxTestData()
c := newControllerService()
c.config.DisableCgroup = true
spec, err := c.sandboxContainerSpec("test-id", config, imageConfig, "test-cni", []string{})
require.NoError(t, err)
t.Log("resource limit should not be set")
assert.Nil(t, spec.Linux.Resources.Memory)
assert.Nil(t, spec.Linux.Resources.CPU)
t.Log("cgroup path should be empty")
assert.Empty(t, spec.Linux.CgroupsPath)
}
// TODO(random-liu): [P1] Add unit test for different error cases to make sure
// the function cleans up on error properly.

View File

@@ -0,0 +1,57 @@
//go:build !windows && !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/internal/cri/annotations"
"github.com/containerd/containerd/v2/pkg/oci"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func (c *Controller) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig,
imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (_ *runtimespec.Spec, retErr error) {
return c.runtimeSpec(id, "", annotations.DefaultCRIAnnotations(id, "", "", config, true)...)
}
// sandboxContainerSpecOpts generates OCI spec options for
// the sandbox container.
func (c *Controller) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
return []oci.SpecOpts{}, nil
}
// setupSandboxFiles sets up necessary sandbox files including /dev/shm, /etc/hosts,
// /etc/resolv.conf and /etc/hostname.
func (c *Controller) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
return nil
}
// cleanupSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to
// remove these files. Unmount should *NOT* return error if the mount point is already unmounted.
func (c *Controller) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
return nil
}
// sandboxSnapshotterOpts generates any platform specific snapshotter options
// for a sandbox container.
func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) {
return []snapshots.Opt{}, nil
}

View File

@@ -0,0 +1,35 @@
//go:build !windows && !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"testing"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) {
config := &runtime.PodSandboxConfig{}
imageConfig := &imagespec.ImageConfig{}
specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) {
}
return config, imageConfig, specCheck
}

View File

@@ -0,0 +1,172 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
goruntime "runtime"
"testing"
"github.com/containerd/typeurl/v2"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
)
func TestSandboxContainerSpec(t *testing.T) {
switch goruntime.GOOS {
case "darwin":
t.Skip("not implemented on Darwin")
case "freebsd":
t.Skip("not implemented on FreeBSD")
}
testID := "test-id"
nsPath := "test-cni"
for _, test := range []struct {
desc string
configChange func(*runtime.PodSandboxConfig)
podAnnotations []string
imageConfigChange func(*imagespec.ImageConfig)
specCheck func(*testing.T, *runtimespec.Spec)
expectErr bool
}{
{
desc: "should return error when entrypoint and cmd are empty",
imageConfigChange: func(c *imagespec.ImageConfig) {
c.Entrypoint = nil
c.Cmd = nil
},
expectErr: true,
},
{
desc: "a passthrough annotation should be passed as an OCI annotation",
podAnnotations: []string{"c"},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
assert.Equal(t, spec.Annotations["c"], "d")
},
},
{
desc: "a non-passthrough annotation should not be passed as an OCI annotation",
configChange: func(c *runtime.PodSandboxConfig) {
c.Annotations["d"] = "e"
},
podAnnotations: []string{"c"},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
assert.Equal(t, spec.Annotations["c"], "d")
_, ok := spec.Annotations["d"]
assert.False(t, ok)
},
},
{
desc: "passthrough annotations should support wildcard match",
configChange: func(c *runtime.PodSandboxConfig) {
c.Annotations["t.f"] = "j"
c.Annotations["z.g"] = "o"
c.Annotations["z"] = "o"
c.Annotations["y.ca"] = "b"
c.Annotations["y"] = "b"
},
podAnnotations: []string{"t*", "z.*", "y.c*"},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
assert.Equal(t, spec.Annotations["t.f"], "j")
assert.Equal(t, spec.Annotations["z.g"], "o")
assert.Equal(t, spec.Annotations["y.ca"], "b")
_, ok := spec.Annotations["y"]
assert.False(t, ok)
_, ok = spec.Annotations["z"]
assert.False(t, ok)
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
c := newControllerService()
config, imageConfig, specCheck := getRunPodSandboxTestData()
if test.configChange != nil {
test.configChange(config)
}
if test.imageConfigChange != nil {
test.imageConfigChange(imageConfig)
}
spec, err := c.sandboxContainerSpec(testID, config, imageConfig, nsPath,
test.podAnnotations)
if test.expectErr {
assert.Error(t, err)
assert.Nil(t, spec)
return
}
assert.NoError(t, err)
assert.NotNil(t, spec)
specCheck(t, testID, spec)
if test.specCheck != nil {
test.specCheck(t, spec)
}
})
}
}
func TestTypeurlMarshalUnmarshalSandboxMeta(t *testing.T) {
for _, test := range []struct {
desc string
configChange func(*runtime.PodSandboxConfig)
}{
{
desc: "should marshal original config",
},
{
desc: "should marshal Linux",
configChange: func(c *runtime.PodSandboxConfig) {
if c.Linux == nil {
c.Linux = &runtime.LinuxPodSandboxConfig{}
}
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
Network: runtime.NamespaceMode_NODE,
Pid: runtime.NamespaceMode_NODE,
Ipc: runtime.NamespaceMode_NODE,
},
SupplementalGroups: []int64{1111, 2222},
}
},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
meta := &sandboxstore.Metadata{
ID: "1",
Name: "sandbox_1",
NetNSPath: "/home/cloud",
}
meta.Config, _, _ = getRunPodSandboxTestData()
if test.configChange != nil {
test.configChange(meta.Config)
}
md, err := typeurl.MarshalAny(meta)
assert.NoError(t, err)
data, err := typeurl.UnmarshalAny(md)
assert.NoError(t, err)
assert.IsType(t, &sandboxstore.Metadata{}, data)
curMeta, ok := data.(*sandboxstore.Metadata)
assert.True(t, ok)
assert.Equal(t, meta, curMeta)
})
}
}

View File

@@ -0,0 +1,109 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"fmt"
"strconv"
"github.com/containerd/containerd/v2/pkg/oci"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/internal/cri/annotations"
customopts "github.com/containerd/containerd/v2/internal/cri/opts"
)
func (c *Controller) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig,
imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (*runtimespec.Spec, error) {
// Creates a spec Generator with the default spec.
specOpts := []oci.SpecOpts{
oci.WithEnv(imageConfig.Env),
oci.WithHostname(config.GetHostname()),
}
if imageConfig.WorkingDir != "" {
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
}
if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 {
// Pause image must have entrypoint or cmd.
return nil, fmt.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig)
}
specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...))
specOpts = append(specOpts,
// Clear the root location since hcsshim expects it.
// NOTE: readonly rootfs doesn't work on windows.
customopts.WithoutRoot,
oci.WithWindowsNetworkNamespace(nsPath),
)
specOpts = append(specOpts, customopts.WithWindowsDefaultSandboxShares)
// Start with the image config user and override below if RunAsUsername is not "".
username := imageConfig.User
runAsUser := config.GetWindows().GetSecurityContext().GetRunAsUsername()
if runAsUser != "" {
username = runAsUser
}
cs := config.GetWindows().GetSecurityContext().GetCredentialSpec()
if cs != "" {
specOpts = append(specOpts, customopts.WithWindowsCredentialSpec(cs))
}
// There really isn't a good Windows way to verify that the username is available in the
// image as early as here like there is for Linux. Later on in the stack hcsshim
// will handle the behavior of erroring out if the user isn't available in the image
// when trying to run the init process.
specOpts = append(specOpts, oci.WithUser(username))
for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
runtimePodAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
}
specOpts = append(specOpts, customopts.WithAnnotation(annotations.WindowsHostProcess, strconv.FormatBool(config.GetWindows().GetSecurityContext().GetHostProcess())))
specOpts = append(specOpts,
annotations.DefaultCRIAnnotations(id, "", "", config, true)...,
)
return c.runtimeSpec(id, "", specOpts...)
}
// No sandbox container spec options for windows yet.
func (c *Controller) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
return nil, nil
}
// No sandbox files needed for windows.
func (c *Controller) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
return nil
}
// No sandbox files needed for windows.
func (c *Controller) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
return nil
}
// No sandbox snapshotter options needed for windows.
func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) {
return []snapshots.Opt{}, nil
}

View File

@@ -0,0 +1,111 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"testing"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/internal/cri/annotations"
"github.com/containerd/containerd/v2/internal/cri/opts"
)
func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) {
config := &runtime.PodSandboxConfig{
Metadata: &runtime.PodSandboxMetadata{
Name: "test-name",
Uid: "test-uid",
Namespace: "test-ns",
Attempt: 1,
},
Hostname: "test-hostname",
LogDirectory: "test-log-directory",
Labels: map[string]string{"a": "b"},
Annotations: map[string]string{"c": "d"},
Windows: &runtime.WindowsPodSandboxConfig{
SecurityContext: &runtime.WindowsSandboxSecurityContext{
RunAsUsername: "test-user",
CredentialSpec: "{\"test\": \"spec\"}",
HostProcess: false,
},
},
}
imageConfig := &imagespec.ImageConfig{
Env: []string{"a=b", "c=d"},
Entrypoint: []string{"/pause"},
Cmd: []string{"forever"},
WorkingDir: "/workspace",
User: "test-image-user",
}
specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) {
assert.Equal(t, "test-hostname", spec.Hostname)
assert.Nil(t, spec.Root)
assert.Contains(t, spec.Process.Env, "a=b", "c=d")
assert.Equal(t, []string{"/pause", "forever"}, spec.Process.Args)
assert.Equal(t, "/workspace", spec.Process.Cwd)
assert.EqualValues(t, *spec.Windows.Resources.CPU.Shares, opts.DefaultSandboxCPUshares)
// Also checks if override of the image configs user is behaving.
t.Logf("Check username")
assert.Contains(t, spec.Process.User.Username, "test-user")
t.Logf("Check credential spec")
assert.Contains(t, spec.Windows.CredentialSpec, "{\"test\": \"spec\"}")
t.Logf("Check PodSandbox annotations")
assert.Contains(t, spec.Annotations, annotations.SandboxID)
assert.EqualValues(t, spec.Annotations[annotations.SandboxID], id)
assert.Contains(t, spec.Annotations, annotations.ContainerType)
assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeSandbox)
assert.Contains(t, spec.Annotations, annotations.SandboxNamespace)
assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-ns")
assert.Contains(t, spec.Annotations, annotations.SandboxUID)
assert.EqualValues(t, spec.Annotations[annotations.SandboxUID], "test-uid")
assert.Contains(t, spec.Annotations, annotations.SandboxName)
assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-name")
assert.Contains(t, spec.Annotations, annotations.SandboxLogDir)
assert.EqualValues(t, spec.Annotations[annotations.SandboxLogDir], "test-log-directory")
assert.Contains(t, spec.Annotations, annotations.WindowsHostProcess)
assert.EqualValues(t, spec.Annotations[annotations.WindowsHostProcess], "false")
}
return config, imageConfig, specCheck
}
func TestSandboxWindowsNetworkNamespace(t *testing.T) {
testID := "test-id"
nsPath := "test-cni"
c := newControllerService()
config, imageConfig, specCheck := getRunPodSandboxTestData()
spec, err := c.sandboxContainerSpec(testID, config, imageConfig, nsPath, nil)
assert.NoError(t, err)
assert.NotNil(t, spec)
specCheck(t, testID, spec)
assert.NotNil(t, spec.Windows)
assert.NotNil(t, spec.Windows.Network)
assert.Equal(t, nsPath, spec.Windows.Network.NetworkNamespace)
}

View File

@@ -0,0 +1,29 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"github.com/containerd/containerd/v2/api/types"
"github.com/containerd/errdefs"
)
// TODO(dcantah): Implement metrics to be used for SandboxStats rpc.
func (c *Controller) Metrics(ctx context.Context, sandboxID string) (*types.Metric, error) {
return nil, errdefs.ErrNotImplemented
}

View File

@@ -0,0 +1,155 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"encoding/json"
"fmt"
"github.com/containerd/typeurl/v2"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/core/sandbox"
"github.com/containerd/containerd/v2/internal/cri/server/podsandbox/types"
critypes "github.com/containerd/containerd/v2/internal/cri/types"
"github.com/containerd/errdefs"
)
func (c *Controller) Status(ctx context.Context, sandboxID string, verbose bool) (sandbox.ControllerStatus, error) {
sb := c.store.Get(sandboxID)
if sb == nil {
return sandbox.ControllerStatus{}, fmt.Errorf("unable to find sandbox %q: %w", sandboxID, errdefs.ErrNotFound)
}
cstatus := sandbox.ControllerStatus{
SandboxID: sandboxID,
Pid: sb.Pid,
State: sb.State.String(),
CreatedAt: sb.CreatedAt,
Extra: nil,
}
exitStatus := sb.GetExitStatus()
if exitStatus != nil {
cstatus.ExitedAt = exitStatus.ExitTime()
}
if verbose {
info, err := toCRISandboxInfo(ctx, sb)
if err != nil {
return sandbox.ControllerStatus{}, err
}
cstatus.Info = info
}
return cstatus, nil
}
// toCRISandboxInfo converts internal container object information to CRI sandbox status response info map.
func toCRISandboxInfo(ctx context.Context, sb *types.PodSandbox) (map[string]string, error) {
si := &critypes.SandboxInfo{
Pid: sb.Pid,
Config: sb.Metadata.Config,
RuntimeHandler: sb.Metadata.RuntimeHandler,
CNIResult: sb.Metadata.CNIResult,
Metadata: &sb.Metadata,
}
if container := sb.Container; container != nil {
task, err := container.Task(ctx, nil)
if err != nil && !errdefs.IsNotFound(err) {
return nil, fmt.Errorf("failed to get sandbox container task: %w", err)
}
var processStatus containerd.ProcessStatus
if task != nil {
if taskStatus, err := task.Status(ctx); err != nil {
if !errdefs.IsNotFound(err) {
return nil, fmt.Errorf("failed to get task status: %w", err)
}
processStatus = containerd.Unknown
} else {
processStatus = taskStatus.Status
}
}
si.Status = string(processStatus)
spec, err := container.Spec(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get sandbox container runtime spec: %w", err)
}
si.RuntimeSpec = spec
ctrInfo, err := container.Info(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get sandbox container info: %w", err)
}
// Do not use config.SandboxImage because the configuration might
// be changed during restart. It may not reflect the actual image
// used by the sandbox container.
si.Image = ctrInfo.Image
si.SnapshotKey = ctrInfo.SnapshotKey
si.Snapshotter = ctrInfo.Snapshotter
runtimeOptions, err := getRuntimeOptions(ctrInfo)
if err != nil {
return nil, fmt.Errorf("failed to get runtime options: %w", err)
}
si.RuntimeType = ctrInfo.Runtime.Name
si.RuntimeOptions = runtimeOptions
}
if si.Status == "" {
// If processStatus is empty, it means that the task is deleted. Apply "deleted"
// status which does not exist in containerd.
si.Status = "deleted"
}
netns := getNetNS(&sb.Metadata)
if netns != nil {
// Add network closed information if sandbox is not using host network.
closed, err := netns.Closed()
if err != nil {
return nil, fmt.Errorf("failed to check network namespace closed: %w", err)
}
si.NetNSClosed = closed
}
infoBytes, err := json.Marshal(si)
if err != nil {
return nil, fmt.Errorf("failed to marshal info %v: %w", si, err)
}
return map[string]string{
"info": string(infoBytes),
}, nil
}
// getRuntimeOptions get runtime options from container metadata.
func getRuntimeOptions(c containers.Container) (interface{}, error) {
from := c.Runtime.Options
if from == nil || from.GetValue() == nil {
return nil, nil
}
opts, err := typeurl.UnmarshalAny(from)
if err != nil {
return nil, err
}
return opts, nil
}

View File

@@ -0,0 +1,129 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"context"
"fmt"
"syscall"
"time"
"github.com/containerd/log"
eventtypes "github.com/containerd/containerd/v2/api/events"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/sandbox"
"github.com/containerd/containerd/v2/internal/cri/server/podsandbox/types"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/errdefs"
)
func (c *Controller) Stop(ctx context.Context, sandboxID string, _ ...sandbox.StopOpt) error {
podSandbox := c.store.Get(sandboxID)
if podSandbox == nil {
return errdefs.ErrNotFound
}
if podSandbox.Container == nil {
return nil
}
meta, err := getMetadata(ctx, podSandbox.Container)
if err != nil {
return err
}
state := podSandbox.State
if state == sandboxstore.StateReady || state == sandboxstore.StateUnknown {
if err := c.stopSandboxContainer(ctx, podSandbox); err != nil {
return fmt.Errorf("failed to stop sandbox container %q in %q state: %w", sandboxID, state, err)
}
}
if err := c.cleanupSandboxFiles(sandboxID, meta.Config); err != nil {
return fmt.Errorf("failed to cleanup sandbox files: %w", err)
}
return nil
}
// stopSandboxContainer kills the sandbox container.
// `task.Delete` is not called here because it will be called when
// the event monitor handles the `TaskExit` event.
func (c *Controller) stopSandboxContainer(ctx context.Context, podSandbox *types.PodSandbox) error {
id := podSandbox.ID
container := podSandbox.Container
state := podSandbox.State
task, err := container.Task(ctx, nil)
if err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to get pod sandbox container: %w", err)
}
// Don't return for unknown state, some cleanup needs to be done.
if state == sandboxstore.StateUnknown {
return cleanupUnknownSandbox(ctx, id, podSandbox)
}
return nil
}
// Handle unknown state.
// The cleanup logic is the same with container unknown state.
if state == sandboxstore.StateUnknown {
// Start an exit handler for sandbox container in unknown state.
waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext())
defer waitCancel()
exitCh, err := task.Wait(waitCtx)
if err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to wait for task: %w", err)
}
return cleanupUnknownSandbox(ctx, id, podSandbox)
}
exitCtx, exitCancel := context.WithCancel(context.Background())
stopCh := make(chan struct{})
go func() {
defer close(stopCh)
exitStatus, exitedAt, err := c.waitSandboxExit(exitCtx, podSandbox, exitCh)
if err != context.Canceled && err != context.DeadlineExceeded {
// The error of context.Canceled or context.DeadlineExceeded indicates the task.Wait is not finished,
// so we can not set the exit status of the pod sandbox.
podSandbox.Exit(*containerd.NewExitStatus(exitStatus, exitedAt, err))
} else {
log.G(ctx).WithError(err).Errorf("Failed to wait pod sandbox exit %+v", err)
}
}()
defer func() {
exitCancel()
// This ensures that exit monitor is stopped before
// `Wait` is cancelled, so no exit event is generated
// because of the `Wait` cancellation.
<-stopCh
}()
}
// Kill the pod sandbox container.
if err = task.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to kill pod sandbox container: %w", err)
}
_, err = podSandbox.Wait(ctx)
return err
}
// cleanupUnknownSandbox cleanup stopped sandbox in unknown state.
func cleanupUnknownSandbox(ctx context.Context, id string, sandbox *types.PodSandbox) error {
// Reuse handleSandboxTaskExit to do the cleanup.
return handleSandboxTaskExit(ctx, sandbox, &eventtypes.TaskExit{ExitStatus: unknownExitCode, ExitedAt: protobuf.ToTimestamp(time.Now())})
}

View File

@@ -0,0 +1,56 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsandbox
import (
"fmt"
"sync"
"github.com/containerd/containerd/v2/internal/cri/server/podsandbox/types"
)
type Store struct {
m sync.Map
}
func NewStore() *Store {
return &Store{}
}
func (s *Store) Save(p *types.PodSandbox) error {
if p == nil {
return fmt.Errorf("pod sandbox should not be nil")
}
s.m.Store(p.ID, p)
return nil
}
func (s *Store) Get(id string) *types.PodSandbox {
i, ok := s.m.Load(id)
if !ok {
return nil
}
return i.(*types.PodSandbox)
}
func (s *Store) Remove(id string) *types.PodSandbox {
i, ok := s.m.LoadAndDelete(id)
if !ok {
return nil
}
return i.(*types.PodSandbox)
}

View File

@@ -0,0 +1,83 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package types
import (
"context"
"sync"
"time"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/sandbox"
"github.com/containerd/containerd/v2/internal/cri/store"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
)
type PodSandbox struct {
mu sync.Mutex
ID string
Container containerd.Container
State sandboxstore.State
Metadata sandboxstore.Metadata
Runtime sandbox.RuntimeOpts
Pid uint32
CreatedAt time.Time
stopChan *store.StopCh
exitStatus *containerd.ExitStatus
}
func NewPodSandbox(id string, status sandboxstore.Status) *PodSandbox {
podSandbox := &PodSandbox{
ID: id,
Container: nil,
stopChan: store.NewStopCh(),
CreatedAt: status.CreatedAt,
State: status.State,
Pid: status.Pid,
}
if status.State == sandboxstore.StateNotReady {
podSandbox.Exit(*containerd.NewExitStatus(status.ExitStatus, status.ExitedAt, nil))
}
return podSandbox
}
func (p *PodSandbox) Exit(status containerd.ExitStatus) {
p.mu.Lock()
defer p.mu.Unlock()
p.exitStatus = &status
p.State = sandboxstore.StateNotReady
p.stopChan.Stop()
}
func (p *PodSandbox) Wait(ctx context.Context) (*containerd.ExitStatus, error) {
s := p.GetExitStatus()
if s != nil {
return s, nil
}
select {
case <-ctx.Done():
return nil, ctx.Err()
case <-p.stopChan.Stopped():
return p.GetExitStatus(), nil
}
}
func (p *PodSandbox) GetExitStatus() *containerd.ExitStatus {
p.mu.Lock()
defer p.mu.Unlock()
return p.exitStatus
}

View File

@@ -0,0 +1,49 @@
//go:build !no_rdt
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"fmt"
"github.com/containerd/containerd/v2/pkg/rdt"
"github.com/containerd/log"
)
// rdtClassFromAnnotations examines container and pod annotations of a
// container and returns its effective RDT class.
func (c *criService) rdtClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) {
cls, err := rdt.ContainerClassFromAnnotations(containerName, containerAnnotations, podAnnotations)
if err == nil {
// Our internal check that RDT has been enabled
if cls != "" && !rdt.IsEnabled() {
err = fmt.Errorf("RDT disabled, refusing to set RDT class of container %q to %q", containerName, cls)
}
}
if err != nil {
if !rdt.IsEnabled() && c.config.ContainerdConfig.IgnoreRdtNotEnabledErrors {
log.L.Debugf("continuing create container %s, ignoring rdt not enabled (%v)", containerName, err)
return "", nil
}
return "", err
}
return cls, nil
}

View File

@@ -0,0 +1,23 @@
//go:build no_rdt
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
func (c *criService) rdtClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) {
return "", nil
}

View File

@@ -0,0 +1,469 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
"os"
"path/filepath"
"time"
containerd "github.com/containerd/containerd/v2/client"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
crilabels "github.com/containerd/containerd/v2/internal/cri/labels"
"github.com/containerd/containerd/v2/internal/cri/server/podsandbox"
containerdio "github.com/containerd/containerd/v2/pkg/cio"
"github.com/containerd/containerd/v2/pkg/netns"
"github.com/containerd/errdefs"
"github.com/containerd/log"
"github.com/containerd/typeurl/v2"
"golang.org/x/sync/errgroup"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
cio "github.com/containerd/containerd/v2/internal/cri/io"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
)
// NOTE: The recovery logic has following assumption: when the cri plugin is down:
// 1) Files (e.g. root directory, netns) and checkpoint maintained by the plugin MUST NOT be
// touched. Or else, recovery logic for those containers/sandboxes may return error.
// 2) Containerd containers may be deleted, but SHOULD NOT be added. Or else, recovery logic
// for the newly added container/sandbox will return error, because there is no corresponding root
// directory created.
// 3) Containerd container tasks may exit or be stopped, deleted. Even though current logic could
// tolerant tasks being created or started, we prefer that not to happen.
// recover recovers system state from containerd and status checkpoint.
func (c *criService) recover(ctx context.Context) error {
// Recover all sandboxes.
sandboxes, err := c.client.Containers(ctx, filterLabel(crilabels.ContainerKindLabel, crilabels.ContainerKindSandbox))
if err != nil {
return fmt.Errorf("failed to list sandbox containers: %w", err)
}
podSandboxController := c.client.SandboxController(string(criconfig.ModePodSandbox))
podSandboxLoader, ok := podSandboxController.(podSandboxRecover)
if !ok {
log.G(ctx).Fatal("pod sandbox controller doesn't support recovery")
}
eg, ctx2 := errgroup.WithContext(ctx)
for _, sandbox := range sandboxes {
sandbox := sandbox
eg.Go(func() error {
sb, err := podSandboxLoader.RecoverContainer(ctx2, sandbox)
if err != nil {
log.G(ctx2).
WithError(err).
WithField("sandbox", sandbox.ID()).
Error("Failed to load sandbox")
return nil
}
log.G(ctx2).Debugf("Loaded sandbox %+v", sb)
if err := c.sandboxStore.Add(sb); err != nil {
return fmt.Errorf("failed to add sandbox %q to store: %w", sandbox.ID(), err)
}
if err := c.sandboxNameIndex.Reserve(sb.Name, sb.ID); err != nil {
return fmt.Errorf("failed to reserve sandbox name %q: %w", sb.Name, err)
}
return nil
})
}
if err := eg.Wait(); err != nil {
return err
}
// Recover sandboxes in the new SandboxStore
storedSandboxes, err := c.client.SandboxStore().List(ctx)
if err != nil {
return fmt.Errorf("failed to list sandboxes from API: %w", err)
}
for _, sbx := range storedSandboxes {
if _, err := c.sandboxStore.Get(sbx.ID); err == nil {
continue
}
metadata := sandboxstore.Metadata{}
err := sbx.GetExtension(podsandbox.MetadataKey, &metadata)
if err != nil {
return fmt.Errorf("failed to get metadata for stored sandbox %q: %w", sbx.ID, err)
}
var (
state = sandboxstore.StateUnknown
controller = c.client.SandboxController(sbx.Sandboxer)
)
status, err := controller.Status(ctx, sbx.ID, false)
if err != nil {
log.G(ctx).
WithError(err).
WithField("sandbox", sbx.ID).
Error("failed to recover sandbox state")
if errdefs.IsNotFound(err) {
state = sandboxstore.StateNotReady
}
} else {
if code, ok := runtime.PodSandboxState_value[status.State]; ok {
if code == int32(runtime.PodSandboxState_SANDBOX_READY) {
state = sandboxstore.StateReady
} else if code == int32(runtime.PodSandboxState_SANDBOX_NOTREADY) {
state = sandboxstore.StateNotReady
}
}
}
sb := sandboxstore.NewSandbox(metadata, sandboxstore.Status{State: state})
// Load network namespace.
sb.NetNS = getNetNS(&metadata)
if err := c.sandboxStore.Add(sb); err != nil {
return fmt.Errorf("failed to add stored sandbox %q to store: %w", sbx.ID, err)
}
}
for _, sb := range c.sandboxStore.List() {
sb := sb
status := sb.Status.Get()
if status.State == sandboxstore.StateNotReady {
continue
}
controller, err := c.sandboxService.SandboxController(sb.Config, sb.RuntimeHandler)
if err != nil {
log.G(ctx).WithError(err).Error("failed to get sandbox controller while waiting sandbox")
continue
}
exitCh := make(chan containerd.ExitStatus, 1)
go func() {
exit, err := controller.Wait(ctrdutil.NamespacedContext(), sb.ID)
if err != nil {
log.G(ctx).WithError(err).Error("failed to wait for sandbox exit")
exitCh <- *containerd.NewExitStatus(containerd.UnknownExitStatus, time.Time{}, err)
}
exitCh <- *containerd.NewExitStatus(exit.ExitStatus, exit.ExitedAt, nil)
}()
c.eventMonitor.startSandboxExitMonitor(context.Background(), sb.ID, exitCh)
}
// Recover all containers.
containers, err := c.client.Containers(ctx, filterLabel(crilabels.ContainerKindLabel, crilabels.ContainerKindContainer))
if err != nil {
return fmt.Errorf("failed to list containers: %w", err)
}
eg, ctx2 = errgroup.WithContext(ctx)
for _, container := range containers {
container := container
eg.Go(func() error {
cntr, err := c.loadContainer(ctx2, container)
if err != nil {
log.G(ctx2).
WithError(err).
WithField("container", container.ID()).
Error("Failed to load container")
return nil
}
log.G(ctx2).Debugf("Loaded container %+v", cntr)
if err := c.containerStore.Add(cntr); err != nil {
return fmt.Errorf("failed to add container %q to store: %w", container.ID(), err)
}
if err := c.containerNameIndex.Reserve(cntr.Name, cntr.ID); err != nil {
return fmt.Errorf("failed to reserve container name %q: %w", cntr.Name, err)
}
return nil
})
}
if err := eg.Wait(); err != nil {
return err
}
// Recover all images.
if err := c.ImageService.CheckImages(ctx); err != nil {
return fmt.Errorf("failed to check images: %w", err)
}
// It's possible that containerd containers are deleted unexpectedly. In that case,
// we can't even get metadata, we should cleanup orphaned sandbox/container directories
// with best effort.
// Cleanup orphaned sandbox and container directories without corresponding containerd container.
for _, cleanup := range []struct {
cntrs []containerd.Container
base string
errMsg string
}{
{
cntrs: sandboxes,
base: filepath.Join(c.config.RootDir, sandboxesDir),
errMsg: "failed to cleanup orphaned sandbox directories",
},
{
cntrs: sandboxes,
base: filepath.Join(c.config.StateDir, sandboxesDir),
errMsg: "failed to cleanup orphaned volatile sandbox directories",
},
{
cntrs: containers,
base: filepath.Join(c.config.RootDir, containersDir),
errMsg: "failed to cleanup orphaned container directories",
},
{
cntrs: containers,
base: filepath.Join(c.config.StateDir, containersDir),
errMsg: "failed to cleanup orphaned volatile container directories",
},
} {
if err := cleanupOrphanedIDDirs(ctx, cleanup.cntrs, cleanup.base); err != nil {
return fmt.Errorf("%s: %w", cleanup.errMsg, err)
}
}
return nil
}
// loadContainerTimeout is the default timeout for loading a container/sandbox.
// One container/sandbox hangs (e.g. containerd#2438) should not affect other
// containers/sandboxes.
// Most CRI container/sandbox related operations are per container, the ones
// which handle multiple containers at a time are:
// * ListPodSandboxes: Don't talk with containerd services.
// * ListContainers: Don't talk with containerd services.
// * ListContainerStats: Not in critical code path, a default timeout will
// be applied at CRI level.
// * Recovery logic: We should set a time for each container/sandbox recovery.
// * Event monitor: We should set a timeout for each container/sandbox event handling.
const loadContainerTimeout = 10 * time.Second
// loadContainer loads container from containerd and status checkpoint.
func (c *criService) loadContainer(ctx context.Context, cntr containerd.Container) (containerstore.Container, error) {
ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout)
defer cancel()
id := cntr.ID()
containerDir := c.getContainerRootDir(id)
volatileContainerDir := c.getVolatileContainerRootDir(id)
var container containerstore.Container
// Load container metadata.
exts, err := cntr.Extensions(ctx)
if err != nil {
return container, fmt.Errorf("failed to get container extensions: %w", err)
}
ext, ok := exts[crilabels.ContainerMetadataExtension]
if !ok {
return container, fmt.Errorf("metadata extension %q not found", crilabels.ContainerMetadataExtension)
}
data, err := typeurl.UnmarshalAny(ext)
if err != nil {
return container, fmt.Errorf("failed to unmarshal metadata extension %q: %w", ext, err)
}
meta := data.(*containerstore.Metadata)
// Load status from checkpoint.
status, err := containerstore.LoadStatus(containerDir, id)
if err != nil {
log.G(ctx).WithError(err).Warnf("Failed to load container status for %q", id)
status = unknownContainerStatus()
}
var containerIO *cio.ContainerIO
err = func() error {
// Load up-to-date status from containerd.
t, err := cntr.Task(ctx, func(fifos *containerdio.FIFOSet) (_ containerdio.IO, err error) {
stdoutWC, stderrWC, err := c.createContainerLoggers(meta.LogPath, meta.Config.GetTty())
if err != nil {
return nil, err
}
defer func() {
if err != nil {
if stdoutWC != nil {
stdoutWC.Close()
}
if stderrWC != nil {
stderrWC.Close()
}
}
}()
containerIO, err = cio.NewContainerIO(id,
cio.WithFIFOs(fifos),
)
if err != nil {
return nil, err
}
containerIO.AddOutput("log", stdoutWC, stderrWC)
containerIO.Pipe()
return containerIO, nil
})
if err != nil && !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to load task: %w", err)
}
var s containerd.Status
var notFound bool
if errdefs.IsNotFound(err) {
// Task is not found.
notFound = true
} else {
// Task is found. Get task status.
s, err = t.Status(ctx)
if err != nil {
// It's still possible that task is deleted during this window.
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to get task status: %w", err)
}
notFound = true
}
}
if notFound {
// Task is not created or has been deleted, use the checkpointed status
// to generate container status.
switch status.State() {
case runtime.ContainerState_CONTAINER_CREATED:
// NOTE: Another possibility is that we've tried to start the container, but
// containerd got restarted during that. In that case, we still
// treat the container as `CREATED`.
containerIO, err = cio.NewContainerIO(id,
cio.WithNewFIFOs(volatileContainerDir, meta.Config.GetTty(), meta.Config.GetStdin()),
)
if err != nil {
return fmt.Errorf("failed to create container io: %w", err)
}
case runtime.ContainerState_CONTAINER_RUNNING:
// Container was in running state, but its task has been deleted,
// set unknown exited state. Container io is not needed in this case.
status.FinishedAt = time.Now().UnixNano()
status.ExitCode = unknownExitCode
status.Reason = unknownExitReason
default:
// Container is in exited/unknown state, return the status as it is.
}
} else {
// Task status is found. Update container status based on the up-to-date task status.
switch s.Status {
case containerd.Created:
// Task has been created, but not started yet. This could only happen if containerd
// gets restarted during container start.
// Container must be in `CREATED` state.
if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to delete task: %w", err)
}
if status.State() != runtime.ContainerState_CONTAINER_CREATED {
return fmt.Errorf("unexpected container state for created task: %q", status.State())
}
case containerd.Running:
// Task is running. Container must be in `RUNNING` state, based on our assumption that
// "task should not be started when containerd is down".
switch status.State() {
case runtime.ContainerState_CONTAINER_EXITED:
return fmt.Errorf("unexpected container state for running task: %q", status.State())
case runtime.ContainerState_CONTAINER_RUNNING:
default:
// This may happen if containerd gets restarted after task is started, but
// before status is checkpointed.
status.StartedAt = time.Now().UnixNano()
status.Pid = t.Pid()
}
// Wait for the task for exit monitor.
// wait is a long running background request, no timeout needed.
exitCh, err := t.Wait(ctrdutil.NamespacedContext())
if err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to wait for task: %w", err)
}
// Container was in running state, but its task has been deleted,
// set unknown exited state.
status.FinishedAt = time.Now().UnixNano()
status.ExitCode = unknownExitCode
status.Reason = unknownExitReason
} else {
// Start exit monitor.
c.eventMonitor.startContainerExitMonitor(context.Background(), id, status.Pid, exitCh)
}
case containerd.Stopped:
// Task is stopped. Update status and delete the task.
if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to delete task: %w", err)
}
status.FinishedAt = s.ExitTime.UnixNano()
status.ExitCode = int32(s.ExitStatus)
default:
return fmt.Errorf("unexpected task status %q", s.Status)
}
}
return nil
}()
if err != nil {
log.G(ctx).WithError(err).Errorf("Failed to load container status for %q", id)
// Only set the unknown field in this case, because other fields may
// contain useful information loaded from the checkpoint.
status.Unknown = true
}
opts := []containerstore.Opts{
containerstore.WithStatus(status, containerDir),
containerstore.WithContainer(cntr),
}
// containerIO could be nil for container in unknown state.
if containerIO != nil {
opts = append(opts, containerstore.WithContainerIO(containerIO))
}
return containerstore.NewContainer(*meta, opts...)
}
// podSandboxRecover is an additional interface implemented by podsandbox/ controller to handle
// Pod sandbox containers recovery.
type podSandboxRecover interface {
RecoverContainer(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error)
}
func getNetNS(meta *sandboxstore.Metadata) *netns.NetNS {
// Don't need to load netns for host network sandbox.
if hostNetwork(meta.Config) {
return nil
}
return netns.LoadNetNS(meta.NetNSPath)
}
func cleanupOrphanedIDDirs(ctx context.Context, cntrs []containerd.Container, base string) error {
// Cleanup orphaned id directories.
dirs, err := os.ReadDir(base)
if err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to read base directory: %w", err)
}
idsMap := make(map[string]containerd.Container)
for _, cntr := range cntrs {
idsMap[cntr.ID()] = cntr
}
for _, d := range dirs {
if !d.IsDir() {
log.G(ctx).Warnf("Invalid file %q found in base directory %q", d.Name(), base)
continue
}
if _, ok := idsMap[d.Name()]; ok {
// Do not remove id directory if corresponding container is found.
continue
}
dir := filepath.Join(base, d.Name())
if err := ensureRemoveAll(ctx, dir); err != nil {
log.G(ctx).WithError(err).Warnf("Failed to remove id directory %q", dir)
} else {
log.G(ctx).Debugf("Cleanup orphaned id directory %q", dir)
}
}
return nil
}

View File

@@ -0,0 +1,31 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// RuntimeConfig returns configuration information of the runtime.
func (c *criService) RuntimeConfig(ctx context.Context, r *runtime.RuntimeConfigRequest) (*runtime.RuntimeConfigResponse, error) {
resp := &runtime.RuntimeConfigResponse{
Linux: c.getLinuxRuntimeConfig(ctx),
}
return resp, nil
}

View File

@@ -0,0 +1,82 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"sort"
runcoptions "github.com/containerd/containerd/v2/core/runtime/v2/runc/options"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
"github.com/containerd/containerd/v2/pkg/systemd"
"github.com/containerd/log"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func (c *criService) getLinuxRuntimeConfig(ctx context.Context) *runtime.LinuxRuntimeConfiguration {
return &runtime.LinuxRuntimeConfiguration{CgroupDriver: c.getCgroupDriver(ctx)}
}
func (c *criService) getCgroupDriver(ctx context.Context) runtime.CgroupDriver {
// Go through the runtime handlers in a predictable order, starting from the
// default handler, others sorted in alphabetical order
handlerNames := make([]string, 0, len(c.config.ContainerdConfig.Runtimes))
for n := range c.config.ContainerdConfig.Runtimes {
handlerNames = append(handlerNames, n)
}
sort.Slice(handlerNames, func(i, j int) bool {
if handlerNames[i] == c.config.ContainerdConfig.DefaultRuntimeName {
return true
}
if handlerNames[j] == c.config.ContainerdConfig.DefaultRuntimeName {
return false
}
return handlerNames[i] < handlerNames[j]
})
for _, handler := range handlerNames {
opts, err := criconfig.GenerateRuntimeOptions(c.config.ContainerdConfig.Runtimes[handler])
if err != nil {
log.G(ctx).Debugf("failed to parse runtime handler options for %q", handler)
continue
}
if d, ok := getCgroupDriverFromRuntimeHandlerOpts(opts); ok {
return d
}
log.G(ctx).Debugf("runtime handler %q does not provide cgroup driver information", handler)
}
// If no runtime handlers have a setting, detect if systemd is running
d := runtime.CgroupDriver_CGROUPFS
if systemd.IsRunningSystemd() {
d = runtime.CgroupDriver_SYSTEMD
}
log.G(ctx).Debugf("no runtime handler provided cgroup driver setting, using auto-detected %s", runtime.CgroupDriver_name[int32(d)])
return d
}
func getCgroupDriverFromRuntimeHandlerOpts(opts interface{}) (runtime.CgroupDriver, bool) {
switch v := opts.(type) {
case *runcoptions.Options:
systemdCgroup := v.SystemdCgroup
if systemdCgroup {
return runtime.CgroupDriver_SYSTEMD, true
}
return runtime.CgroupDriver_CGROUPFS, true
}
return runtime.CgroupDriver_SYSTEMD, false
}

View File

@@ -0,0 +1,105 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"testing"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
"github.com/containerd/containerd/v2/pkg/systemd"
"github.com/containerd/containerd/v2/plugins"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func newFakeRuntimeConfig(runcV2, systemdCgroup bool) criconfig.Runtime {
r := criconfig.Runtime{Type: "default", Options: map[string]interface{}{}}
if runcV2 {
r.Type = plugins.RuntimeRuncV2
if systemdCgroup {
r.Options["SystemdCgroup"] = true
}
}
return r
}
func TestRuntimeConfig(t *testing.T) {
autoDetected := runtime.CgroupDriver_CGROUPFS
if systemd.IsRunningSystemd() {
autoDetected = runtime.CgroupDriver_SYSTEMD
}
for _, test := range []struct {
desc string
defaultRuntime string
runtimes map[string]criconfig.Runtime
expectedCgroupDriver runtime.CgroupDriver
}{
{
desc: "no runtimes",
expectedCgroupDriver: autoDetected,
},
{
desc: "non-runc runtime",
defaultRuntime: "non-runc",
runtimes: map[string]criconfig.Runtime{"non-runc": newFakeRuntimeConfig(false, false)},
expectedCgroupDriver: autoDetected,
},
{
desc: "no default, pick first in alphabetical order",
runtimes: map[string]criconfig.Runtime{
"non-runc": newFakeRuntimeConfig(false, false),
"runc-2": newFakeRuntimeConfig(true, true),
"runc": newFakeRuntimeConfig(true, false),
"non-runc-2": newFakeRuntimeConfig(false, false),
},
expectedCgroupDriver: runtime.CgroupDriver_CGROUPFS,
},
{
desc: "pick default, cgroupfs",
defaultRuntime: "runc-2",
runtimes: map[string]criconfig.Runtime{
"non-runc": newFakeRuntimeConfig(false, false),
"runc": newFakeRuntimeConfig(true, true),
"runc-2": newFakeRuntimeConfig(true, false),
},
expectedCgroupDriver: runtime.CgroupDriver_CGROUPFS,
},
{
desc: "pick default, systemd",
defaultRuntime: "runc-2",
runtimes: map[string]criconfig.Runtime{
"non-runc": newFakeRuntimeConfig(false, false),
"runc": newFakeRuntimeConfig(true, false),
"runc-2": newFakeRuntimeConfig(true, true),
},
expectedCgroupDriver: runtime.CgroupDriver_SYSTEMD,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
c := newTestCRIService()
c.config.RuntimeConfig.ContainerdConfig.DefaultRuntimeName = test.defaultRuntime
c.config.RuntimeConfig.ContainerdConfig.Runtimes = test.runtimes
resp, err := c.RuntimeConfig(context.TODO(), &runtime.RuntimeConfigRequest{})
assert.NoError(t, err)
assert.Equal(t, test.expectedCgroupDriver, resp.Linux.CgroupDriver, "got unexpected cgroup driver")
})
}
}

View File

@@ -0,0 +1,29 @@
//go:build !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func (c *criService) getLinuxRuntimeConfig(ctx context.Context) *runtime.LinuxRuntimeConfiguration {
return nil
}

View File

@@ -0,0 +1,112 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"time"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
)
// ListPodSandbox returns a list of Sandbox.
func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (*runtime.ListPodSandboxResponse, error) {
start := time.Now()
// List all sandboxes from store.
sandboxesInStore := c.sandboxStore.List()
var sandboxes []*runtime.PodSandbox
for _, sandboxInStore := range sandboxesInStore {
sandboxes = append(sandboxes, toCRISandbox(
sandboxInStore.Metadata,
sandboxInStore.Status.Get(),
))
}
sandboxes = c.filterCRISandboxes(sandboxes, r.GetFilter())
sandboxListTimer.UpdateSince(start)
return &runtime.ListPodSandboxResponse{Items: sandboxes}, nil
}
// toCRISandbox converts sandbox metadata into CRI pod sandbox.
func toCRISandbox(meta sandboxstore.Metadata, status sandboxstore.Status) *runtime.PodSandbox {
// Set sandbox state to NOTREADY by default.
state := runtime.PodSandboxState_SANDBOX_NOTREADY
if status.State == sandboxstore.StateReady {
state = runtime.PodSandboxState_SANDBOX_READY
}
return &runtime.PodSandbox{
Id: meta.ID,
Metadata: meta.Config.GetMetadata(),
State: state,
CreatedAt: status.CreatedAt.UnixNano(),
Labels: meta.Config.GetLabels(),
Annotations: meta.Config.GetAnnotations(),
RuntimeHandler: meta.RuntimeHandler,
}
}
func (c *criService) normalizePodSandboxFilter(filter *runtime.PodSandboxFilter) {
if sb, err := c.sandboxStore.Get(filter.GetId()); err == nil {
filter.Id = sb.ID
}
}
func (c *criService) normalizePodSandboxStatsFilter(filter *runtime.PodSandboxStatsFilter) {
if sb, err := c.sandboxStore.Get(filter.GetId()); err == nil {
filter.Id = sb.ID
}
}
// filterCRISandboxes filters CRISandboxes.
func (c *criService) filterCRISandboxes(sandboxes []*runtime.PodSandbox, filter *runtime.PodSandboxFilter) []*runtime.PodSandbox {
if filter == nil {
return sandboxes
}
c.normalizePodSandboxFilter(filter)
filtered := []*runtime.PodSandbox{}
for _, s := range sandboxes {
// Filter by id
if filter.GetId() != "" && filter.GetId() != s.Id {
continue
}
// Filter by state
if filter.GetState() != nil && filter.GetState().GetState() != s.State {
continue
}
// Filter by label
if filter.GetLabelSelector() != nil {
match := true
for k, v := range filter.GetLabelSelector() {
got, ok := s.Labels[k]
if !ok || got != v {
match = false
break
}
}
if !match {
continue
}
}
filtered = append(filtered, s)
}
return filtered
}

View File

@@ -0,0 +1,225 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
)
func TestToCRISandbox(t *testing.T) {
config := &runtime.PodSandboxConfig{
Metadata: &runtime.PodSandboxMetadata{
Name: "test-name",
Uid: "test-uid",
Namespace: "test-ns",
Attempt: 1,
},
Labels: map[string]string{"a": "b"},
Annotations: map[string]string{"c": "d"},
}
createdAt := time.Now()
meta := sandboxstore.Metadata{
ID: "test-id",
Name: "test-name",
Config: config,
NetNSPath: "test-netns",
RuntimeHandler: "test-runtime-handler",
}
expect := &runtime.PodSandbox{
Id: "test-id",
Metadata: config.GetMetadata(),
CreatedAt: createdAt.UnixNano(),
Labels: config.GetLabels(),
Annotations: config.GetAnnotations(),
RuntimeHandler: "test-runtime-handler",
}
for _, test := range []struct {
desc string
state sandboxstore.State
expectedState runtime.PodSandboxState
}{
{
desc: "sandbox state ready",
state: sandboxstore.StateReady,
expectedState: runtime.PodSandboxState_SANDBOX_READY,
},
{
desc: "sandbox state not ready",
state: sandboxstore.StateNotReady,
expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
},
{
desc: "sandbox state unknown",
state: sandboxstore.StateUnknown,
expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
status := sandboxstore.Status{
CreatedAt: createdAt,
State: test.state,
}
expect.State = test.expectedState
s := toCRISandbox(meta, status)
assert.Equal(t, expect, s, test.desc)
})
}
}
func TestFilterSandboxes(t *testing.T) {
c := newTestCRIService()
sandboxes := []sandboxstore.Sandbox{
sandboxstore.NewSandbox(
sandboxstore.Metadata{
ID: "1abcdef",
Name: "sandboxname-1",
Config: &runtime.PodSandboxConfig{
Metadata: &runtime.PodSandboxMetadata{
Name: "podname-1",
Uid: "uid-1",
Namespace: "ns-1",
Attempt: 1,
},
},
RuntimeHandler: "test-runtime-handler",
},
sandboxstore.Status{
CreatedAt: time.Now(),
State: sandboxstore.StateReady,
},
),
sandboxstore.NewSandbox(
sandboxstore.Metadata{
ID: "2abcdef",
Name: "sandboxname-2",
Config: &runtime.PodSandboxConfig{
Metadata: &runtime.PodSandboxMetadata{
Name: "podname-2",
Uid: "uid-2",
Namespace: "ns-2",
Attempt: 2,
},
Labels: map[string]string{"a": "b"},
},
RuntimeHandler: "test-runtime-handler",
},
sandboxstore.Status{
CreatedAt: time.Now(),
State: sandboxstore.StateNotReady,
},
),
sandboxstore.NewSandbox(
sandboxstore.Metadata{
ID: "3abcdef",
Name: "sandboxname-3",
Config: &runtime.PodSandboxConfig{
Metadata: &runtime.PodSandboxMetadata{
Name: "podname-2",
Uid: "uid-2",
Namespace: "ns-2",
Attempt: 2,
},
Labels: map[string]string{"c": "d"},
},
RuntimeHandler: "test-runtime-handler",
},
sandboxstore.Status{
CreatedAt: time.Now(),
State: sandboxstore.StateReady,
},
),
}
// Create PodSandbox
testSandboxes := []*runtime.PodSandbox{}
for _, sb := range sandboxes {
testSandboxes = append(testSandboxes, toCRISandbox(sb.Metadata, sb.Status.Get()))
}
// Inject test sandbox metadata
for _, sb := range sandboxes {
assert.NoError(t, c.sandboxStore.Add(sb))
}
for _, test := range []struct {
desc string
filter *runtime.PodSandboxFilter
expect []*runtime.PodSandbox
}{
{
desc: "no filter",
expect: testSandboxes,
},
{
desc: "id filter",
filter: &runtime.PodSandboxFilter{Id: "2abcdef"},
expect: []*runtime.PodSandbox{testSandboxes[1]},
},
{
desc: "truncid filter",
filter: &runtime.PodSandboxFilter{Id: "2"},
expect: []*runtime.PodSandbox{testSandboxes[1]},
},
{
desc: "state filter",
filter: &runtime.PodSandboxFilter{
State: &runtime.PodSandboxStateValue{
State: runtime.PodSandboxState_SANDBOX_READY,
},
},
expect: []*runtime.PodSandbox{testSandboxes[0], testSandboxes[2]},
},
{
desc: "label filter",
filter: &runtime.PodSandboxFilter{
LabelSelector: map[string]string{"a": "b"},
},
expect: []*runtime.PodSandbox{testSandboxes[1]},
},
{
desc: "mixed filter not matched",
filter: &runtime.PodSandboxFilter{
Id: "1",
LabelSelector: map[string]string{"a": "b"},
},
expect: []*runtime.PodSandbox{},
},
{
desc: "mixed filter matched",
filter: &runtime.PodSandboxFilter{
State: &runtime.PodSandboxStateValue{
State: runtime.PodSandboxState_SANDBOX_READY,
},
LabelSelector: map[string]string{"c": "d"},
},
expect: []*runtime.PodSandbox{testSandboxes[2]},
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
filtered := c.filterCRISandboxes(testSandboxes, test.filter)
assert.Equal(t, test.expect, filtered, test.desc)
})
}
}

Some files were not shown because too many files have changed in this diff Show More