diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index babb7b4ae..ec1fa95aa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -245,7 +245,6 @@ jobs: fail-fast: false matrix: os: [windows-2019, windows-2022] - disable_cri_sandboxes: ["", "legacyCRI"] defaults: run: @@ -337,7 +336,6 @@ jobs: - name: Integration 1 env: CGO_ENABLED: 1 - DISABLE_CRI_SANDBOXES: ${{ matrix.disable_cri_sandboxes }} GOTESTSUM_JUNITFILE: ${{github.workspace}}/test-integration-serial-junit.xml GOTESTSUM_JSONFILE: ${{github.workspace}}/test-integration-serial-gotest.json EXTRA_TESTFLAGS: "-timeout=20m" @@ -355,7 +353,6 @@ jobs: TESTFLAGS_PARALLEL: 1 EXTRA_TESTFLAGS: "-short" CGO_ENABLED: 1 - DISABLE_CRI_SANDBOXES: ${{ matrix.disable_cri_sandboxes }} GOTESTSUM_JUNITFILE: ${{github.workspace}}/test-integration-parallel-junit.xml GOTESTSUM_JSONFILE: ${{github.workspace}}/test-integration-parallel-gotest.json run: mingw32-make.exe integration @@ -368,14 +365,12 @@ jobs: - name: CRI Integration Test env: - DISABLE_CRI_SANDBOXES: ${{ matrix.disable_cri_sandboxes }} TEST_IMAGE_LIST: ${{github.workspace}}/repolist.toml run: | make cri-integration - name: cri-tools critest env: - DISABLE_CRI_SANDBOXES: ${{ matrix.disable_cri_sandboxes }} CRI_TEST_IMAGES: ${{github.workspace}}/cri-test-images.yaml shell: powershell run: | @@ -411,7 +406,6 @@ jobs: runtime: - io.containerd.runc.v2 runc: [runc, crun] - DISABLE_CRI_SANDBOXES: ["", "legacyCRI"] env: GOTEST: gotestsum -- @@ -466,7 +460,6 @@ jobs: env: TEST_RUNTIME: ${{ matrix.runtime }} RUNC_FLAVOR: ${{ matrix.runc }} - DISABLE_CRI_SANDBOXES: ${{ matrix.disable_cri_sandboxes }} GOTESTSUM_JUNITFILE: ${{github.workspace}}/test-integration-serial-junit.xml GOTESTSUM_JSONFILE: ${{github.workspace}}/test-integration-serial-gotest.json run: | @@ -485,7 +478,6 @@ jobs: env: TEST_RUNTIME: ${{ matrix.runtime }} RUNC_FLAVOR: ${{ matrix.runc }} - DISABLE_CRI_SANDBOXES: ${{ matrix.disable_cri_sandboxes }} GOTESTSUM_JUNITFILE: 
${{github.workspace}}/test-integration-parallel-junit.xml GOTESTSUM_JSONFILE: ${{github.workspace}}/test-integration-parallel-gotest.json run: | @@ -502,14 +494,12 @@ jobs: - name: CRI Integration Test env: TEST_RUNTIME: ${{ matrix.runtime }} - DISABLE_CRI_SANDBOXES: ${{ matrix.disable_cri_sandboxes }} run: | CONTAINERD_RUNTIME=$TEST_RUNTIME make cri-integration - name: cri-tools critest env: TEST_RUNTIME: ${{ matrix.runtime }} - DISABLE_CRI_SANDBOXES: ${{ matrix.disable_cri_sandboxes }} run: | sudo -E PATH=$PATH ./script/critest.sh "${{github.workspace}}/report" diff --git a/Vagrantfile b/Vagrantfile index 81ff99b7a..e67025c57 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -272,7 +272,6 @@ EOF 'GOTESTSUM_JUNITFILE': ENV['GOTESTSUM_JUNITFILE'], 'GOTESTSUM_JSONFILE': ENV['GOTESTSUM_JSONFILE'], 'GITHUB_WORKSPACE': '', - 'DISABLE_CRI_SANDBOXES': ENV['DISABLE_CRI_SANDBOXES'], } sh.inline = <<~SHELL #!/usr/bin/env bash diff --git a/containerd.service b/containerd.service index f8b925e56..7edd64645 100644 --- a/containerd.service +++ b/containerd.service @@ -18,8 +18,6 @@ Documentation=https://containerd.io After=network.target local-fs.target [Service] -#uncomment to fallback to legacy CRI plugin implementation with podsandbox support. 
-#Environment="DISABLE_CRI_SANDBOXES=1" ExecStartPre=-/sbin/modprobe overlay ExecStart=/usr/local/bin/containerd diff --git a/contrib/Dockerfile.test b/contrib/Dockerfile.test index 4aeac8fda..2059e3219 100644 --- a/contrib/Dockerfile.test +++ b/contrib/Dockerfile.test @@ -94,7 +94,6 @@ RUN make BUILDTAGS="no_btrfs no_devmapper" bin/cri-integration.test RUN ./script/setup/install-failpoint-binaries # The test scripts need these env vars to be explicitly set ENV GITHUB_WORKSPACE="" -ENV DISABLE_CRI_SANDBOXES="" ENV CONTAINERD_RUNTIME="io.containerd.runc.v2" CMD ["make", "cri-integration"] diff --git a/contrib/fuzz/cri_fuzzer.go b/contrib/fuzz/cri_fuzzer.go index 92bf3c9bf..5a6f21035 100644 --- a/contrib/fuzz/cri_fuzzer.go +++ b/contrib/fuzz/cri_fuzzer.go @@ -24,8 +24,8 @@ import ( fuzz "github.com/AdaLogics/go-fuzz-headers" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - "github.com/containerd/containerd/pkg/cri/sbserver" "github.com/containerd/containerd/pkg/cri/server" + "github.com/containerd/containerd/pkg/cri/server/images" containerstore "github.com/containerd/containerd/pkg/cri/store/container" sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" ) @@ -191,7 +191,7 @@ func sandboxStore(cs server.CRIService) (*sandboxstore.Store, error) { ss, err = server.SandboxStore(cs) if err != nil { - ss, err = sbserver.SandboxStore(cs) + ss, err = server.SandboxStore(cs) if err != nil { return nil, err } @@ -535,6 +535,6 @@ func FuzzParseAuth(data []byte) int { if err != nil { return 0 } - _, _, _ = server.ParseAuth(auth, host) + _, _, _ = images.ParseAuth(auth, host) return 1 } diff --git a/contrib/fuzz/cri_sbserver_fuzzer.go b/contrib/fuzz/cri_sbserver_fuzzer.go index ed3e88ec7..1146c1326 100644 --- a/contrib/fuzz/cri_sbserver_fuzzer.go +++ b/contrib/fuzz/cri_sbserver_fuzzer.go @@ -23,7 +23,7 @@ import ( "github.com/containerd/containerd" criconfig "github.com/containerd/containerd/pkg/cri/config" - "github.com/containerd/containerd/pkg/cri/sbserver" + 
"github.com/containerd/containerd/pkg/cri/server" ) func FuzzCRISandboxServer(data []byte) int { @@ -37,7 +37,7 @@ func FuzzCRISandboxServer(data []byte) int { } defer client.Close() - c, err := sbserver.NewCRIService(criconfig.Config{}, client, nil) + c, err := server.NewCRIService(criconfig.Config{}, client, nil) if err != nil { panic(err) } diff --git a/integration/image_pull_timeout_test.go b/integration/image_pull_timeout_test.go index 64d4fd345..683e1f8c4 100644 --- a/integration/image_pull_timeout_test.go +++ b/integration/image_pull_timeout_test.go @@ -459,6 +459,7 @@ func initLocalCRIPlugin(client *containerd.Client, tmpDir string, registryCfg cr }, Registry: registryCfg, ImagePullProgressTimeout: defaultImagePullProgressTimeout.String(), + StatsCollectPeriod: 10, }, ContainerdRootDir: containerdRootDir, RootDir: filepath.Join(criWorkDir, "root"), diff --git a/integration/sandbox_run_rollback_test.go b/integration/sandbox_run_rollback_test.go index e75acc37f..c9ecf5c3f 100644 --- a/integration/sandbox_run_rollback_test.go +++ b/integration/sandbox_run_rollback_test.go @@ -31,15 +31,12 @@ import ( "testing" "time" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" criapiv1 "k8s.io/cri-api/pkg/apis/runtime/v1" - "github.com/containerd/containerd/pkg/cri/sbserver/podsandbox" - "github.com/containerd/containerd/pkg/cri/store/sandbox" + "github.com/containerd/containerd/pkg/cri/server/podsandbox" "github.com/containerd/containerd/pkg/failpoint" - "github.com/containerd/typeurl/v2" ) const ( @@ -293,40 +290,12 @@ func TestRunPodSandboxAndTeardownCNISlow(t *testing.T) { assert.Equal(t, sbConfig.Metadata.Uid, sb.Metadata.Uid) assert.Equal(t, sbConfig.Metadata.Attempt, sb.Metadata.Attempt) - if os.Getenv("DISABLE_CRI_SANDBOXES") != "" { - // non-sbserver - t.Log("Get sandbox info (non-sbserver)") - _, info, err := SandboxInfo(sb.Id) - require.NoError(t, err) - require.False(t, 
info.NetNSClosed) - var netNS string - for _, n := range info.RuntimeSpec.Linux.Namespaces { - if n.Type == runtimespec.NetworkNamespace { - netNS = n.Path - } - } - assert.NotEmpty(t, netNS, "network namespace should be set") + t.Log("Get sandbox info (sbserver)") + _, info, err := sbserverSandboxInfo(sb.Id) + require.NoError(t, err) + require.False(t, info.NetNSClosed) - t.Log("Get sandbox container") - c, err := GetContainer(sb.Id) - require.NoError(t, err) - md, ok := c.Extensions["io.cri-containerd.sandbox.metadata"] - require.True(t, ok, "sandbox metadata should exist in extension") - i, err := typeurl.UnmarshalAny(md) - require.NoError(t, err) - require.IsType(t, &sandbox.Metadata{}, i) - metadata, ok := i.(*sandbox.Metadata) - require.True(t, ok) - assert.Equal(t, netNS, metadata.NetNSPath, "network namespace path should be the same in runtime spec and sandbox metadata") - } else { - // sbserver - t.Log("Get sandbox info (sbserver)") - _, info, err := sbserverSandboxInfo(sb.Id) - require.NoError(t, err) - require.False(t, info.NetNSClosed) - - assert.NotEmpty(t, info.Metadata.NetNSPath, "network namespace should be set") - } + assert.NotEmpty(t, info.Metadata.NetNSPath, "network namespace should be set") } // sbserverSandboxInfo gets sandbox info. 
diff --git a/pkg/cri/server/bandwidth/doc.go b/pkg/cri/bandwidth/doc.go similarity index 100% rename from pkg/cri/server/bandwidth/doc.go rename to pkg/cri/bandwidth/doc.go diff --git a/pkg/cri/server/bandwidth/fake_shaper.go b/pkg/cri/bandwidth/fake_shaper.go similarity index 100% rename from pkg/cri/server/bandwidth/fake_shaper.go rename to pkg/cri/bandwidth/fake_shaper.go diff --git a/pkg/cri/server/bandwidth/interfaces.go b/pkg/cri/bandwidth/interfaces.go similarity index 100% rename from pkg/cri/server/bandwidth/interfaces.go rename to pkg/cri/bandwidth/interfaces.go diff --git a/pkg/cri/server/bandwidth/linux.go b/pkg/cri/bandwidth/linux.go similarity index 100% rename from pkg/cri/server/bandwidth/linux.go rename to pkg/cri/bandwidth/linux.go diff --git a/pkg/cri/server/bandwidth/unsupported.go b/pkg/cri/bandwidth/unsupported.go similarity index 100% rename from pkg/cri/server/bandwidth/unsupported.go rename to pkg/cri/bandwidth/unsupported.go diff --git a/pkg/cri/server/bandwidth/utils.go b/pkg/cri/bandwidth/utils.go similarity index 100% rename from pkg/cri/server/bandwidth/utils.go rename to pkg/cri/bandwidth/utils.go diff --git a/pkg/cri/cri.go b/pkg/cri/cri.go index a81fc43c9..b432ae3fa 100644 --- a/pkg/cri/cri.go +++ b/pkg/cri/cri.go @@ -19,12 +19,11 @@ package cri import ( "flag" "fmt" - "os" "path/filepath" "github.com/containerd/containerd" "github.com/containerd/containerd/pkg/cri/nri" - "github.com/containerd/containerd/pkg/cri/sbserver" + "github.com/containerd/containerd/pkg/cri/server" nriservice "github.com/containerd/containerd/pkg/nri" "github.com/containerd/containerd/platforms" "github.com/containerd/containerd/plugin" @@ -34,7 +33,6 @@ import ( criconfig "github.com/containerd/containerd/pkg/cri/config" "github.com/containerd/containerd/pkg/cri/constants" - "github.com/containerd/containerd/pkg/cri/server" ) // Register CRI service plugin @@ -86,14 +84,7 @@ func initCRIService(ic *plugin.InitContext) (interface{}, error) { return nil, 
fmt.Errorf("failed to create containerd client: %w", err) } - var s server.CRIService - if os.Getenv("DISABLE_CRI_SANDBOXES") == "" { - log.G(ctx).Info("using CRI Sandbox server - use DISABLE_CRI_SANDBOXES=1 to fallback to legacy CRI") - s, err = sbserver.NewCRIService(c, client, getNRIAPI(ic)) - } else { - log.G(ctx).Info("using legacy CRI server") - s, err = server.NewCRIService(c, client, getNRIAPI(ic)) - } + s, err := server.NewCRIService(c, client, getNRIAPI(ic)) if err != nil { return nil, fmt.Errorf("failed to create CRI service: %w", err) } diff --git a/pkg/cri/sbserver/blockio_linux.go b/pkg/cri/sbserver/blockio_linux.go deleted file mode 100644 index 1ccf10189..000000000 --- a/pkg/cri/sbserver/blockio_linux.go +++ /dev/null @@ -1,45 +0,0 @@ -//go:build linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "fmt" - - "github.com/containerd/containerd/pkg/blockio" - "github.com/containerd/log" -) - -// blockIOClassFromAnnotations examines container and pod annotations of a -// container and returns its effective blockio class. 
-func (c *criService) blockIOClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) { - cls, err := blockio.ContainerClassFromAnnotations(containerName, containerAnnotations, podAnnotations) - if err != nil { - return "", err - } - - if cls != "" && !blockio.IsEnabled() { - if c.config.ContainerdConfig.IgnoreBlockIONotEnabledErrors { - cls = "" - log.L.Debugf("continuing create container %s, ignoring blockio not enabled (%v)", containerName, err) - } else { - return "", fmt.Errorf("blockio disabled, refusing to set blockio class of container %q to %q", containerName, cls) - } - } - return cls, nil -} diff --git a/pkg/cri/sbserver/blockio_stub.go b/pkg/cri/sbserver/blockio_stub.go deleted file mode 100644 index a8f8e66f2..000000000 --- a/pkg/cri/sbserver/blockio_stub.go +++ /dev/null @@ -1,23 +0,0 @@ -//go:build !linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -func (c *criService) blockIOClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) { - return "", nil -} diff --git a/pkg/cri/sbserver/cni_conf_syncer.go b/pkg/cri/sbserver/cni_conf_syncer.go deleted file mode 100644 index c94bde502..000000000 --- a/pkg/cri/sbserver/cni_conf_syncer.go +++ /dev/null @@ -1,133 +0,0 @@ -/* - Copyright The containerd Authors. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "fmt" - "os" - "path/filepath" - "sync" - - "github.com/containerd/go-cni" - "github.com/containerd/log" - "github.com/fsnotify/fsnotify" -) - -// cniNetConfSyncer is used to reload cni network conf triggered by fs change -// events. -type cniNetConfSyncer struct { - // only used for lastSyncStatus - sync.RWMutex - lastSyncStatus error - - watcher *fsnotify.Watcher - confDir string - netPlugin cni.CNI - loadOpts []cni.Opt -} - -// newCNINetConfSyncer creates cni network conf syncer. -func newCNINetConfSyncer(confDir string, netPlugin cni.CNI, loadOpts []cni.Opt) (*cniNetConfSyncer, error) { - watcher, err := fsnotify.NewWatcher() - if err != nil { - return nil, fmt.Errorf("failed to create fsnotify watcher: %w", err) - } - - // /etc/cni has to be readable for non-root users (0755), because /etc/cni/tuning/allowlist.conf is used for rootless mode too. - // This file was introduced in CNI plugins 1.2.0 (https://github.com/containernetworking/plugins/pull/693), and its path is hard-coded. 
- confDirParent := filepath.Dir(confDir) - if err := os.MkdirAll(confDirParent, 0755); err != nil { - return nil, fmt.Errorf("failed to create the parent of the cni conf dir=%s: %w", confDirParent, err) - } - - if err := os.MkdirAll(confDir, 0700); err != nil { - return nil, fmt.Errorf("failed to create cni conf dir=%s for watch: %w", confDir, err) - } - - if err := watcher.Add(confDir); err != nil { - return nil, fmt.Errorf("failed to watch cni conf dir %s: %w", confDir, err) - } - - syncer := &cniNetConfSyncer{ - watcher: watcher, - confDir: confDir, - netPlugin: netPlugin, - loadOpts: loadOpts, - } - - if err := syncer.netPlugin.Load(syncer.loadOpts...); err != nil { - log.L.WithError(err).Error("failed to load cni during init, please check CRI plugin status before setting up network for pods") - syncer.updateLastStatus(err) - } - return syncer, nil -} - -// syncLoop monitors any fs change events from cni conf dir and tries to reload -// cni configuration. -func (syncer *cniNetConfSyncer) syncLoop() error { - for { - select { - case event, ok := <-syncer.watcher.Events: - if !ok { - log.L.Debugf("cni watcher channel is closed") - return nil - } - // Only reload config when receiving write/rename/remove - // events - // - // TODO(fuweid): Might only reload target cni config - // files to prevent no-ops. - if event.Has(fsnotify.Chmod) || event.Has(fsnotify.Create) { - log.L.Debugf("ignore event from cni conf dir: %s", event) - continue - } - log.L.Debugf("receiving change event from cni conf dir: %s", event) - - lerr := syncer.netPlugin.Load(syncer.loadOpts...) - if lerr != nil { - log.L.WithError(lerr). - Errorf("failed to reload cni configuration after receiving fs change event(%s)", event) - } - syncer.updateLastStatus(lerr) - - case err := <-syncer.watcher.Errors: - if err != nil { - log.L.WithError(err).Error("failed to continue sync cni conf change") - return err - } - } - } -} - -// lastStatus retrieves last sync status. 
-func (syncer *cniNetConfSyncer) lastStatus() error { - syncer.RLock() - defer syncer.RUnlock() - return syncer.lastSyncStatus -} - -// updateLastStatus will be called after every single cni load. -func (syncer *cniNetConfSyncer) updateLastStatus(err error) { - syncer.Lock() - defer syncer.Unlock() - syncer.lastSyncStatus = err -} - -// stop stops watcher in the syncLoop. -func (syncer *cniNetConfSyncer) stop() error { - return syncer.watcher.Close() -} diff --git a/pkg/cri/sbserver/container_attach.go b/pkg/cri/sbserver/container_attach.go deleted file mode 100644 index 852707114..000000000 --- a/pkg/cri/sbserver/container_attach.go +++ /dev/null @@ -1,84 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "fmt" - "io" - - "github.com/containerd/containerd" - "github.com/containerd/log" - "k8s.io/client-go/tools/remotecommand" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - cio "github.com/containerd/containerd/pkg/cri/io" -) - -// Attach prepares a streaming endpoint to attach to a running container, and returns the address. 
-func (c *criService) Attach(ctx context.Context, r *runtime.AttachRequest) (*runtime.AttachResponse, error) { - cntr, err := c.containerStore.Get(r.GetContainerId()) - if err != nil { - return nil, fmt.Errorf("failed to find container in store: %w", err) - } - state := cntr.Status.Get().State() - if state != runtime.ContainerState_CONTAINER_RUNNING { - return nil, fmt.Errorf("container is in %s state", criContainerStateToString(state)) - } - return c.streamServer.GetAttach(r) -} - -func (c *criService) attachContainer(ctx context.Context, id string, stdin io.Reader, stdout, stderr io.WriteCloser, - tty bool, resize <-chan remotecommand.TerminalSize) error { - ctx, cancel := context.WithCancel(ctx) - defer cancel() - // Get container from our container store. - cntr, err := c.containerStore.Get(id) - if err != nil { - return fmt.Errorf("failed to find container %q in store: %w", id, err) - } - id = cntr.ID - - state := cntr.Status.Get().State() - if state != runtime.ContainerState_CONTAINER_RUNNING { - return fmt.Errorf("container is in %s state", criContainerStateToString(state)) - } - - task, err := cntr.Container.Task(ctx, nil) - if err != nil { - return fmt.Errorf("failed to load task: %w", err) - } - handleResizing(ctx, resize, func(size remotecommand.TerminalSize) { - if err := task.Resize(ctx, uint32(size.Width), uint32(size.Height)); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to resize task %q console", id) - } - }) - - opts := cio.AttachOptions{ - Stdin: stdin, - Stdout: stdout, - Stderr: stderr, - Tty: tty, - StdinOnce: cntr.Config.StdinOnce, - CloseStdin: func() error { - return task.CloseIO(ctx, containerd.WithStdinCloser) - }, - } - // TODO(random-liu): Figure out whether we need to support historical output. 
- cntr.IO.Attach(opts) - return nil -} diff --git a/pkg/cri/sbserver/container_checkpoint.go b/pkg/cri/sbserver/container_checkpoint.go deleted file mode 100644 index 1c017bd9a..000000000 --- a/pkg/cri/sbserver/container_checkpoint.go +++ /dev/null @@ -1,29 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (res *runtime.CheckpointContainerResponse, err error) { - return nil, status.Errorf(codes.Unimplemented, "method CheckpointContainer not implemented") -} diff --git a/pkg/cri/sbserver/container_create.go b/pkg/cri/sbserver/container_create.go deleted file mode 100644 index f97b01e7d..000000000 --- a/pkg/cri/sbserver/container_create.go +++ /dev/null @@ -1,1059 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "errors" - "fmt" - "path/filepath" - "strconv" - "strings" - "time" - - "github.com/containerd/typeurl/v2" - "github.com/davecgh/go-spew/spew" - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/selinux/go-selinux" - "github.com/opencontainers/selinux/go-selinux/label" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/containers" - "github.com/containerd/containerd/oci" - "github.com/containerd/containerd/pkg/blockio" - "github.com/containerd/containerd/pkg/cri/annotations" - criconfig "github.com/containerd/containerd/pkg/cri/config" - cio "github.com/containerd/containerd/pkg/cri/io" - customopts "github.com/containerd/containerd/pkg/cri/opts" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - "github.com/containerd/containerd/pkg/cri/util" - "github.com/containerd/containerd/platforms" - "github.com/containerd/log" -) - -func init() { - typeurl.Register(&containerstore.Metadata{}, - "github.com/containerd/cri/pkg/store/container", "Metadata") -} - -// CreateContainer creates a new container in the given PodSandbox. 
-func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateContainerRequest) (_ *runtime.CreateContainerResponse, retErr error) { - config := r.GetConfig() - log.G(ctx).Debugf("Container config %+v", config) - sandboxConfig := r.GetSandboxConfig() - sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId()) - if err != nil { - return nil, fmt.Errorf("failed to find sandbox id %q: %w", r.GetPodSandboxId(), err) - } - - controller, err := c.getSandboxController(sandbox.Config, sandbox.RuntimeHandler) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox controller: %w", err) - } - - cstatus, err := controller.Status(ctx, sandbox.ID, false) - if err != nil { - return nil, fmt.Errorf("failed to get controller status: %w", err) - } - - var ( - sandboxID = cstatus.SandboxID - sandboxPid = cstatus.Pid - ) - - // Generate unique id and name for the container and reserve the name. - // Reserve the container name to avoid concurrent `CreateContainer` request creating - // the same container. - id := util.GenerateID() - metadata := config.GetMetadata() - if metadata == nil { - return nil, errors.New("container config must include metadata") - } - containerName := metadata.Name - name := makeContainerName(metadata, sandboxConfig.GetMetadata()) - log.G(ctx).Debugf("Generated id %q for container %q", id, name) - if err = c.containerNameIndex.Reserve(name, id); err != nil { - return nil, fmt.Errorf("failed to reserve container name %q: %w", name, err) - } - defer func() { - // Release the name if the function returns with an error. - if retErr != nil { - c.containerNameIndex.ReleaseByName(name) - } - }() - - // Create initial internal container metadata. - meta := containerstore.Metadata{ - ID: id, - Name: name, - SandboxID: sandboxID, - Config: config, - } - - // Prepare container image snapshot. For container, the image should have - // been pulled before creating the container, so do not ensure the image. 
- image, err := c.LocalResolve(config.GetImage().GetImage()) - if err != nil { - return nil, fmt.Errorf("failed to resolve image %q: %w", config.GetImage().GetImage(), err) - } - containerdImage, err := c.toContainerdImage(ctx, image) - if err != nil { - return nil, fmt.Errorf("failed to get image from containerd %q: %w", image.ID, err) - } - - start := time.Now() - - // Create container root directory. - containerRootDir := c.getContainerRootDir(id) - if err = c.os.MkdirAll(containerRootDir, 0755); err != nil { - return nil, fmt.Errorf("failed to create container root directory %q: %w", - containerRootDir, err) - } - defer func() { - if retErr != nil { - // Cleanup the container root directory. - if err = c.os.RemoveAll(containerRootDir); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to remove container root directory %q", - containerRootDir) - } - } - }() - volatileContainerRootDir := c.getVolatileContainerRootDir(id) - if err = c.os.MkdirAll(volatileContainerRootDir, 0755); err != nil { - return nil, fmt.Errorf("failed to create volatile container root directory %q: %w", - volatileContainerRootDir, err) - } - defer func() { - if retErr != nil { - // Cleanup the volatile container root directory. - if err = c.os.RemoveAll(volatileContainerRootDir); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to remove volatile container root directory %q", - volatileContainerRootDir) - } - } - }() - - platform, err := controller.Platform(ctx, sandboxID) - if err != nil { - return nil, fmt.Errorf("failed to query sandbox platform: %w", err) - } - - var volumeMounts []*runtime.Mount - if !c.config.IgnoreImageDefinedVolumes { - // Create container image volumes mounts. 
- volumeMounts = c.volumeMounts(platform, containerRootDir, config, &image.ImageSpec.Config) - } else if len(image.ImageSpec.Config.Volumes) != 0 { - log.G(ctx).Debugf("Ignoring volumes defined in image %v because IgnoreImageDefinedVolumes is set", image.ID) - } - - ociRuntime, err := c.getSandboxRuntime(sandboxConfig, sandbox.Metadata.RuntimeHandler) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox runtime: %w", err) - } - log.G(ctx).Debugf("Use OCI runtime %+v for sandbox %q and container %q", ociRuntime, sandboxID, id) - - spec, err := c.buildContainerSpec( - platform, - id, - sandboxID, - sandboxPid, - sandbox.NetNSPath, - containerName, - containerdImage.Name(), - config, - sandboxConfig, - &image.ImageSpec.Config, - volumeMounts, - ociRuntime, - ) - if err != nil { - return nil, fmt.Errorf("failed to generate container %q spec: %w", id, err) - } - - meta.ProcessLabel = spec.Process.SelinuxLabel - - // handle any KVM based runtime - if err := modifyProcessLabel(ociRuntime.Type, spec); err != nil { - return nil, err - } - - if config.GetLinux().GetSecurityContext().GetPrivileged() { - // If privileged don't set the SELinux label but still record it on the container so - // the unused MCS label can be release later - spec.Process.SelinuxLabel = "" - } - defer func() { - if retErr != nil { - selinux.ReleaseLabel(spec.Process.SelinuxLabel) - } - }() - - log.G(ctx).Debugf("Container %q spec: %#+v", id, spew.NewFormatter(spec)) - - // Grab any platform specific snapshotter opts. - sOpts, err := snapshotterOpts(c.config.ContainerdConfig.Snapshotter, config) - if err != nil { - return nil, err - } - - // Set snapshotter before any other options. - opts := []containerd.NewContainerOpts{ - containerd.WithSnapshotter(c.RuntimeSnapshotter(ctx, ociRuntime)), - // Prepare container rootfs. This is always writeable even if - // the container wants a readonly rootfs since we want to give - // the runtime (runc) a chance to modify (e.g. 
to create mount - // points corresponding to spec.Mounts) before making the - // rootfs readonly (requested by spec.Root.Readonly). - customopts.WithNewSnapshot(id, containerdImage, sOpts...), - } - if len(volumeMounts) > 0 { - mountMap := make(map[string]string) - for _, v := range volumeMounts { - mountMap[filepath.Clean(v.HostPath)] = v.ContainerPath - } - opts = append(opts, customopts.WithVolumes(mountMap, platform)) - } - meta.ImageRef = image.ID - meta.StopSignal = image.ImageSpec.Config.StopSignal - - // Validate log paths and compose full container log path. - if sandboxConfig.GetLogDirectory() != "" && config.GetLogPath() != "" { - meta.LogPath = filepath.Join(sandboxConfig.GetLogDirectory(), config.GetLogPath()) - log.G(ctx).Debugf("Composed container full log path %q using sandbox log dir %q and container log path %q", - meta.LogPath, sandboxConfig.GetLogDirectory(), config.GetLogPath()) - } else { - log.G(ctx).Infof("Logging will be disabled due to empty log paths for sandbox (%q) or container (%q)", - sandboxConfig.GetLogDirectory(), config.GetLogPath()) - } - - containerIO, err := cio.NewContainerIO(id, - cio.WithNewFIFOs(volatileContainerRootDir, config.GetTty(), config.GetStdin())) - if err != nil { - return nil, fmt.Errorf("failed to create container io: %w", err) - } - defer func() { - if retErr != nil { - if err := containerIO.Close(); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to close container io %q", id) - } - } - }() - - specOpts, err := c.platformSpecOpts(platform, config, &image.ImageSpec.Config) - if err != nil { - return nil, fmt.Errorf("failed to get container spec opts: %w", err) - } - - containerLabels := buildLabels(config.Labels, image.ImageSpec.Config.Labels, containerKindContainer) - - sandboxInfo, err := c.client.SandboxStore().Get(ctx, sandboxID) - if err != nil { - return nil, fmt.Errorf("unable to get sandbox %q metdata: %w", sandboxID, err) - } - - opts = append(opts, - containerd.WithSpec(spec, specOpts...), - 
containerd.WithRuntime(sandboxInfo.Runtime.Name, sandboxInfo.Runtime.Options), - containerd.WithContainerLabels(containerLabels), - containerd.WithContainerExtension(containerMetadataExtension, &meta), - ) - - // When using sandboxed shims, containerd's runtime needs to know which sandbox shim instance to use. - if ociRuntime.SandboxMode == string(criconfig.ModeShim) { - opts = append(opts, containerd.WithSandbox(sandboxID)) - } - - opts = append(opts, c.nri.WithContainerAdjustment()) - defer func() { - if retErr != nil { - deferCtx, deferCancel := util.DeferContext() - defer deferCancel() - c.nri.UndoCreateContainer(deferCtx, &sandbox, id, spec) - } - }() - - var cntr containerd.Container - if cntr, err = c.client.NewContainer(ctx, id, opts...); err != nil { - return nil, fmt.Errorf("failed to create containerd container: %w", err) - } - defer func() { - if retErr != nil { - deferCtx, deferCancel := util.DeferContext() - defer deferCancel() - if err := cntr.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id) - } - } - }() - - status := containerstore.Status{CreatedAt: time.Now().UnixNano()} - status = copyResourcesToStatus(spec, status) - container, err := containerstore.NewContainer(meta, - containerstore.WithStatus(status, containerRootDir), - containerstore.WithContainer(cntr), - containerstore.WithContainerIO(containerIO), - ) - if err != nil { - return nil, fmt.Errorf("failed to create internal container object for %q: %w", id, err) - } - defer func() { - if retErr != nil { - // Cleanup container checkpoint on error. - if err := container.Delete(); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to cleanup container checkpoint for %q", id) - } - } - }() - - // Add container into container store. 
- if err := c.containerStore.Add(container); err != nil { - return nil, fmt.Errorf("failed to add container %q into store: %w", id, err) - } - - c.generateAndSendContainerEvent(ctx, id, sandboxID, runtime.ContainerEventType_CONTAINER_CREATED_EVENT) - - err = c.nri.PostCreateContainer(ctx, &sandbox, &container) - if err != nil { - log.G(ctx).WithError(err).Errorf("NRI post-create notification failed") - } - - containerCreateTimer.WithValues(ociRuntime.Type).UpdateSince(start) - - return &runtime.CreateContainerResponse{ContainerId: id}, nil -} - -// volumeMounts sets up image volumes for container. Rely on the removal of container -// root directory to do cleanup. Note that image volume will be skipped, if there is criMounts -// specified with the same destination. -func (c *criService) volumeMounts(platform platforms.Platform, containerRootDir string, containerConfig *runtime.ContainerConfig, config *imagespec.ImageConfig) []*runtime.Mount { - var uidMappings, gidMappings []*runtime.IDMapping - if platform.OS == "linux" { - if usernsOpts := containerConfig.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions(); usernsOpts != nil { - uidMappings = usernsOpts.GetUids() - gidMappings = usernsOpts.GetGids() - } - } - - criMounts := containerConfig.GetMounts() - - if len(config.Volumes) == 0 { - return nil - } - var mounts []*runtime.Mount - for dst := range config.Volumes { - if isInCRIMounts(dst, criMounts) { - // Skip the image volume, if there is CRI defined volume mapping. - // TODO(random-liu): This should be handled by Kubelet in the future. - // Kubelet should decide what to use for image volume, and also de-duplicate - // the image volume and user mounts. - continue - } - volumeID := util.GenerateID() - src := filepath.Join(containerRootDir, "volumes", volumeID) - // When the platform OS is Linux, ensure dst is a _Linux_ abs path. - // We can't use filepath.IsAbs() because, when executing on Windows, it checks for - // Windows abs paths. 
- if platform.OS == "linux" && !strings.HasPrefix(dst, "/") { - // On Windows, ToSlash() is needed to ensure the path is a valid Linux path. - // On Linux, ToSlash() is a no-op. - oldDst := dst - dst = filepath.ToSlash(filepath.Join("/", dst)) - log.L.Debugf("Volume destination %q is not absolute, converted to %q", oldDst, dst) - } - // addOCIBindMounts will create these volumes. - mounts = append(mounts, &runtime.Mount{ - ContainerPath: dst, - HostPath: src, - SelinuxRelabel: true, - UidMappings: uidMappings, - GidMappings: gidMappings, - }) - } - return mounts -} - -// runtimeSpec returns a default runtime spec used in cri-containerd. -func (c *criService) runtimeSpec(id string, platform platforms.Platform, baseSpecFile string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) { - // GenerateSpec needs namespace. - ctx := util.NamespacedContext() - container := &containers.Container{ID: id} - - if baseSpecFile != "" { - baseSpec, ok := c.baseOCISpecs[baseSpecFile] - if !ok { - return nil, fmt.Errorf("can't find base OCI spec %q", baseSpecFile) - } - - spec := oci.Spec{} - if err := util.DeepCopy(&spec, &baseSpec); err != nil { - return nil, fmt.Errorf("failed to clone OCI spec: %w", err) - } - - // Fix up cgroups path - applyOpts := append([]oci.SpecOpts{oci.WithNamespacedCgroup()}, opts...) - - if err := oci.ApplyOpts(ctx, nil, container, &spec, applyOpts...); err != nil { - return nil, fmt.Errorf("failed to apply OCI options: %w", err) - } - - return &spec, nil - } - - spec, err := oci.GenerateSpecWithPlatform(ctx, nil, platforms.Format(platform), container, opts...) - if err != nil { - return nil, fmt.Errorf("failed to generate spec: %w", err) - } - - return spec, nil -} - -const ( - // relativeRootfsPath is the rootfs path relative to bundle path. - relativeRootfsPath = "rootfs" - // hostnameEnv is the key for HOSTNAME env. - hostnameEnv = "HOSTNAME" -) - -// generateUserString generates valid user string based on OCI Image Spec -// v1.0.0. 
-// -// CRI defines that the following combinations are valid: -// -// (none) -> "" -// username -> username -// username, uid -> username -// username, uid, gid -> username:gid -// username, gid -> username:gid -// uid -> uid -// uid, gid -> uid:gid -// gid -> error -// -// TODO(random-liu): Add group name support in CRI. -func generateUserString(username string, uid, gid *runtime.Int64Value) (string, error) { - var userstr, groupstr string - if uid != nil { - userstr = strconv.FormatInt(uid.GetValue(), 10) - } - if username != "" { - userstr = username - } - if gid != nil { - groupstr = strconv.FormatInt(gid.GetValue(), 10) - } - if userstr == "" { - if groupstr != "" { - return "", fmt.Errorf("user group %q is specified without user", groupstr) - } - return "", nil - } - if groupstr != "" { - userstr = userstr + ":" + groupstr - } - return userstr, nil -} - -// platformSpecOpts adds additional runtime spec options that may rely on -// runtime information (rootfs mounted), or platform specific checks with -// no defined workaround (yet) to specify for other platforms. -func (c *criService) platformSpecOpts( - platform platforms.Platform, - config *runtime.ContainerConfig, - imageConfig *imagespec.ImageConfig, -) ([]oci.SpecOpts, error) { - var specOpts []oci.SpecOpts - - // First deal with the set of options we can use across platforms currently. - // Linux user strings have workarounds on other platforms to avoid needing to - // mount the rootfs, but on Linux hosts it must be mounted - // - // TODO(dcantah): I think the seccomp package can be made to compile on - // !linux and used here as well. - if platform.OS == "linux" { - // Set container username. This could only be done by containerd, because it needs - // access to the container rootfs. Pass user name to containerd, and let it overwrite - // the spec for us. 
- securityContext := config.GetLinux().GetSecurityContext() - userstr, err := generateUserString( - securityContext.GetRunAsUsername(), - securityContext.GetRunAsUser(), - securityContext.GetRunAsGroup()) - if err != nil { - return nil, fmt.Errorf("failed to generate user string: %w", err) - } - if userstr == "" { - // Lastly, since no user override was passed via CRI try to set via OCI - // Image - userstr = imageConfig.User - } - if userstr != "" { - specOpts = append(specOpts, oci.WithUser(userstr)) - } - } - - // Now grab the truly platform specific options (seccomp, apparmor etc. for linux - // for example). - ctrSpecOpts, err := c.containerSpecOpts(config, imageConfig) - if err != nil { - return nil, err - } - specOpts = append(specOpts, ctrSpecOpts...) - - return specOpts, nil -} - -// buildContainerSpec build container's OCI spec depending on controller's target platform OS. -func (c *criService) buildContainerSpec( - platform platforms.Platform, - id string, - sandboxID string, - sandboxPid uint32, - netNSPath string, - containerName string, - imageName string, - config *runtime.ContainerConfig, - sandboxConfig *runtime.PodSandboxConfig, - imageConfig *imagespec.ImageConfig, - extraMounts []*runtime.Mount, - ociRuntime criconfig.Runtime, -) (_ *runtimespec.Spec, retErr error) { - var ( - specOpts []oci.SpecOpts - err error - - // Platform helpers - isLinux = platform.OS == "linux" - isWindows = platform.OS == "windows" - isDarwin = platform.OS == "darwin" - ) - - switch { - case isLinux: - // Generate container mounts. - // No mounts are passed for other platforms. 
- linuxMounts := c.linuxContainerMounts(sandboxID, config) - - specOpts, err = c.buildLinuxSpec( - id, - sandboxID, - sandboxPid, - netNSPath, - containerName, - imageName, - config, - sandboxConfig, - imageConfig, - append(linuxMounts, extraMounts...), - ociRuntime, - ) - case isWindows: - specOpts, err = c.buildWindowsSpec( - id, - sandboxID, - sandboxPid, - netNSPath, - containerName, - imageName, - config, - sandboxConfig, - imageConfig, - extraMounts, - ociRuntime, - ) - case isDarwin: - specOpts, err = c.buildDarwinSpec( - id, - sandboxID, - containerName, - imageName, - config, - sandboxConfig, - imageConfig, - extraMounts, - ociRuntime, - ) - default: - return nil, fmt.Errorf("unsupported spec platform: %s", platform.OS) - } - - if err != nil { - return nil, fmt.Errorf("failed to generate spec opts: %w", err) - } - - return c.runtimeSpec(id, platform, ociRuntime.BaseRuntimeSpec, specOpts...) -} - -func (c *criService) buildLinuxSpec( - id string, - sandboxID string, - sandboxPid uint32, - netNSPath string, - containerName string, - imageName string, - config *runtime.ContainerConfig, - sandboxConfig *runtime.PodSandboxConfig, - imageConfig *imagespec.ImageConfig, - extraMounts []*runtime.Mount, - ociRuntime criconfig.Runtime, -) (_ []oci.SpecOpts, retErr error) { - specOpts := []oci.SpecOpts{ - oci.WithoutRunMount, - } - // only clear the default security settings if the runtime does not have a custom - // base runtime spec spec. Admins can use this functionality to define - // default ulimits, seccomp, or other default settings. 
- if ociRuntime.BaseRuntimeSpec == "" { - specOpts = append(specOpts, customopts.WithoutDefaultSecuritySettings) - } - - specOpts = append(specOpts, - customopts.WithRelativeRoot(relativeRootfsPath), - customopts.WithProcessArgs(config, imageConfig), - oci.WithDefaultPathEnv, - // this will be set based on the security context below - oci.WithNewPrivileges, - ) - - if config.GetWorkingDir() != "" { - specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir())) - } else if imageConfig.WorkingDir != "" { - specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) - } - - if config.GetTty() { - specOpts = append(specOpts, oci.WithTTY) - } - - // Add HOSTNAME env. - var ( - err error - hostname = sandboxConfig.GetHostname() - ) - if hostname == "" { - if hostname, err = c.os.Hostname(); err != nil { - return nil, err - } - } - specOpts = append(specOpts, oci.WithEnv([]string{hostnameEnv + "=" + hostname})) - - // Apply envs from image config first, so that envs from container config - // can override them. - env := append([]string{}, imageConfig.Env...) 
- for _, e := range config.GetEnvs() { - env = append(env, e.GetKey()+"="+e.GetValue()) - } - specOpts = append(specOpts, oci.WithEnv(env)) - - securityContext := config.GetLinux().GetSecurityContext() - labelOptions, err := toLabel(securityContext.GetSelinuxOptions()) - if err != nil { - return nil, err - } - if len(labelOptions) == 0 { - // Use pod level SELinux config - if sandbox, err := c.sandboxStore.Get(sandboxID); err == nil { - labelOptions, err = selinux.DupSecOpt(sandbox.ProcessLabel) - if err != nil { - return nil, err - } - } - } - - processLabel, mountLabel, err := label.InitLabels(labelOptions) - if err != nil { - return nil, fmt.Errorf("failed to init selinux options %+v: %w", securityContext.GetSelinuxOptions(), err) - } - defer func() { - if retErr != nil { - selinux.ReleaseLabel(processLabel) - } - }() - - specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel)) - - if !c.config.DisableProcMount { - // Change the default masked/readonly paths to empty slices - // See https://github.com/containerd/containerd/issues/5029 - // TODO: Provide an option to set default paths to the ones in oci.populateDefaultUnixSpec() - specOpts = append(specOpts, oci.WithMaskedPaths([]string{}), oci.WithReadonlyPaths([]string{})) - - // Apply masked paths if specified. - // If the container is privileged, this will be cleared later on. - if maskedPaths := securityContext.GetMaskedPaths(); maskedPaths != nil { - specOpts = append(specOpts, oci.WithMaskedPaths(maskedPaths)) - } - - // Apply readonly paths if specified. - // If the container is privileged, this will be cleared later on. 
- if readonlyPaths := securityContext.GetReadonlyPaths(); readonlyPaths != nil { - specOpts = append(specOpts, oci.WithReadonlyPaths(readonlyPaths)) - } - } - - specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext), - customopts.WithCapabilities(securityContext, c.allCaps)) - - if securityContext.GetPrivileged() { - if !sandboxConfig.GetLinux().GetSecurityContext().GetPrivileged() { - return nil, errors.New("no privileged container allowed in sandbox") - } - specOpts = append(specOpts, oci.WithPrivileged) - if !ociRuntime.PrivilegedWithoutHostDevices { - specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed) - } else if ociRuntime.PrivilegedWithoutHostDevicesAllDevicesAllowed { - // allow rwm on all devices for the container - specOpts = append(specOpts, oci.WithAllDevicesAllowed) - } - } - - // Clear all ambient capabilities. The implication of non-root + caps - // is not clearly defined in Kubernetes. - // See https://github.com/kubernetes/kubernetes/issues/56374 - // Keep docker's behavior for now. - specOpts = append(specOpts, - customopts.WithoutAmbientCaps, - customopts.WithSelinuxLabels(processLabel, mountLabel), - ) - - // TODO: Figure out whether we should set no new privilege for sandbox container by default - if securityContext.GetNoNewPrivs() { - specOpts = append(specOpts, oci.WithNoNewPrivileges) - } - // TODO(random-liu): [P1] Set selinux options (privileged or not). 
- if securityContext.GetReadonlyRootfs() { - specOpts = append(specOpts, oci.WithRootFSReadonly()) - } - - if c.config.DisableCgroup { - specOpts = append(specOpts, customopts.WithDisabledCgroups) - } else { - specOpts = append(specOpts, customopts.WithResources(config.GetLinux().GetResources(), c.config.TolerateMissingHugetlbController, c.config.DisableHugetlbController)) - if sandboxConfig.GetLinux().GetCgroupParent() != "" { - cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id) - specOpts = append(specOpts, oci.WithCgroup(cgroupsPath)) - } - } - - supplementalGroups := securityContext.GetSupplementalGroups() - - // Get blockio class - blockIOClass, err := c.blockIOClassFromAnnotations(config.GetMetadata().GetName(), config.Annotations, sandboxConfig.Annotations) - if err != nil { - return nil, fmt.Errorf("failed to set blockio class: %w", err) - } - if blockIOClass != "" { - if linuxBlockIO, err := blockio.ClassNameToLinuxOCI(blockIOClass); err == nil { - specOpts = append(specOpts, oci.WithBlockIO(linuxBlockIO)) - } else { - return nil, err - } - } - - // Get RDT class - rdtClass, err := c.rdtClassFromAnnotations(config.GetMetadata().GetName(), config.Annotations, sandboxConfig.Annotations) - if err != nil { - return nil, fmt.Errorf("failed to set RDT class: %w", err) - } - if rdtClass != "" { - specOpts = append(specOpts, oci.WithRdt(rdtClass, "", "")) - } - - for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations, - ociRuntime.PodAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - for pKey, pValue := range getPassthroughAnnotations(config.Annotations, - ociRuntime.ContainerAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - // Default target PID namespace is the sandbox PID. - targetPid := sandboxPid - // If the container targets another container's PID namespace, - // set targetPid to the PID of that container. 
- nsOpts := securityContext.GetNamespaceOptions() - if nsOpts.GetPid() == runtime.NamespaceMode_TARGET { - targetContainer, err := c.validateTargetContainer(sandboxID, nsOpts.TargetId) - if err != nil { - return nil, fmt.Errorf("invalid target container: %w", err) - } - - status := targetContainer.Status.Get() - targetPid = status.Pid - } - - uids, gids, err := parseUsernsIDs(nsOpts.GetUsernsOptions()) - if err != nil { - return nil, fmt.Errorf("user namespace configuration: %w", err) - } - - // Check sandbox userns config is consistent with container config. - sandboxUsernsOpts := sandboxConfig.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions() - if !sameUsernsConfig(sandboxUsernsOpts, nsOpts.GetUsernsOptions()) { - return nil, fmt.Errorf("user namespace config for sandbox is different from container. Sandbox userns config: %v - Container userns config: %v", sandboxUsernsOpts, nsOpts.GetUsernsOptions()) - } - - specOpts = append(specOpts, - customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj), - customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, uids, gids), - customopts.WithSupplementalGroups(supplementalGroups), - ) - specOpts = append( - specOpts, - annotations.DefaultCRIAnnotations(sandboxID, containerName, imageName, sandboxConfig, false)..., - ) - - // cgroupns is used for hiding /sys/fs/cgroup from containers. - // For compatibility, cgroupns is not used when running in cgroup v1 mode or in privileged. 
- // https://github.com/containers/libpod/issues/4363 - // https://github.com/kubernetes/enhancements/blob/0e409b47497e398b369c281074485c8de129694f/keps/sig-node/20191118-cgroups-v2.md#cgroup-namespace - if isUnifiedCgroupsMode() && !securityContext.GetPrivileged() { - specOpts = append(specOpts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.CgroupNamespace})) - } - - return specOpts, nil -} - -func (c *criService) buildWindowsSpec( - id string, - sandboxID string, - sandboxPid uint32, - netNSPath string, - containerName string, - imageName string, - config *runtime.ContainerConfig, - sandboxConfig *runtime.PodSandboxConfig, - imageConfig *imagespec.ImageConfig, - extraMounts []*runtime.Mount, - ociRuntime criconfig.Runtime, -) (_ []oci.SpecOpts, retErr error) { - var specOpts []oci.SpecOpts - specOpts = append(specOpts, customopts.WithProcessCommandLineOrArgsForWindows(config, imageConfig)) - - // All containers in a pod need to have HostProcess set if it was set on the pod, - // and vice versa no containers in the pod can be HostProcess if the pods spec - // didn't have the field set. The only case that is valid is if these are the same value. - cntrHpc := config.GetWindows().GetSecurityContext().GetHostProcess() - sandboxHpc := sandboxConfig.GetWindows().GetSecurityContext().GetHostProcess() - if cntrHpc != sandboxHpc { - return nil, errors.New("pod spec and all containers inside must have the HostProcess field set to be valid") - } - - if config.GetWorkingDir() != "" { - specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir())) - } else if imageConfig.WorkingDir != "" { - specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) - } - - if config.GetTty() { - specOpts = append(specOpts, oci.WithTTY) - } - - // Apply envs from image config first, so that envs from container config - // can override them. - env := append([]string{}, imageConfig.Env...) 
- for _, e := range config.GetEnvs() { - env = append(env, e.GetKey()+"="+e.GetValue()) - } - specOpts = append(specOpts, oci.WithEnv(env)) - - specOpts = append(specOpts, - // Clear the root location since hcsshim expects it. - // NOTE: readonly rootfs doesn't work on windows. - customopts.WithoutRoot, - oci.WithWindowsNetworkNamespace(netNSPath), - oci.WithHostname(sandboxConfig.GetHostname()), - ) - - specOpts = append(specOpts, customopts.WithWindowsMounts(c.os, config, extraMounts), customopts.WithWindowsDevices(config)) - - // Start with the image config user and override below if RunAsUsername is not "". - username := imageConfig.User - - windowsConfig := config.GetWindows() - if windowsConfig != nil { - specOpts = append(specOpts, customopts.WithWindowsResources(windowsConfig.GetResources())) - securityCtx := windowsConfig.GetSecurityContext() - if securityCtx != nil { - runAsUser := securityCtx.GetRunAsUsername() - if runAsUser != "" { - username = runAsUser - } - cs := securityCtx.GetCredentialSpec() - if cs != "" { - specOpts = append(specOpts, customopts.WithWindowsCredentialSpec(cs)) - } - } - } - - // There really isn't a good Windows way to verify that the username is available in the - // image as early as here like there is for Linux. Later on in the stack hcsshim - // will handle the behavior of erroring out if the user isn't available in the image - // when trying to run the init process. 
- specOpts = append(specOpts, oci.WithUser(username)) - - for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations, - ociRuntime.PodAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - for pKey, pValue := range getPassthroughAnnotations(config.Annotations, - ociRuntime.ContainerAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - specOpts = append(specOpts, customopts.WithAnnotation(annotations.WindowsHostProcess, strconv.FormatBool(sandboxHpc))) - specOpts = append(specOpts, - annotations.DefaultCRIAnnotations(sandboxID, containerName, imageName, sandboxConfig, false)..., - ) - - return specOpts, nil -} - -func (c *criService) buildDarwinSpec( - id string, - sandboxID string, - containerName string, - imageName string, - config *runtime.ContainerConfig, - sandboxConfig *runtime.PodSandboxConfig, - imageConfig *imagespec.ImageConfig, - extraMounts []*runtime.Mount, - ociRuntime criconfig.Runtime, -) (_ []oci.SpecOpts, retErr error) { - specOpts := []oci.SpecOpts{ - customopts.WithProcessArgs(config, imageConfig), - } - - if config.GetWorkingDir() != "" { - specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir())) - } else if imageConfig.WorkingDir != "" { - specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) - } - - if config.GetTty() { - specOpts = append(specOpts, oci.WithTTY) - } - - // Apply envs from image config first, so that envs from container config - // can override them. - env := append([]string{}, imageConfig.Env...) 
- for _, e := range config.GetEnvs() { - env = append(env, e.GetKey()+"="+e.GetValue()) - } - specOpts = append(specOpts, oci.WithEnv(env)) - - specOpts = append(specOpts, customopts.WithDarwinMounts(c.os, config, extraMounts)) - - for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations, - ociRuntime.PodAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - for pKey, pValue := range getPassthroughAnnotations(config.Annotations, - ociRuntime.ContainerAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - specOpts = append(specOpts, - annotations.DefaultCRIAnnotations(sandboxID, containerName, imageName, sandboxConfig, false)..., - ) - - return specOpts, nil -} - -// linuxContainerMounts sets up necessary container system file mounts -// including /dev/shm, /etc/hosts and /etc/resolv.conf. -func (c *criService) linuxContainerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount { - var mounts []*runtime.Mount - securityContext := config.GetLinux().GetSecurityContext() - var uidMappings, gidMappings []*runtime.IDMapping - if usernsOpts := securityContext.GetNamespaceOptions().GetUsernsOptions(); usernsOpts != nil { - uidMappings = usernsOpts.GetUids() - gidMappings = usernsOpts.GetGids() - } - - if !isInCRIMounts(etcHostname, config.GetMounts()) { - // /etc/hostname is added since 1.1.6, 1.2.4 and 1.3. - // For in-place upgrade, the old sandbox doesn't have the hostname file, - // do not mount this in that case. - // TODO(random-liu): Remove the check and always mount this when - // containerd 1.1 and 1.2 are deprecated. 
- hostpath := c.getSandboxHostname(sandboxID) - if _, err := c.os.Stat(hostpath); err == nil { - mounts = append(mounts, &runtime.Mount{ - ContainerPath: etcHostname, - HostPath: hostpath, - Readonly: securityContext.GetReadonlyRootfs(), - SelinuxRelabel: true, - UidMappings: uidMappings, - GidMappings: gidMappings, - }) - } - } - - if !isInCRIMounts(etcHosts, config.GetMounts()) { - mounts = append(mounts, &runtime.Mount{ - ContainerPath: etcHosts, - HostPath: c.getSandboxHosts(sandboxID), - Readonly: securityContext.GetReadonlyRootfs(), - SelinuxRelabel: true, - UidMappings: uidMappings, - GidMappings: gidMappings, - }) - } - - // Mount sandbox resolv.config. - // TODO: Need to figure out whether we should always mount it as read-only - if !isInCRIMounts(resolvConfPath, config.GetMounts()) { - mounts = append(mounts, &runtime.Mount{ - ContainerPath: resolvConfPath, - HostPath: c.getResolvPath(sandboxID), - Readonly: securityContext.GetReadonlyRootfs(), - SelinuxRelabel: true, - UidMappings: uidMappings, - GidMappings: gidMappings, - }) - } - - if !isInCRIMounts(devShm, config.GetMounts()) { - sandboxDevShm := c.getSandboxDevShm(sandboxID) - if securityContext.GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE { - sandboxDevShm = devShm - } - mounts = append(mounts, &runtime.Mount{ - ContainerPath: devShm, - HostPath: sandboxDevShm, - Readonly: false, - SelinuxRelabel: sandboxDevShm != devShm, - // XXX: tmpfs support for idmap mounts got merged in - // Linux 6.3. - // Our Ubuntu 22.04 CI runs with 5.15 kernels, so - // disabling idmap mounts for this case makes the CI - // happy (the other fs used support idmap mounts in 5.15 - // kernels). - // We can enable this at a later stage, but as this - // tmpfs mount is exposed empty to the container (no - // prepopulated files) and using the hostIPC with userns - // is blocked by k8s, we can just avoid using the - // mappings and it should work fine. 
- }) - } - return mounts -} diff --git a/pkg/cri/sbserver/container_create_linux.go b/pkg/cri/sbserver/container_create_linux.go deleted file mode 100644 index 71ad55476..000000000 --- a/pkg/cri/sbserver/container_create_linux.go +++ /dev/null @@ -1,270 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "bufio" - "errors" - "fmt" - "io" - "os" - "strconv" - "strings" - - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/contrib/apparmor" - "github.com/containerd/containerd/contrib/seccomp" - "github.com/containerd/containerd/oci" - "github.com/containerd/containerd/snapshots" - - customopts "github.com/containerd/containerd/pkg/cri/opts" -) - -const ( - // profileNamePrefix is the prefix for loading profiles on a localhost. Eg. AppArmor localhost/profileName. - profileNamePrefix = "localhost/" // TODO (mikebrow): get localhost/ & runtime/default from CRI kubernetes/kubernetes#51747 - // runtimeDefault indicates that we should use or create a runtime default profile. - runtimeDefault = "runtime/default" - // dockerDefault indicates that we should use or create a docker default profile. - dockerDefault = "docker/default" - // appArmorDefaultProfileName is name to use when creating a default apparmor profile. 
- appArmorDefaultProfileName = "cri-containerd.apparmor.d" - // unconfinedProfile is a string indicating one should run a pod/containerd without a security profile - unconfinedProfile = "unconfined" - // seccompDefaultProfile is the default seccomp profile. - seccompDefaultProfile = dockerDefault -) - -func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { - var ( - specOpts []oci.SpecOpts - err error - ) - securityContext := config.GetLinux().GetSecurityContext() - userstr := "0" // runtime default - if securityContext.GetRunAsUsername() != "" { - userstr = securityContext.GetRunAsUsername() - } else if securityContext.GetRunAsUser() != nil { - userstr = strconv.FormatInt(securityContext.GetRunAsUser().GetValue(), 10) - } else if imageConfig.User != "" { - userstr, _, _ = strings.Cut(imageConfig.User, ":") - } - specOpts = append(specOpts, customopts.WithAdditionalGIDs(userstr), - customopts.WithSupplementalGroups(securityContext.GetSupplementalGroups())) - - asp := securityContext.GetApparmor() - if asp == nil { - asp, err = generateApparmorSecurityProfile(securityContext.GetApparmorProfile()) //nolint:staticcheck // Deprecated but we don't want to remove yet - if err != nil { - return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err) - } - } - apparmorSpecOpts, err := generateApparmorSpecOpts( - asp, - securityContext.GetPrivileged(), - c.apparmorEnabled()) - if err != nil { - return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err) - } - if apparmorSpecOpts != nil { - specOpts = append(specOpts, apparmorSpecOpts) - } - - ssp := securityContext.GetSeccomp() - if ssp == nil { - ssp, err = generateSeccompSecurityProfile( - securityContext.GetSeccompProfilePath(), //nolint:staticcheck // Deprecated but we don't want to remove yet - c.config.UnsetSeccompProfile) - if err != nil { - return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err) - } 
- } - seccompSpecOpts, err := c.generateSeccompSpecOpts( - ssp, - securityContext.GetPrivileged(), - c.seccompEnabled()) - if err != nil { - return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err) - } - if seccompSpecOpts != nil { - specOpts = append(specOpts, seccompSpecOpts) - } - if c.config.EnableCDI { - specOpts = append(specOpts, customopts.WithCDI(config.Annotations, config.CDIDevices)) - } - return specOpts, nil -} - -func generateSeccompSecurityProfile(profilePath string, unsetProfilePath string) (*runtime.SecurityProfile, error) { - if profilePath != "" { - return generateSecurityProfile(profilePath) - } - if unsetProfilePath != "" { - return generateSecurityProfile(unsetProfilePath) - } - return nil, nil -} -func generateApparmorSecurityProfile(profilePath string) (*runtime.SecurityProfile, error) { - if profilePath != "" { - return generateSecurityProfile(profilePath) - } - return nil, nil -} - -func generateSecurityProfile(profilePath string) (*runtime.SecurityProfile, error) { - switch profilePath { - case runtimeDefault, dockerDefault, "": - return &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_RuntimeDefault, - }, nil - case unconfinedProfile: - return &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_Unconfined, - }, nil - default: - // Require and Trim default profile name prefix - if !strings.HasPrefix(profilePath, profileNamePrefix) { - return nil, fmt.Errorf("invalid profile %q", profilePath) - } - return &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_Localhost, - LocalhostRef: strings.TrimPrefix(profilePath, profileNamePrefix), - }, nil - } -} - -// generateSeccompSpecOpts generates containerd SpecOpts for seccomp. 
-func (c *criService) generateSeccompSpecOpts(sp *runtime.SecurityProfile, privileged, seccompEnabled bool) (oci.SpecOpts, error) { - if privileged { - // Do not set seccomp profile when container is privileged - return nil, nil - } - if !seccompEnabled { - if sp != nil { - if sp.ProfileType != runtime.SecurityProfile_Unconfined { - return nil, errors.New("seccomp is not supported") - } - } - return nil, nil - } - - if sp == nil { - return nil, nil - } - - if sp.ProfileType != runtime.SecurityProfile_Localhost && sp.LocalhostRef != "" { - return nil, errors.New("seccomp config invalid LocalhostRef must only be set if ProfileType is Localhost") - } - switch sp.ProfileType { - case runtime.SecurityProfile_Unconfined: - // Do not set seccomp profile. - return nil, nil - case runtime.SecurityProfile_RuntimeDefault: - return seccomp.WithDefaultProfile(), nil - case runtime.SecurityProfile_Localhost: - // trimming the localhost/ prefix just in case even though it should not - // be necessary with the new SecurityProfile struct - return seccomp.WithProfile(strings.TrimPrefix(sp.LocalhostRef, profileNamePrefix)), nil - default: - return nil, errors.New("seccomp unknown ProfileType") - } -} - -// generateApparmorSpecOpts generates containerd SpecOpts for apparmor. -func generateApparmorSpecOpts(sp *runtime.SecurityProfile, privileged, apparmorEnabled bool) (oci.SpecOpts, error) { - if !apparmorEnabled { - // Should fail loudly if user try to specify apparmor profile - // but we don't support it. - if sp != nil { - if sp.ProfileType != runtime.SecurityProfile_Unconfined { - return nil, errors.New("apparmor is not supported") - } - } - return nil, nil - } - - if sp == nil { - // Based on kubernetes#51746, default apparmor profile should be applied - // for when apparmor is not specified. 
- sp, _ = generateSecurityProfile("") - } - - if sp.ProfileType != runtime.SecurityProfile_Localhost && sp.LocalhostRef != "" { - return nil, errors.New("apparmor config invalid LocalhostRef must only be set if ProfileType is Localhost") - } - - switch sp.ProfileType { - case runtime.SecurityProfile_Unconfined: - // Do not set apparmor profile. - return nil, nil - case runtime.SecurityProfile_RuntimeDefault: - if privileged { - // Do not set apparmor profile when container is privileged - return nil, nil - } - // TODO (mikebrow): delete created apparmor default profile - return apparmor.WithDefaultProfile(appArmorDefaultProfileName), nil - case runtime.SecurityProfile_Localhost: - // trimming the localhost/ prefix just in case even through it should not - // be necessary with the new SecurityProfile struct - appArmorProfile := strings.TrimPrefix(sp.LocalhostRef, profileNamePrefix) - if profileExists, err := appArmorProfileExists(appArmorProfile); !profileExists { - if err != nil { - return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err) - } - return nil, fmt.Errorf("apparmor profile not found %s", appArmorProfile) - } - return apparmor.WithProfile(appArmorProfile), nil - default: - return nil, errors.New("apparmor unknown ProfileType") - } -} - -// appArmorProfileExists scans apparmor/profiles for the requested profile -func appArmorProfileExists(profile string) (bool, error) { - if profile == "" { - return false, errors.New("nil apparmor profile is not supported") - } - profiles, err := os.Open("/sys/kernel/security/apparmor/profiles") - if err != nil { - return false, err - } - defer profiles.Close() - - rbuff := bufio.NewReader(profiles) - for { - line, err := rbuff.ReadString('\n') - switch err { - case nil: - if strings.HasPrefix(line, profile+" (") { - return true, nil - } - case io.EOF: - return false, nil - default: - return false, err - } - } -} - -// snapshotterOpts returns any Linux specific snapshotter options for the rootfs snapshot 
-func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) { - nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions() - return snapshotterRemapOpts(nsOpts) -} diff --git a/pkg/cri/sbserver/container_create_linux_test.go b/pkg/cri/sbserver/container_create_linux_test.go deleted file mode 100644 index 6be91dc00..000000000 --- a/pkg/cri/sbserver/container_create_linux_test.go +++ /dev/null @@ -1,2075 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "fmt" - "os" - "path/filepath" - "reflect" - "strings" - "testing" - - "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" - "github.com/containerd/containerd/containers" - "github.com/containerd/containerd/contrib/apparmor" - "github.com/containerd/containerd/contrib/seccomp" - "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/oci" - "github.com/containerd/containerd/platforms" - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/selinux/go-selinux" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/pkg/cap" - "github.com/containerd/containerd/pkg/cri/annotations" - "github.com/containerd/containerd/pkg/cri/config" - "github.com/containerd/containerd/pkg/cri/opts" - customopts "github.com/containerd/containerd/pkg/cri/opts" - "github.com/containerd/containerd/pkg/cri/util" - ctrdutil "github.com/containerd/containerd/pkg/cri/util" - ostesting "github.com/containerd/containerd/pkg/os/testing" -) - -func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig, - *imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) { - config := &runtime.ContainerConfig{ - Metadata: &runtime.ContainerMetadata{ - Name: "test-name", - Attempt: 1, - }, - Image: &runtime.ImageSpec{ - Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799", - }, - Command: []string{"test", "command"}, - Args: []string{"test", "args"}, - WorkingDir: "test-cwd", - Envs: []*runtime.KeyValue{ - {Key: "k1", Value: "v1"}, - {Key: "k2", Value: "v2"}, - {Key: "k3", Value: "v3=v3bis"}, - {Key: "k4", Value: "v4=v4bis=foop"}, - }, - Mounts: []*runtime.Mount{ - // everything default - { - ContainerPath: 
"container-path-1", - HostPath: "host-path-1", - }, - // readOnly - { - ContainerPath: "container-path-2", - HostPath: "host-path-2", - Readonly: true, - }, - }, - Labels: map[string]string{"a": "b"}, - Annotations: map[string]string{"ca-c": "ca-d"}, - Linux: &runtime.LinuxContainerConfig{ - Resources: &runtime.LinuxContainerResources{ - CpuPeriod: 100, - CpuQuota: 200, - CpuShares: 300, - MemoryLimitInBytes: 400, - OomScoreAdj: 500, - CpusetCpus: "0-1", - CpusetMems: "2-3", - Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}, - }, - SecurityContext: &runtime.LinuxContainerSecurityContext{ - SupplementalGroups: []int64{1111, 2222}, - NoNewPrivs: true, - }, - }, - } - sandboxConfig := &runtime.PodSandboxConfig{ - Metadata: &runtime.PodSandboxMetadata{ - Name: "test-sandbox-name", - Uid: "test-sandbox-uid", - Namespace: "test-sandbox-ns", - Attempt: 2, - }, - Annotations: map[string]string{"c": "d"}, - Linux: &runtime.LinuxPodSandboxConfig{ - CgroupParent: "/test/cgroup/parent", - SecurityContext: &runtime.LinuxSandboxSecurityContext{}, - }, - } - imageConfig := &imagespec.ImageConfig{ - Env: []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"}, - Entrypoint: []string{"/entrypoint"}, - Cmd: []string{"cmd"}, - WorkingDir: "/workspace", - } - specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) { - assert.Equal(t, relativeRootfsPath, spec.Root.Path) - assert.Equal(t, []string{"test", "command", "test", "args"}, spec.Process.Args) - assert.Equal(t, "test-cwd", spec.Process.Cwd) - assert.Contains(t, spec.Process.Env, "k1=v1", "k2=v2", "k3=v3=v3bis", "ik4=iv4=iv4bis=boop") - assert.Contains(t, spec.Process.Env, "ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "k4=v4=v4bis=foop") - - t.Logf("Check cgroups bind mount") - checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"ro"}, nil) - - t.Logf("Check bind mount") - checkMount(t, spec.Mounts, "host-path-1", 
"container-path-1", "bind", []string{"rbind", "rprivate", "rw"}, nil) - checkMount(t, spec.Mounts, "host-path-2", "container-path-2", "bind", []string{"rbind", "rprivate", "ro"}, nil) - - t.Logf("Check resource limits") - assert.EqualValues(t, *spec.Linux.Resources.CPU.Period, 100) - assert.EqualValues(t, *spec.Linux.Resources.CPU.Quota, 200) - assert.EqualValues(t, *spec.Linux.Resources.CPU.Shares, 300) - assert.EqualValues(t, spec.Linux.Resources.CPU.Cpus, "0-1") - assert.EqualValues(t, spec.Linux.Resources.CPU.Mems, "2-3") - assert.EqualValues(t, spec.Linux.Resources.Unified, map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}) - assert.EqualValues(t, *spec.Linux.Resources.Memory.Limit, 400) - assert.EqualValues(t, *spec.Process.OOMScoreAdj, 500) - - t.Logf("Check supplemental groups") - assert.Contains(t, spec.Process.User.AdditionalGids, uint32(1111)) - assert.Contains(t, spec.Process.User.AdditionalGids, uint32(2222)) - - t.Logf("Check no_new_privs") - assert.Equal(t, spec.Process.NoNewPrivileges, true) - - t.Logf("Check cgroup path") - assert.Equal(t, getCgroupsPath("/test/cgroup/parent", id), spec.Linux.CgroupsPath) - - t.Logf("Check namespaces") - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.NetworkNamespace, - Path: opts.GetNetworkNamespace(sandboxPid), - }) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.IPCNamespace, - Path: opts.GetIPCNamespace(sandboxPid), - }) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.UTSNamespace, - Path: opts.GetUTSNamespace(sandboxPid), - }) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.PIDNamespace, - Path: opts.GetPIDNamespace(sandboxPid), - }) - - t.Logf("Check PodSandbox annotations") - assert.Contains(t, spec.Annotations, annotations.SandboxID) - assert.EqualValues(t, spec.Annotations[annotations.SandboxID], sandboxID) - - 
assert.Contains(t, spec.Annotations, annotations.ContainerType) - assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeContainer) - - assert.Contains(t, spec.Annotations, annotations.SandboxNamespace) - assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-sandbox-ns") - - assert.Contains(t, spec.Annotations, annotations.SandboxUID) - assert.EqualValues(t, spec.Annotations[annotations.SandboxUID], "test-sandbox-uid") - - assert.Contains(t, spec.Annotations, annotations.SandboxName) - assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-sandbox-name") - - assert.Contains(t, spec.Annotations, annotations.ImageName) - assert.EqualValues(t, spec.Annotations[annotations.ImageName], testImageName) - } - return config, sandboxConfig, imageConfig, specCheck -} - -func TestContainerCapabilities(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - allCaps := cap.Known() - for _, test := range []struct { - desc string - capability *runtime.Capability - includes []string - excludes []string - }{ - { - desc: "should be able to add/drop capabilities", - capability: &runtime.Capability{ - AddCapabilities: []string{"SYS_ADMIN"}, - DropCapabilities: []string{"CHOWN"}, - }, - includes: []string{"CAP_SYS_ADMIN"}, - excludes: []string{"CAP_CHOWN"}, - }, - { - desc: "should be able to add all capabilities", - capability: &runtime.Capability{ - AddCapabilities: []string{"ALL"}, - }, - includes: allCaps, - }, - { - desc: "should be able to drop all capabilities", - capability: &runtime.Capability{ - DropCapabilities: []string{"ALL"}, - }, - excludes: allCaps, - }, - { - desc: "should be able to drop capabilities with add all", - capability: &runtime.Capability{ - AddCapabilities: []string{"ALL"}, - DropCapabilities: []string{"CHOWN"}, - }, - includes: util.SubtractStringSlice(allCaps, "CAP_CHOWN"), - excludes: 
[]string{"CAP_CHOWN"}, - }, - { - desc: "should be able to add capabilities with drop all", - capability: &runtime.Capability{ - AddCapabilities: []string{"SYS_ADMIN"}, - DropCapabilities: []string{"ALL"}, - }, - includes: []string{"CAP_SYS_ADMIN"}, - excludes: util.SubtractStringSlice(allCaps, "CAP_SYS_ADMIN"), - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - c.allCaps = allCaps - - containerConfig.Linux.SecurityContext.Capabilities = test.capability - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - - if selinux.GetEnabled() { - assert.NotEqual(t, "", spec.Process.SelinuxLabel) - assert.NotEqual(t, "", spec.Linux.MountLabel) - } - - specCheck(t, testID, testSandboxID, testPid, spec) - for _, include := range test.includes { - assert.Contains(t, spec.Process.Capabilities.Bounding, include) - assert.Contains(t, spec.Process.Capabilities.Effective, include) - assert.Contains(t, spec.Process.Capabilities.Permitted, include) - } - for _, exclude := range test.excludes { - assert.NotContains(t, spec.Process.Capabilities.Bounding, exclude) - assert.NotContains(t, spec.Process.Capabilities.Effective, exclude) - assert.NotContains(t, spec.Process.Capabilities.Permitted, exclude) - } - assert.Empty(t, spec.Process.Capabilities.Inheritable) - assert.Empty(t, spec.Process.Capabilities.Ambient) - }) - } -} - -func TestContainerSpecTty(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - for _, tty := range []bool{true, 
false} { - containerConfig.Tty = tty - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - specCheck(t, testID, testSandboxID, testPid, spec) - assert.Equal(t, tty, spec.Process.Terminal) - if tty { - assert.Contains(t, spec.Process.Env, "TERM=xterm") - } else { - assert.NotContains(t, spec.Process.Env, "TERM=xterm") - } - } -} - -func TestContainerSpecDefaultPath(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - expectedDefault := "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - for _, pathenv := range []string{"", "PATH=/usr/local/bin/games"} { - expected := expectedDefault - if pathenv != "" { - imageConfig.Env = append(imageConfig.Env, pathenv) - expected = pathenv - } - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - specCheck(t, testID, testSandboxID, testPid, spec) - assert.Contains(t, spec.Process.Env, expected) - } -} - -func TestContainerSpecReadonlyRootfs(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - for _, readonly := range []bool{true, false} { - containerConfig.Linux.SecurityContext.ReadonlyRootfs = readonly - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, 
sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - specCheck(t, testID, testSandboxID, testPid, spec) - assert.Equal(t, readonly, spec.Root.Readonly) - } -} - -func TestContainerSpecWithExtraMounts(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - mountInConfig := &runtime.Mount{ - // Test cleanpath - ContainerPath: "test-container-path/", - HostPath: "test-host-path", - Readonly: false, - } - containerConfig.Mounts = append(containerConfig.Mounts, mountInConfig) - extraMounts := []*runtime.Mount{ - { - ContainerPath: "test-container-path", - HostPath: "test-host-path-extra", - Readonly: true, - }, - { - ContainerPath: "/sys", - HostPath: "test-sys-extra", - Readonly: false, - }, - } - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, extraMounts, ociRuntime) - require.NoError(t, err) - specCheck(t, testID, testSandboxID, testPid, spec) - var mounts, sysMounts []runtimespec.Mount - for _, m := range spec.Mounts { - if strings.HasPrefix(m.Destination, "test-container-path") { - mounts = append(mounts, m) - } else if m.Destination == "/sys" { - sysMounts = append(sysMounts, m) - } - } - t.Logf("CRI mount should override extra mount") - require.Len(t, mounts, 1) - assert.Equal(t, "test-host-path", mounts[0].Source) - assert.Contains(t, mounts[0].Options, "rw") - - t.Logf("Extra mount should override default mount") - require.Len(t, sysMounts, 1) - assert.Equal(t, "test-sys-extra", sysMounts[0].Source) - assert.Contains(t, sysMounts[0].Options, "rw") -} - -func TestContainerAndSandboxPrivileged(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := 
"container-name" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - for _, test := range []struct { - desc string - containerPrivileged bool - sandboxPrivileged bool - expectError bool - }{ - { - desc: "privileged container in non-privileged sandbox should fail", - containerPrivileged: true, - sandboxPrivileged: false, - expectError: true, - }, - { - desc: "privileged container in privileged sandbox should be fine", - containerPrivileged: true, - sandboxPrivileged: true, - expectError: false, - }, - { - desc: "non-privileged container in privileged sandbox should be fine", - containerPrivileged: false, - sandboxPrivileged: true, - expectError: false, - }, - { - desc: "non-privileged container in non-privileged sandbox should be fine", - containerPrivileged: false, - sandboxPrivileged: false, - expectError: false, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - containerConfig.Linux.SecurityContext.Privileged = test.containerPrivileged - sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - Privileged: test.sandboxPrivileged, - } - _, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - if test.expectError { - assert.Error(t, err) - } else { - assert.NoError(t, err) - } - }) - } -} - -func TestPrivilegedBindMount(t *testing.T) { - testPid := uint32(1234) - c := newTestCRIService() - testSandboxID := "sandbox-id" - testContainerName := "container-name" - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - ociRuntime := config.Runtime{} - - for _, test := range []struct { - desc string - privileged bool - expectedSysFSRO bool - expectedCgroupFSRO bool - }{ - { - desc: "sysfs and cgroupfs should mount as 'ro' by default", - expectedSysFSRO: true, - 
expectedCgroupFSRO: true, - }, - { - desc: "sysfs and cgroupfs should not mount as 'ro' if privileged", - privileged: true, - expectedSysFSRO: false, - expectedCgroupFSRO: false, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - containerConfig.Linux.SecurityContext.Privileged = test.privileged - sandboxConfig.Linux.SecurityContext.Privileged = test.privileged - - spec, err := c.buildContainerSpec(currentPlatform, t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - - assert.NoError(t, err) - if test.expectedSysFSRO { - checkMount(t, spec.Mounts, "sysfs", "/sys", "sysfs", []string{"ro"}, []string{"rw"}) - } else { - checkMount(t, spec.Mounts, "sysfs", "/sys", "sysfs", []string{"rw"}, []string{"ro"}) - } - if test.expectedCgroupFSRO { - checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"ro"}, []string{"rw"}) - } else { - checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"rw"}, []string{"ro"}) - } - }) - } -} - -func TestMountPropagation(t *testing.T) { - - sharedLookupMountFn := func(string) (mount.Info, error) { - return mount.Info{ - Mountpoint: "host-path", - Optional: "shared:", - }, nil - } - - slaveLookupMountFn := func(string) (mount.Info, error) { - return mount.Info{ - Mountpoint: "host-path", - Optional: "master:", - }, nil - } - - othersLookupMountFn := func(string) (mount.Info, error) { - return mount.Info{ - Mountpoint: "host-path", - Optional: "others", - }, nil - } - - for _, test := range []struct { - desc string - criMount *runtime.Mount - fakeLookupMountFn func(string) (mount.Info, error) - optionsCheck []string - expectErr bool - }{ - { - desc: "HostPath should mount as 'rprivate' if propagation is MountPropagation_PROPAGATION_PRIVATE", - criMount: &runtime.Mount{ - ContainerPath: "container-path", - HostPath: "host-path", - Propagation: runtime.MountPropagation_PROPAGATION_PRIVATE, - }, - 
fakeLookupMountFn: nil, - optionsCheck: []string{"rbind", "rprivate"}, - expectErr: false, - }, - { - desc: "HostPath should mount as 'rslave' if propagation is MountPropagation_PROPAGATION_HOST_TO_CONTAINER", - criMount: &runtime.Mount{ - ContainerPath: "container-path", - HostPath: "host-path", - Propagation: runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER, - }, - fakeLookupMountFn: slaveLookupMountFn, - optionsCheck: []string{"rbind", "rslave"}, - expectErr: false, - }, - { - desc: "HostPath should mount as 'rshared' if propagation is MountPropagation_PROPAGATION_BIDIRECTIONAL", - criMount: &runtime.Mount{ - ContainerPath: "container-path", - HostPath: "host-path", - Propagation: runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL, - }, - fakeLookupMountFn: sharedLookupMountFn, - optionsCheck: []string{"rbind", "rshared"}, - expectErr: false, - }, - { - desc: "HostPath should mount as 'rprivate' if propagation is illegal", - criMount: &runtime.Mount{ - ContainerPath: "container-path", - HostPath: "host-path", - Propagation: runtime.MountPropagation(42), - }, - fakeLookupMountFn: nil, - optionsCheck: []string{"rbind", "rprivate"}, - expectErr: false, - }, - { - desc: "Expect an error if HostPath isn't shared and mount propagation is MountPropagation_PROPAGATION_BIDIRECTIONAL", - criMount: &runtime.Mount{ - ContainerPath: "container-path", - HostPath: "host-path", - Propagation: runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL, - }, - fakeLookupMountFn: slaveLookupMountFn, - expectErr: true, - }, - { - desc: "Expect an error if HostPath isn't slave or shared and mount propagation is MountPropagation_PROPAGATION_HOST_TO_CONTAINER", - criMount: &runtime.Mount{ - ContainerPath: "container-path", - HostPath: "host-path", - Propagation: runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER, - }, - fakeLookupMountFn: othersLookupMountFn, - expectErr: true, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c := newTestCRIService() - 
c.os.(*ostesting.FakeOS).LookupMountFn = test.fakeLookupMountFn - config, _, _, _ := getCreateContainerTestData() - - var spec runtimespec.Spec - spec.Linux = &runtimespec.Linux{} - - err := opts.WithMounts(c.os, config, []*runtime.Mount{test.criMount}, "")(context.Background(), nil, nil, &spec) - if test.expectErr { - require.Error(t, err) - } else { - require.NoError(t, err) - checkMount(t, spec.Mounts, test.criMount.HostPath, test.criMount.ContainerPath, "bind", test.optionsCheck, nil) - } - }) - } -} - -func TestPidNamespace(t *testing.T) { - testID := "test-id" - testPid := uint32(1234) - testSandboxID := "sandbox-id" - testContainerName := "container-name" - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - for _, test := range []struct { - desc string - pidNS runtime.NamespaceMode - expected runtimespec.LinuxNamespace - }{ - { - desc: "node namespace mode", - pidNS: runtime.NamespaceMode_NODE, - expected: runtimespec.LinuxNamespace{ - Type: runtimespec.PIDNamespace, - Path: opts.GetPIDNamespace(testPid), - }, - }, - { - desc: "container namespace mode", - pidNS: runtime.NamespaceMode_CONTAINER, - expected: runtimespec.LinuxNamespace{ - Type: runtimespec.PIDNamespace, - }, - }, - { - desc: "pod namespace mode", - pidNS: runtime.NamespaceMode_POD, - expected: runtimespec.LinuxNamespace{ - Type: runtimespec.PIDNamespace, - Path: opts.GetPIDNamespace(testPid), - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{Pid: test.pidNS} - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - assert.Contains(t, spec.Linux.Namespaces, test.expected) - }) - } -} - -func TestUserNamespace(t *testing.T) { - testID := "test-id" - 
testPid := uint32(1234) - testSandboxID := "sandbox-id" - testContainerName := "container-name" - idMap := runtime.IDMapping{ - HostId: 1000, - ContainerId: 1000, - Length: 10, - } - otherIDMap := runtime.IDMapping{ - HostId: 2000, - ContainerId: 2000, - Length: 10, - } - expIDMap := runtimespec.LinuxIDMapping{ - HostID: 1000, - ContainerID: 1000, - Size: 10, - } - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - - for _, test := range []struct { - desc string - userNS *runtime.UserNamespace - sandboxUserNS *runtime.UserNamespace - expNS *runtimespec.LinuxNamespace - expNotNS *runtimespec.LinuxNamespace // Does NOT contain this namespace - expUIDMapping []runtimespec.LinuxIDMapping - expGIDMapping []runtimespec.LinuxIDMapping - err bool - }{ - { - desc: "node namespace mode", - userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_NODE}, - // Expect userns to NOT be present. - expNotNS: &runtimespec.LinuxNamespace{ - Type: runtimespec.UserNamespace, - Path: opts.GetUserNamespace(testPid), - }, - }, - { - desc: "node namespace mode with mappings", - userNS: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_NODE, - Uids: []*runtime.IDMapping{&idMap}, - Gids: []*runtime.IDMapping{&idMap}, - }, - err: true, - }, - { - desc: "container namespace mode", - userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_CONTAINER}, - err: true, - }, - { - desc: "target namespace mode", - userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_TARGET}, - err: true, - }, - { - desc: "unknown namespace mode", - userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode(100)}, - err: true, - }, - { - desc: "pod namespace mode", - userNS: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: []*runtime.IDMapping{&idMap}, - Gids: []*runtime.IDMapping{&idMap}, - }, - expNS: &runtimespec.LinuxNamespace{ - Type: runtimespec.UserNamespace, - Path: 
opts.GetUserNamespace(testPid), - }, - expUIDMapping: []runtimespec.LinuxIDMapping{expIDMap}, - expGIDMapping: []runtimespec.LinuxIDMapping{expIDMap}, - }, - { - desc: "pod namespace mode with inconsistent sandbox config (different GIDs)", - userNS: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: []*runtime.IDMapping{&idMap}, - Gids: []*runtime.IDMapping{&idMap}, - }, - sandboxUserNS: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: []*runtime.IDMapping{&idMap}, - Gids: []*runtime.IDMapping{&otherIDMap}, - }, - err: true, - }, - { - desc: "pod namespace mode with inconsistent sandbox config (different UIDs)", - userNS: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: []*runtime.IDMapping{&idMap}, - Gids: []*runtime.IDMapping{&idMap}, - }, - sandboxUserNS: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: []*runtime.IDMapping{&otherIDMap}, - Gids: []*runtime.IDMapping{&idMap}, - }, - err: true, - }, - { - desc: "pod namespace mode with inconsistent sandbox config (different len)", - userNS: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: []*runtime.IDMapping{&idMap}, - Gids: []*runtime.IDMapping{&idMap}, - }, - sandboxUserNS: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: []*runtime.IDMapping{&idMap, &idMap}, - Gids: []*runtime.IDMapping{&idMap, &idMap}, - }, - err: true, - }, - { - desc: "pod namespace mode with inconsistent sandbox config (different mode)", - userNS: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: []*runtime.IDMapping{&idMap}, - Gids: []*runtime.IDMapping{&idMap}, - }, - sandboxUserNS: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_NODE, - Uids: []*runtime.IDMapping{&idMap}, - Gids: []*runtime.IDMapping{&idMap}, - }, - err: true, - }, - { - desc: "pod namespace mode with several mappings", - userNS: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: []*runtime.IDMapping{&idMap, &idMap}, - Gids: 
[]*runtime.IDMapping{&idMap, &idMap}, - }, - err: true, - }, - { - desc: "pod namespace mode with uneven mappings", - userNS: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: []*runtime.IDMapping{&idMap, &idMap}, - Gids: []*runtime.IDMapping{&idMap}, - }, - err: true, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{UsernsOptions: test.userNS} - // By default, set sandbox and container config to the same (this is - // required by containerSpec). However, if the test wants to test for what - // happens when they don't match, the test.sandboxUserNS should be set and - // we just use that. - sandboxUserns := test.userNS - if test.sandboxUserNS != nil { - sandboxUserns = test.sandboxUserNS - } - sandboxConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{UsernsOptions: sandboxUserns} - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - - if test.err { - require.Error(t, err) - assert.Nil(t, spec) - return - } - require.NoError(t, err) - assert.Equal(t, spec.Linux.UIDMappings, test.expUIDMapping) - assert.Equal(t, spec.Linux.GIDMappings, test.expGIDMapping) - - if test.expNS != nil { - assert.Contains(t, spec.Linux.Namespaces, *test.expNS) - } - if test.expNotNS != nil { - assert.NotContains(t, spec.Linux.Namespaces, *test.expNotNS) - } - }) - } -} - -func TestNoDefaultRunMount(t *testing.T) { - testID := "test-id" - testPid := uint32(1234) - testSandboxID := "sandbox-id" - testContainerName := "container-name" - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, 
sandboxConfig, imageConfig, nil, ociRuntime) - assert.NoError(t, err) - for _, mount := range spec.Mounts { - assert.NotEqual(t, "/run", mount.Destination) - } -} - -func TestGenerateSeccompSecurityProfileSpecOpts(t *testing.T) { - for _, test := range []struct { - desc string - profile string - privileged bool - disable bool - specOpts oci.SpecOpts - expectErr bool - defaultProfile string - sp *runtime.SecurityProfile - }{ - { - desc: "should return error if seccomp is specified when seccomp is not supported", - profile: runtimeDefault, - disable: true, - expectErr: true, - }, - { - desc: "should not return error if seccomp is not specified when seccomp is not supported", - profile: "", - disable: true, - }, - { - desc: "should not return error if seccomp is unconfined when seccomp is not supported", - profile: unconfinedProfile, - disable: true, - }, - { - desc: "should not set seccomp when privileged is true", - profile: seccompDefaultProfile, - privileged: true, - }, - { - desc: "should not set seccomp when seccomp is unconfined", - profile: unconfinedProfile, - }, - { - desc: "should not set seccomp when seccomp is not specified", - profile: "", - }, - { - desc: "should set default seccomp when seccomp is runtime/default", - profile: runtimeDefault, - specOpts: seccomp.WithDefaultProfile(), - }, - { - desc: "should set default seccomp when seccomp is docker/default", - profile: dockerDefault, - specOpts: seccomp.WithDefaultProfile(), - }, - { - desc: "should set specified profile when local profile is specified", - profile: profileNamePrefix + "test-profile", - specOpts: seccomp.WithProfile("test-profile"), - }, - { - desc: "should use default profile when seccomp is empty", - defaultProfile: profileNamePrefix + "test-profile", - specOpts: seccomp.WithProfile("test-profile"), - }, - { - desc: "should fallback to docker/default when seccomp is empty and default is runtime/default", - defaultProfile: runtimeDefault, - specOpts: seccomp.WithDefaultProfile(), - }, 
- //----------------------------------------------- - // now buckets for the SecurityProfile variants - //----------------------------------------------- - { - desc: "sp should return error if seccomp is specified when seccomp is not supported", - disable: true, - expectErr: true, - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_RuntimeDefault, - }, - }, - { - desc: "sp should not return error if seccomp is unconfined when seccomp is not supported", - disable: true, - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_Unconfined, - }, - }, - { - desc: "sp should not set seccomp when privileged is true", - privileged: true, - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_RuntimeDefault, - }, - }, - { - desc: "sp should not set seccomp when seccomp is unconfined", - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_Unconfined, - }, - }, - { - desc: "sp should not set seccomp when seccomp is not specified", - }, - { - desc: "sp should set default seccomp when seccomp is runtime/default", - specOpts: seccomp.WithDefaultProfile(), - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_RuntimeDefault, - }, - }, - { - desc: "sp should set specified profile when local profile is specified", - specOpts: seccomp.WithProfile("test-profile"), - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_Localhost, - LocalhostRef: profileNamePrefix + "test-profile", - }, - }, - { - desc: "sp should set specified profile when local profile is specified even without prefix", - specOpts: seccomp.WithProfile("test-profile"), - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_Localhost, - LocalhostRef: "test-profile", - }, - }, - { - desc: "sp should return error if specified profile is invalid", - expectErr: true, - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_RuntimeDefault, - LocalhostRef: "test-profile", - }, - }, - } { - test := 
test - t.Run(test.desc, func(t *testing.T) { - cri := &criService{} - cri.config.UnsetSeccompProfile = test.defaultProfile - ssp := test.sp - csp, err := generateSeccompSecurityProfile( - test.profile, - test.defaultProfile) - if err != nil { - if test.expectErr { - assert.Error(t, err) - } else { - assert.NoError(t, err) - } - } else { - if ssp == nil { - ssp = csp - } - specOpts, err := cri.generateSeccompSpecOpts(ssp, test.privileged, !test.disable) - assert.Equal(t, - reflect.ValueOf(test.specOpts).Pointer(), - reflect.ValueOf(specOpts).Pointer()) - if test.expectErr { - assert.Error(t, err) - } else { - assert.NoError(t, err) - } - } - }) - } -} - -func TestGenerateApparmorSpecOpts(t *testing.T) { - for _, test := range []struct { - desc string - profile string - privileged bool - disable bool - specOpts oci.SpecOpts - expectErr bool - sp *runtime.SecurityProfile - }{ - { - desc: "should return error if apparmor is specified when apparmor is not supported", - profile: runtimeDefault, - disable: true, - expectErr: true, - }, - { - desc: "should not return error if apparmor is not specified when apparmor is not supported", - profile: "", - disable: true, - }, - { - desc: "should set default apparmor when apparmor is not specified", - profile: "", - specOpts: apparmor.WithDefaultProfile(appArmorDefaultProfileName), - }, - { - desc: "should not apparmor when apparmor is not specified and privileged is true", - profile: "", - privileged: true, - }, - { - desc: "should not return error if apparmor is unconfined when apparmor is not supported", - profile: unconfinedProfile, - disable: true, - }, - { - desc: "should not apparmor when apparmor is unconfined", - profile: unconfinedProfile, - }, - { - desc: "should not apparmor when apparmor is unconfined and privileged is true", - profile: unconfinedProfile, - privileged: true, - }, - { - desc: "should set default apparmor when apparmor is runtime/default", - profile: runtimeDefault, - specOpts: 
apparmor.WithDefaultProfile(appArmorDefaultProfileName), - }, - { - desc: "should not apparmor when apparmor is default and privileged is true", - profile: runtimeDefault, - privileged: true, - }, - // TODO (mikebrow) add success with existing defined profile tests - { - desc: "should return error when undefined local profile is specified", - profile: profileNamePrefix + "test-profile", - expectErr: true, - }, - { - desc: "should return error when undefined local profile is specified and privileged is true", - profile: profileNamePrefix + "test-profile", - privileged: true, - expectErr: true, - }, - { - desc: "should return error if specified profile is invalid", - profile: "test-profile", - expectErr: true, - }, - //-------------------------------------- - // buckets for SecurityProfile struct - //-------------------------------------- - { - desc: "sp should return error if apparmor is specified when apparmor is not supported", - disable: true, - expectErr: true, - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_RuntimeDefault, - }, - }, - { - desc: "sp should not return error if apparmor is unconfined when apparmor is not supported", - disable: true, - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_Unconfined, - }, - }, - { - desc: "sp should not apparmor when apparmor is unconfined", - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_Unconfined, - }, - }, - { - desc: "sp should not apparmor when apparmor is unconfined and privileged is true", - privileged: true, - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_Unconfined, - }, - }, - { - desc: "sp should set default apparmor when apparmor is runtime/default", - specOpts: apparmor.WithDefaultProfile(appArmorDefaultProfileName), - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_RuntimeDefault, - }, - }, - { - desc: "sp should not apparmor when apparmor is default and privileged is true", - privileged: true, - 
sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_RuntimeDefault, - }, - }, - { - desc: "sp should return error when undefined local profile is specified", - expectErr: true, - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_Localhost, - LocalhostRef: profileNamePrefix + "test-profile", - }, - }, - { - desc: "sp should return error when undefined local profile is specified even without prefix", - profile: profileNamePrefix + "test-profile", - expectErr: true, - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_Localhost, - LocalhostRef: "test-profile", - }, - }, - { - desc: "sp should return error when undefined local profile is specified and privileged is true", - privileged: true, - expectErr: true, - sp: &runtime.SecurityProfile{ - ProfileType: runtime.SecurityProfile_Localhost, - LocalhostRef: profileNamePrefix + "test-profile", - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - asp := test.sp - csp, err := generateApparmorSecurityProfile(test.profile) - if err != nil { - if test.expectErr { - assert.Error(t, err) - } else { - assert.NoError(t, err) - } - } else { - if asp == nil { - asp = csp - } - specOpts, err := generateApparmorSpecOpts(asp, test.privileged, !test.disable) - assert.Equal(t, - reflect.ValueOf(test.specOpts).Pointer(), - reflect.ValueOf(specOpts).Pointer()) - if test.expectErr { - assert.Error(t, err) - } else { - assert.NoError(t, err) - } - } - }) - } -} - -func TestMaskedAndReadonlyPaths(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - - defaultSpec, err := oci.GenerateSpec(ctrdutil.NamespacedContext(), nil, &containers.Container{ID: testID}) - require.NoError(t, err) - - for _, test := range []struct { - desc string - 
disableProcMount bool - masked []string - readonly []string - expectedMasked []string - expectedReadonly []string - privileged bool - }{ - { - desc: "should apply default if not specified when disable_proc_mount = true", - disableProcMount: true, - masked: nil, - readonly: nil, - expectedMasked: defaultSpec.Linux.MaskedPaths, - expectedReadonly: defaultSpec.Linux.ReadonlyPaths, - privileged: false, - }, - { - desc: "should apply default if not specified when disable_proc_mount = false", - disableProcMount: false, - masked: nil, - readonly: nil, - expectedMasked: []string{}, - expectedReadonly: []string{}, - privileged: false, - }, - { - desc: "should be able to specify empty paths", - masked: []string{}, - readonly: []string{}, - expectedMasked: []string{}, - expectedReadonly: []string{}, - privileged: false, - }, - { - desc: "should apply CRI specified paths", - masked: []string{"/proc"}, - readonly: []string{"/sys"}, - expectedMasked: []string{"/proc"}, - expectedReadonly: []string{"/sys"}, - privileged: false, - }, - { - desc: "default should be nil for privileged", - expectedMasked: nil, - expectedReadonly: nil, - privileged: true, - }, - { - desc: "should be able to specify empty paths, esp. 
if privileged", - masked: []string{}, - readonly: []string{}, - expectedMasked: nil, - expectedReadonly: nil, - privileged: true, - }, - { - desc: "should not apply CRI specified paths if privileged", - masked: []string{"/proc"}, - readonly: []string{"/sys"}, - expectedMasked: nil, - expectedReadonly: nil, - privileged: true, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c.config.DisableProcMount = test.disableProcMount - containerConfig.Linux.SecurityContext.MaskedPaths = test.masked - containerConfig.Linux.SecurityContext.ReadonlyPaths = test.readonly - containerConfig.Linux.SecurityContext.Privileged = test.privileged - sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - Privileged: test.privileged, - } - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - if !test.privileged { // specCheck presumes an unprivileged container - specCheck(t, testID, testSandboxID, testPid, spec) - } - assert.Equal(t, test.expectedMasked, spec.Linux.MaskedPaths) - assert.Equal(t, test.expectedReadonly, spec.Linux.ReadonlyPaths) - }) - } -} - -func TestHostname(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - c.os.(*ostesting.FakeOS).HostnameFn = func() (string, error) { - return "real-hostname", nil - } - for _, test := range []struct { - desc string - hostname string - networkNs runtime.NamespaceMode - expectedEnv string - }{ - { - desc: "should add HOSTNAME=sandbox.Hostname for pod network namespace", - hostname: "test-hostname", - networkNs: runtime.NamespaceMode_POD, - expectedEnv: "HOSTNAME=test-hostname", - }, - { - desc: "should add 
HOSTNAME=sandbox.Hostname for host network namespace", - hostname: "test-hostname", - networkNs: runtime.NamespaceMode_NODE, - expectedEnv: "HOSTNAME=test-hostname", - }, - { - desc: "should add HOSTNAME=os.Hostname for host network namespace if sandbox.Hostname is not set", - hostname: "", - networkNs: runtime.NamespaceMode_NODE, - expectedEnv: "HOSTNAME=real-hostname", - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - sandboxConfig.Hostname = test.hostname - sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{Network: test.networkNs}, - } - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - specCheck(t, testID, testSandboxID, testPid, spec) - assert.Contains(t, spec.Process.Env, test.expectedEnv) - }) - } -} - -func TestDisableCgroup(t *testing.T) { - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - c.config.DisableCgroup = true - spec, err := c.buildContainerSpec(currentPlatform, "test-id", "sandbox-id", 1234, "", "container-name", testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - - t.Log("resource limit should not be set") - assert.Nil(t, spec.Linux.Resources.Memory) - assert.Nil(t, spec.Linux.Resources.CPU) - - t.Log("cgroup path should be empty") - assert.Empty(t, spec.Linux.CgroupsPath) -} - -func TestGenerateUserString(t *testing.T) { - type testcase struct { - // the name of the test case - name string - - u string - uid, gid *runtime.Int64Value - - result string - expectedError bool - } - testcases := []testcase{ - { - name: "Empty", - result: "", - }, - { - name: "Username Only", - u: "testuser", - result: "testuser", - }, - { - name: "Username, UID", - u: "testuser", 
- uid: &runtime.Int64Value{Value: 1}, - result: "testuser", - }, - { - name: "Username, UID, GID", - u: "testuser", - uid: &runtime.Int64Value{Value: 1}, - gid: &runtime.Int64Value{Value: 10}, - result: "testuser:10", - }, - { - name: "Username, GID", - u: "testuser", - gid: &runtime.Int64Value{Value: 10}, - result: "testuser:10", - }, - { - name: "UID only", - uid: &runtime.Int64Value{Value: 1}, - result: "1", - }, - { - name: "UID, GID", - uid: &runtime.Int64Value{Value: 1}, - gid: &runtime.Int64Value{Value: 10}, - result: "1:10", - }, - { - name: "GID only", - gid: &runtime.Int64Value{Value: 10}, - result: "", - expectedError: true, - }, - } - for _, tc := range testcases { - t.Run(tc.name, func(t *testing.T) { - r, err := generateUserString(tc.u, tc.uid, tc.gid) - if tc.expectedError { - assert.Error(t, err) - } else { - assert.NoError(t, err) - } - assert.Equal(t, tc.result, r) - }) - } -} - -func TestProcessUser(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - ociRuntime := config.Runtime{} - c := newTestCRIService() - testContainer := &containers.Container{ID: "64ddfe361f0099f8d59075398feeb3dcb3863b6851df7b946744755066c03e9d"} - ctx := context.Background() - - etcPasswd := ` -root:x:0:0:root:/root:/bin/sh -alice:x:1000:1000:alice:/home/alice:/bin/sh -` // #nosec G101 - etcGroup := ` -root:x:0 -alice:x:1000: -additional-group-for-alice:x:11111:alice -additional-group-for-root:x:22222:root -` - tempRootDir := t.TempDir() - require.NoError(t, - os.MkdirAll(filepath.Join(tempRootDir, "etc"), 0755), - ) - require.NoError(t, - os.WriteFile(filepath.Join(tempRootDir, "etc", "passwd"), []byte(etcPasswd), 0644), - ) - require.NoError(t, - os.WriteFile(filepath.Join(tempRootDir, "etc", "group"), []byte(etcGroup), 0644), - ) - - for _, test := range []struct { - desc string - imageConfigUser string - securityContext *runtime.LinuxContainerSecurityContext - expected runtimespec.User 
- }{ - { - desc: "Only SecurityContext was set, SecurityContext defines User", - securityContext: &runtime.LinuxContainerSecurityContext{ - RunAsUser: &runtime.Int64Value{Value: 1000}, - RunAsGroup: &runtime.Int64Value{Value: 2000}, - SupplementalGroups: []int64{3333}, - }, - expected: runtimespec.User{UID: 1000, GID: 2000, AdditionalGids: []uint32{2000, 3333, 11111}}, - }, - { - desc: "Only imageConfig.User was set, imageConfig.User defines User", - imageConfigUser: "1000", - securityContext: nil, - expected: runtimespec.User{UID: 1000, GID: 1000, AdditionalGids: []uint32{1000, 11111}}, - }, - { - desc: "Both SecurityContext and ImageConfig.User was set, SecurityContext defines User", - imageConfigUser: "0", - securityContext: &runtime.LinuxContainerSecurityContext{ - RunAsUser: &runtime.Int64Value{Value: 1000}, - RunAsGroup: &runtime.Int64Value{Value: 2000}, - SupplementalGroups: []int64{3333}, - }, - expected: runtimespec.User{UID: 1000, GID: 2000, AdditionalGids: []uint32{2000, 3333, 11111}}, - }, - { - desc: "No SecurityContext nor ImageConfig.User were set, runtime default defines User", - expected: runtimespec.User{UID: 0, GID: 0, AdditionalGids: []uint32{0, 22222}}, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - containerConfig.Linux.SecurityContext = test.securityContext - imageConfig.User = test.imageConfigUser - - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - - spec.Root.Path = tempRootDir // simulating /etc/{passwd, group} - opts, err := c.platformSpecOpts(platforms.DefaultSpec(), containerConfig, imageConfig) - require.NoError(t, err) - oci.ApplyOpts(ctx, nil, testContainer, spec, opts...) 
- - require.Equal(t, test.expected, spec.Process.User) - }) - } -} - -func TestNonRootUserAndDevices(t *testing.T) { - testPid := uint32(1234) - c := newTestCRIService() - testSandboxID := "sandbox-id" - testContainerName := "container-name" - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - - hostDevicesRaw, err := oci.HostDevices() - assert.NoError(t, err) - - testDevice := hostDevicesRaw[0] - - for _, test := range []struct { - desc string - uid, gid *runtime.Int64Value - deviceOwnershipFromSecurityContext bool - expectedDeviceUID uint32 - expectedDeviceGID uint32 - }{ - { - desc: "expect non-root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled", - uid: &runtime.Int64Value{Value: 1}, - gid: &runtime.Int64Value{Value: 10}, - expectedDeviceUID: *testDevice.UID, - expectedDeviceGID: *testDevice.GID, - }, - { - desc: "expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled", - uid: &runtime.Int64Value{Value: 0}, - gid: &runtime.Int64Value{Value: 0}, - expectedDeviceUID: *testDevice.UID, - expectedDeviceGID: *testDevice.GID, - }, - { - desc: "expect non-root container's Devices Uid/Gid to be the same as RunAsUser/RunAsGroup when deviceOwnershipFromSecurityContext is enabled", - uid: &runtime.Int64Value{Value: 1}, - gid: &runtime.Int64Value{Value: 10}, - deviceOwnershipFromSecurityContext: true, - expectedDeviceUID: 1, - expectedDeviceGID: 10, - }, - { - desc: "expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is enabled", - uid: &runtime.Int64Value{Value: 0}, - gid: &runtime.Int64Value{Value: 0}, - deviceOwnershipFromSecurityContext: true, - expectedDeviceUID: *testDevice.UID, - expectedDeviceGID: *testDevice.GID, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - 
c.config.DeviceOwnershipFromSecurityContext = test.deviceOwnershipFromSecurityContext - containerConfig.Linux.SecurityContext.RunAsUser = test.uid - containerConfig.Linux.SecurityContext.RunAsGroup = test.gid - containerConfig.Devices = []*runtime.Device{ - { - ContainerPath: testDevice.Path, - HostPath: testDevice.Path, - Permissions: "r", - }, - } - - spec, err := c.buildContainerSpec(currentPlatform, t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) - assert.NoError(t, err) - - assert.Equal(t, test.expectedDeviceUID, *spec.Linux.Devices[0].UID) - assert.Equal(t, test.expectedDeviceGID, *spec.Linux.Devices[0].GID) - }) - } -} - -func TestPrivilegedDevices(t *testing.T) { - testPid := uint32(1234) - c := newTestCRIService() - testSandboxID := "sandbox-id" - testContainerName := "container-name" - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - - for _, test := range []struct { - desc string - privileged bool - privilegedWithoutHostDevices bool - privilegedWithoutHostDevicesAllDevicesAllowed bool - expectHostDevices bool - expectAllDevicesAllowed bool - }{ - { - desc: "expect no host devices when privileged is false", - privileged: false, - privilegedWithoutHostDevices: false, - privilegedWithoutHostDevicesAllDevicesAllowed: false, - expectHostDevices: false, - expectAllDevicesAllowed: false, - }, - { - desc: "expect no host devices when privileged is false and privilegedWithoutHostDevices is true", - privileged: false, - privilegedWithoutHostDevices: true, - privilegedWithoutHostDevicesAllDevicesAllowed: false, - expectHostDevices: false, - expectAllDevicesAllowed: false, - }, - { - desc: "expect host devices and all device allowlist when privileged is true", - privileged: true, - privilegedWithoutHostDevices: false, - privilegedWithoutHostDevicesAllDevicesAllowed: false, - expectHostDevices: true, - expectAllDevicesAllowed: true, - }, - { - 
desc: "expect no host devices when privileged is true and privilegedWithoutHostDevices is true", - privileged: true, - privilegedWithoutHostDevices: true, - privilegedWithoutHostDevicesAllDevicesAllowed: false, - expectHostDevices: false, - expectAllDevicesAllowed: false, - }, - { - desc: "expect host devices and all devices allowlist when privileged is true and privilegedWithoutHostDevices is true and privilegedWithoutHostDevicesAllDevicesAllowed is true", - privileged: true, - privilegedWithoutHostDevices: true, - privilegedWithoutHostDevicesAllDevicesAllowed: true, - expectHostDevices: false, - expectAllDevicesAllowed: true, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - containerConfig.Linux.SecurityContext.Privileged = test.privileged - sandboxConfig.Linux.SecurityContext.Privileged = test.privileged - - ociRuntime := config.Runtime{ - PrivilegedWithoutHostDevices: test.privilegedWithoutHostDevices, - PrivilegedWithoutHostDevicesAllDevicesAllowed: test.privilegedWithoutHostDevicesAllDevicesAllowed, - } - spec, err := c.buildContainerSpec(currentPlatform, t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - assert.NoError(t, err) - - hostDevicesRaw, err := oci.HostDevices() - assert.NoError(t, err) - var hostDevices = make([]string, 0) - for _, dev := range hostDevicesRaw { - // https://github.com/containerd/cri/pull/1521#issuecomment-652807951 - if dev.Major != 0 { - hostDevices = append(hostDevices, dev.Path) - } - } - - if test.expectHostDevices { - assert.Len(t, spec.Linux.Devices, len(hostDevices)) - } else { - assert.Empty(t, spec.Linux.Devices) - } - - assert.Len(t, spec.Linux.Resources.Devices, 1) - assert.Equal(t, spec.Linux.Resources.Devices[0].Allow, test.expectAllDevicesAllowed) - assert.Equal(t, spec.Linux.Resources.Devices[0].Access, "rwm") - }) - } -} - -func TestBaseOCISpec(t *testing.T) { - c := newTestCRIService() - baseLimit := int64(100) - 
c.baseOCISpecs = map[string]*oci.Spec{ - "/etc/containerd/cri-base.json": { - Process: &runtimespec.Process{ - User: runtimespec.User{AdditionalGids: []uint32{9999}}, - Capabilities: &runtimespec.LinuxCapabilities{ - Permitted: []string{"CAP_SETUID"}, - }, - }, - Linux: &runtimespec.Linux{ - Resources: &runtimespec.LinuxResources{ - Memory: &runtimespec.LinuxMemory{Limit: &baseLimit}, // Will be overwritten by `getCreateContainerTestData` - }, - }, - }, - } - - ociRuntime := config.Runtime{} - ociRuntime.BaseRuntimeSpec = "/etc/containerd/cri-base.json" - - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - assert.NoError(t, err) - - specCheck(t, testID, testSandboxID, testPid, spec) - - assert.Contains(t, spec.Process.User.AdditionalGids, uint32(9999)) - assert.Len(t, spec.Process.User.AdditionalGids, 3) - - assert.Contains(t, spec.Process.Capabilities.Permitted, "CAP_SETUID") - assert.Len(t, spec.Process.Capabilities.Permitted, 1) - - assert.Equal(t, *spec.Linux.Resources.Memory.Limit, containerConfig.Linux.Resources.MemoryLimitInBytes) -} - -func writeFilesToTempDir(tmpDirPattern string, content []string) (string, error) { - if len(content) == 0 { - return "", nil - } - - dir, err := os.MkdirTemp("", tmpDirPattern) - if err != nil { - return "", err - } - - for idx, data := range content { - file := filepath.Join(dir, fmt.Sprintf("spec-%d.yaml", idx)) - err := os.WriteFile(file, []byte(data), 0644) - if err != nil { - return "", err - } - } - - return dir, nil -} - -func TestCDIInjections(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - 
containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - testContainer := &containers.Container{ID: "64ddfe361f0099f8d59075398feeb3dcb3863b6851df7b946744755066c03e9d"} - ctx := context.Background() - - for _, test := range []struct { - description string - cdiSpecFiles []string - cdiDevices []*runtime.CDIDevice - annotations map[string]string - expectError bool - expectDevices []runtimespec.LinuxDevice - expectEnv []string - }{ - {description: "expect no CDI error for nil annotations", - cdiDevices: []*runtime.CDIDevice{}, - }, - {description: "expect no CDI error for nil CDIDevices", - annotations: map[string]string{}, - }, - {description: "expect no CDI error for empty CDI devices and annotations", - cdiDevices: []*runtime.CDIDevice{}, - annotations: map[string]string{}, - }, - {description: "expect CDI error for invalid CDI device reference in annotations", - annotations: map[string]string{ - cdi.AnnotationPrefix + "devices": "foobar", - }, - expectError: true, - }, - {description: "expect CDI error for invalid CDI device reference in CDIDevices", - cdiDevices: []*runtime.CDIDevice{ - {Name: "foobar"}, - }, - expectError: true, - }, - {description: "expect CDI error for unresolvable devices in annotations", - annotations: map[string]string{ - cdi.AnnotationPrefix + "vendor1_devices": "vendor1.com/device=no-such-dev", - }, - expectError: true, - }, - {description: "expect CDI error for unresolvable devices in CDIDevices", - cdiDevices: []*runtime.CDIDevice{ - {Name: "vendor1.com/device=no-such-dev"}, - }, - expectError: true, - }, - {description: "expect properly injected resolvable CDI devices from annotations", - cdiSpecFiles: []string{ - ` -cdiVersion: "0.3.0" -kind: "vendor1.com/device" -devices: - - name: foo - containerEdits: - deviceNodes: - - path: /dev/loop8 - type: b - major: 7 - minor: 8 - env: - - FOO=injected -containerEdits: - env: - - "VENDOR1=present" -`, - 
` -cdiVersion: "0.3.0" -kind: "vendor2.com/device" -devices: - - name: bar - containerEdits: - deviceNodes: - - path: /dev/loop9 - type: b - major: 7 - minor: 9 - env: - - BAR=injected -containerEdits: - env: - - "VENDOR2=present" -`, - }, - annotations: map[string]string{ - cdi.AnnotationPrefix + "vendor1_devices": "vendor1.com/device=foo", - cdi.AnnotationPrefix + "vendor2_devices": "vendor2.com/device=bar", - }, - expectDevices: []runtimespec.LinuxDevice{ - { - Path: "/dev/loop8", - Type: "b", - Major: 7, - Minor: 8, - }, - { - Path: "/dev/loop9", - Type: "b", - Major: 7, - Minor: 9, - }, - }, - expectEnv: []string{ - "FOO=injected", - "VENDOR1=present", - "BAR=injected", - "VENDOR2=present", - }, - }, - {description: "expect properly injected resolvable CDI devices from CDIDevices", - cdiSpecFiles: []string{ - ` -cdiVersion: "0.3.0" -kind: "vendor1.com/device" -devices: - - name: foo - containerEdits: - deviceNodes: - - path: /dev/loop8 - type: b - major: 7 - minor: 8 - env: - - FOO=injected -containerEdits: - env: - - "VENDOR1=present" -`, - ` -cdiVersion: "0.3.0" -kind: "vendor2.com/device" -devices: - - name: bar - containerEdits: - deviceNodes: - - path: /dev/loop9 - type: b - major: 7 - minor: 9 - env: - - BAR=injected -containerEdits: - env: - - "VENDOR2=present" -`, - }, - cdiDevices: []*runtime.CDIDevice{ - {Name: "vendor1.com/device=foo"}, - {Name: "vendor2.com/device=bar"}, - }, - expectDevices: []runtimespec.LinuxDevice{ - { - Path: "/dev/loop8", - Type: "b", - Major: 7, - Minor: 8, - }, - { - Path: "/dev/loop9", - Type: "b", - Major: 7, - Minor: 9, - }, - }, - expectEnv: []string{ - "FOO=injected", - "VENDOR1=present", - "BAR=injected", - "VENDOR2=present", - }, - }, - {description: "expect CDI devices from CDIDevices and annotations", - cdiSpecFiles: []string{ - ` -cdiVersion: "0.3.0" -kind: "vendor1.com/device" -devices: - - name: foo - containerEdits: - deviceNodes: - - path: /dev/loop8 - type: b - major: 7 - minor: 8 - env: - - FOO=injected 
-containerEdits: - env: - - "VENDOR1=present" -`, - ` -cdiVersion: "0.3.0" -kind: "vendor2.com/device" -devices: - - name: bar - containerEdits: - deviceNodes: - - path: /dev/loop9 - type: b - major: 7 - minor: 9 - env: - - BAR=injected -containerEdits: - env: - - "VENDOR2=present" -`, - ` -cdiVersion: "0.3.0" -kind: "vendor3.com/device" -devices: - - name: foo3 - containerEdits: - deviceNodes: - - path: /dev/loop10 - type: b - major: 7 - minor: 10 - env: - - FOO3=injected -containerEdits: - env: - - "VENDOR3=present" -`, - ` -cdiVersion: "0.3.0" -kind: "vendor4.com/device" -devices: - - name: bar4 - containerEdits: - deviceNodes: - - path: /dev/loop11 - type: b - major: 7 - minor: 11 - env: - - BAR4=injected -containerEdits: - env: - - "VENDOR4=present" -`, - }, - cdiDevices: []*runtime.CDIDevice{ - {Name: "vendor1.com/device=foo"}, - {Name: "vendor2.com/device=bar"}, - {Name: "vendor3.com/device=foo3"}, - }, - annotations: map[string]string{ - cdi.AnnotationPrefix + "vendor3_devices": "vendor3.com/device=foo3", // Duplicated device, should be ignored - cdi.AnnotationPrefix + "vendor4_devices": "vendor4.com/device=bar4", - }, - expectDevices: []runtimespec.LinuxDevice{ - { - Path: "/dev/loop8", - Type: "b", - Major: 7, - Minor: 8, - }, - { - Path: "/dev/loop9", - Type: "b", - Major: 7, - Minor: 9, - }, - { - Path: "/dev/loop10", - Type: "b", - Major: 7, - Minor: 10, - }, - { - Path: "/dev/loop11", - Type: "b", - Major: 7, - Minor: 11, - }, - }, - expectEnv: []string{ - "FOO=injected", - "VENDOR1=present", - "BAR=injected", - "VENDOR2=present", - "FOO3=injected", - "VENDOR3=present", - "BAR4=injected", - "VENDOR4=present", - }, - }, - } { - t.Run(test.description, func(t *testing.T) { - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - - specCheck(t, testID, testSandboxID, testPid, spec) - - cdiDir, err := 
writeFilesToTempDir("containerd-test-CDI-injections-", test.cdiSpecFiles) - if cdiDir != "" { - defer os.RemoveAll(cdiDir) - } - require.NoError(t, err) - - reg := cdi.GetRegistry() - err = reg.Configure(cdi.WithSpecDirs(cdiDir)) - require.NoError(t, err) - - injectFun := customopts.WithCDI(test.annotations, test.cdiDevices) - err = injectFun(ctx, nil, testContainer, spec) - assert.Equal(t, test.expectError, err != nil) - - if err != nil { - if test.expectEnv != nil { - for _, expectedEnv := range test.expectEnv { - assert.Contains(t, spec.Process.Env, expectedEnv) - } - } - if test.expectDevices != nil { - for _, expectedDev := range test.expectDevices { - assert.Contains(t, spec.Linux.Devices, expectedDev) - } - } - } - }) - } -} diff --git a/pkg/cri/sbserver/container_create_other.go b/pkg/cri/sbserver/container_create_other.go deleted file mode 100644 index a5feb385c..000000000 --- a/pkg/cri/sbserver/container_create_other.go +++ /dev/null @@ -1,36 +0,0 @@ -//go:build !windows && !linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/oci" - "github.com/containerd/containerd/snapshots" -) - -func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { - return []oci.SpecOpts{}, nil -} - -// snapshotterOpts returns snapshotter options for the rootfs snapshot -func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) { - return []snapshots.Opt{}, nil -} diff --git a/pkg/cri/sbserver/container_create_other_test.go b/pkg/cri/sbserver/container_create_other_test.go deleted file mode 100644 index cc639305d..000000000 --- a/pkg/cri/sbserver/container_create_other_test.go +++ /dev/null @@ -1,115 +0,0 @@ -//go:build !windows && !linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "testing" - - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/stretchr/testify/assert" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/pkg/cri/annotations" -) - -// checkMount is defined by all tests but not used here -var _ = checkMount - -func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig, - *imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) { - config := &runtime.ContainerConfig{ - Metadata: &runtime.ContainerMetadata{ - Name: "test-name", - Attempt: 1, - }, - Image: &runtime.ImageSpec{ - Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799", - }, - Command: []string{"test", "command"}, - Args: []string{"test", "args"}, - WorkingDir: "test-cwd", - Envs: []*runtime.KeyValue{ - {Key: "k1", Value: "v1"}, - {Key: "k2", Value: "v2"}, - {Key: "k3", Value: "v3=v3bis"}, - {Key: "k4", Value: "v4=v4bis=foop"}, - }, - Labels: map[string]string{"a": "b"}, - Annotations: map[string]string{"ca-c": "ca-d"}, - Mounts: []*runtime.Mount{ - // everything default - { - ContainerPath: "container-path-1", - HostPath: "host-path-1", - }, - // readOnly - { - ContainerPath: "container-path-2", - HostPath: "host-path-2", - Readonly: true, - }, - }, - } - sandboxConfig := &runtime.PodSandboxConfig{ - Metadata: &runtime.PodSandboxMetadata{ - Name: "test-sandbox-name", - Uid: "test-sandbox-uid", - Namespace: "test-sandbox-ns", - Attempt: 2, - }, - Annotations: map[string]string{"c": "d"}, - } - imageConfig := &imagespec.ImageConfig{ - Env: []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"}, - Entrypoint: []string{"/entrypoint"}, - Cmd: []string{"cmd"}, - WorkingDir: "/workspace", - } - specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) { - assert.Equal(t, 
[]string{"test", "command", "test", "args"}, spec.Process.Args) - assert.Equal(t, "test-cwd", spec.Process.Cwd) - assert.Contains(t, spec.Process.Env, "k1=v1", "k2=v2", "k3=v3=v3bis", "ik4=iv4=iv4bis=boop") - assert.Contains(t, spec.Process.Env, "ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "k4=v4=v4bis=foop") - - t.Logf("Check bind mount") - checkMount(t, spec.Mounts, "host-path-1", "container-path-1", "bind", []string{"rw"}, nil) - checkMount(t, spec.Mounts, "host-path-2", "container-path-2", "bind", []string{"ro"}, nil) - - t.Logf("Check PodSandbox annotations") - assert.Contains(t, spec.Annotations, annotations.SandboxID) - assert.EqualValues(t, spec.Annotations[annotations.SandboxID], sandboxID) - - assert.Contains(t, spec.Annotations, annotations.ContainerType) - assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeContainer) - - assert.Contains(t, spec.Annotations, annotations.SandboxNamespace) - assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-sandbox-ns") - - assert.Contains(t, spec.Annotations, annotations.SandboxUID) - assert.EqualValues(t, spec.Annotations[annotations.SandboxUID], "test-sandbox-uid") - - assert.Contains(t, spec.Annotations, annotations.SandboxName) - assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-sandbox-name") - - assert.Contains(t, spec.Annotations, annotations.ImageName) - assert.EqualValues(t, spec.Annotations[annotations.ImageName], testImageName) - } - return config, sandboxConfig, imageConfig, specCheck -} diff --git a/pkg/cri/sbserver/container_create_test.go b/pkg/cri/sbserver/container_create_test.go deleted file mode 100644 index bbc21f209..000000000 --- a/pkg/cri/sbserver/container_create_test.go +++ /dev/null @@ -1,779 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "errors" - "os" - "path/filepath" - goruntime "runtime" - "testing" - - ostesting "github.com/containerd/containerd/pkg/os/testing" - "github.com/containerd/containerd/platforms" - - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/oci" - "github.com/containerd/containerd/pkg/cri/config" - "github.com/containerd/containerd/pkg/cri/constants" - "github.com/containerd/containerd/pkg/cri/opts" -) - -var currentPlatform = platforms.DefaultSpec() - -func checkMount(t *testing.T, mounts []runtimespec.Mount, src, dest, typ string, - contains, notcontains []string) { - found := false - for _, m := range mounts { - if m.Source == src && m.Destination == dest { - assert.Equal(t, m.Type, typ) - for _, c := range contains { - assert.Contains(t, m.Options, c) - } - for _, n := range notcontains { - assert.NotContains(t, m.Options, n) - } - found = true - break - } - } - assert.True(t, found, "mount from %q to %q not found", src, dest) -} - -const testImageName = "container-image-name" - -func TestGeneralContainerSpec(t *testing.T) { - testID := "test-id" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := newTestCRIService() - testSandboxID := "sandbox-id" - testContainerName := 
"container-name" - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - require.NoError(t, err) - specCheck(t, testID, testSandboxID, testPid, spec) -} - -func TestPodAnnotationPassthroughContainerSpec(t *testing.T) { - switch goruntime.GOOS { - case "darwin": - t.Skip("not implemented on Darwin") - case "freebsd": - t.Skip("not implemented on FreeBSD") - } - - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - - for _, test := range []struct { - desc string - podAnnotations []string - configChange func(*runtime.PodSandboxConfig) - specCheck func(*testing.T, *runtimespec.Spec) - }{ - { - desc: "a passthrough annotation should be passed as an OCI annotation", - podAnnotations: []string{"c"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - assert.Equal(t, spec.Annotations["c"], "d") - }, - }, - { - desc: "a non-passthrough annotation should not be passed as an OCI annotation", - configChange: func(c *runtime.PodSandboxConfig) { - c.Annotations["d"] = "e" - }, - podAnnotations: []string{"c"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - assert.Equal(t, spec.Annotations["c"], "d") - _, ok := spec.Annotations["d"] - assert.False(t, ok) - }, - }, - { - desc: "passthrough annotations should support wildcard match", - configChange: func(c *runtime.PodSandboxConfig) { - c.Annotations["t.f"] = "j" - c.Annotations["z.g"] = "o" - c.Annotations["z"] = "o" - c.Annotations["y.ca"] = "b" - c.Annotations["y"] = "b" - }, - podAnnotations: []string{"t*", "z.*", "y.c*"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - t.Logf("%+v", spec.Annotations) - assert.Equal(t, spec.Annotations["t.f"], "j") - assert.Equal(t, spec.Annotations["z.g"], "o") - assert.Equal(t, spec.Annotations["y.ca"], "b") - _, ok := spec.Annotations["y"] - assert.False(t, ok) - 
_, ok = spec.Annotations["z"] - assert.False(t, ok) - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c := newTestCRIService() - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - if test.configChange != nil { - test.configChange(sandboxConfig) - } - - ociRuntime := config.Runtime{ - PodAnnotations: test.podAnnotations, - } - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, - containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - assert.NoError(t, err) - assert.NotNil(t, spec) - specCheck(t, testID, testSandboxID, testPid, spec) - if test.specCheck != nil { - test.specCheck(t, spec) - } - }) - } -} - -func TestContainerSpecCommand(t *testing.T) { - for _, test := range []struct { - desc string - criEntrypoint []string - criArgs []string - imageEntrypoint []string - imageArgs []string - expected []string - expectErr bool - }{ - { - desc: "should use cri entrypoint if it's specified", - criEntrypoint: []string{"a", "b"}, - imageEntrypoint: []string{"c", "d"}, - imageArgs: []string{"e", "f"}, - expected: []string{"a", "b"}, - }, - { - desc: "should use cri entrypoint if it's specified even if it's empty", - criEntrypoint: []string{}, - criArgs: []string{"a", "b"}, - imageEntrypoint: []string{"c", "d"}, - imageArgs: []string{"e", "f"}, - expected: []string{"a", "b"}, - }, - { - desc: "should use cri entrypoint and args if they are specified", - criEntrypoint: []string{"a", "b"}, - criArgs: []string{"c", "d"}, - imageEntrypoint: []string{"e", "f"}, - imageArgs: []string{"g", "h"}, - expected: []string{"a", "b", "c", "d"}, - }, - { - desc: "should use image entrypoint if cri entrypoint is not specified", - criArgs: []string{"a", "b"}, - imageEntrypoint: []string{"c", "d"}, - imageArgs: []string{"e", "f"}, - expected: []string{"c", "d", "a", "b"}, - }, - { - desc: "should use image args if both cri entrypoint and args are not 
specified", - imageEntrypoint: []string{"c", "d"}, - imageArgs: []string{"e", "f"}, - expected: []string{"c", "d", "e", "f"}, - }, - { - desc: "should return error if both entrypoint and args are empty", - expectErr: true, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - config, _, imageConfig, _ := getCreateContainerTestData() - config.Command = test.criEntrypoint - config.Args = test.criArgs - imageConfig.Entrypoint = test.imageEntrypoint - imageConfig.Cmd = test.imageArgs - - var spec runtimespec.Spec - err := opts.WithProcessArgs(config, imageConfig)(context.Background(), nil, nil, &spec) - if test.expectErr { - assert.Error(t, err) - return - } - assert.NoError(t, err) - assert.Equal(t, test.expected, spec.Process.Args, test.desc) - }) - } -} - -func TestVolumeMounts(t *testing.T) { - testContainerRootDir := "test-container-root" - idmap := []*runtime.IDMapping{ - { - ContainerId: 0, - HostId: 100, - Length: 1, - }, - } - - for _, test := range []struct { - desc string - platform platforms.Platform - criMounts []*runtime.Mount - usernsEnabled bool - imageVolumes map[string]struct{} - expectedMountDest []string - expectedMappings []*runtime.IDMapping - }{ - { - desc: "should setup rw mount for image volumes", - imageVolumes: map[string]struct{}{ - "/test-volume-1": {}, - "/test-volume-2": {}, - }, - expectedMountDest: []string{ - "/test-volume-1", - "/test-volume-2", - }, - }, - { - desc: "should skip image volumes if already mounted by CRI", - criMounts: []*runtime.Mount{ - { - ContainerPath: "/test-volume-1", - HostPath: "/test-hostpath-1", - }, - }, - imageVolumes: map[string]struct{}{ - "/test-volume-1": {}, - "/test-volume-2": {}, - }, - expectedMountDest: []string{ - "/test-volume-2", - }, - }, - { - desc: "should compare and return cleanpath", - criMounts: []*runtime.Mount{ - { - ContainerPath: "/test-volume-1", - HostPath: "/test-hostpath-1", - }, - }, - imageVolumes: map[string]struct{}{ - "/test-volume-1/": {}, - "/test-volume-2/": 
{}, - }, - expectedMountDest: []string{ - "/test-volume-2/", - }, - }, - { - desc: "should make relative paths absolute on Linux", - platform: platforms.Platform{OS: "linux"}, - imageVolumes: map[string]struct{}{ - "./test-volume-1": {}, - "C:/test-volume-2": {}, - "../../test-volume-3": {}, - "/abs/test-volume-4": {}, - }, - expectedMountDest: []string{ - "/test-volume-1", - "/C:/test-volume-2", - "/test-volume-3", - "/abs/test-volume-4", - }, - }, - { - desc: "should include mappings for image volumes on Linux", - platform: platforms.Platform{OS: "linux"}, - usernsEnabled: true, - imageVolumes: map[string]struct{}{ - "/test-volume-1/": {}, - "/test-volume-2/": {}, - }, - expectedMountDest: []string{ - "/test-volume-2/", - "/test-volume-2/", - }, - expectedMappings: idmap, - }, - { - desc: "should NOT include mappings for image volumes on Linux if !userns", - platform: platforms.Platform{OS: "linux"}, - usernsEnabled: false, - imageVolumes: map[string]struct{}{ - "/test-volume-1/": {}, - "/test-volume-2/": {}, - }, - expectedMountDest: []string{ - "/test-volume-2/", - "/test-volume-2/", - }, - }, - { - desc: "should convert rel imageVolume paths to abs paths and add userns mappings", - platform: platforms.Platform{OS: "linux"}, - usernsEnabled: true, - imageVolumes: map[string]struct{}{ - "test-volume-1/": {}, - "C:/test-volume-2/": {}, - "../../test-volume-3/": {}, - }, - expectedMountDest: []string{ - "/test-volume-1", - "/C:/test-volume-2", - "/test-volume-3", - }, - expectedMappings: idmap, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - config := &imagespec.ImageConfig{ - Volumes: test.imageVolumes, - } - containerConfig := &runtime.ContainerConfig{Mounts: test.criMounts} - if test.usernsEnabled { - containerConfig.Linux = &runtime.LinuxContainerConfig{ - SecurityContext: &runtime.LinuxContainerSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - UsernsOptions: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: 
idmap, - Gids: idmap, - }, - }, - }, - } - } - - c := newTestCRIService() - got := c.volumeMounts(test.platform, testContainerRootDir, containerConfig, config) - assert.Len(t, got, len(test.expectedMountDest)) - for _, dest := range test.expectedMountDest { - found := false - for _, m := range got { - if m.ContainerPath != dest { - continue - } - found = true - assert.Equal(t, - filepath.Dir(m.HostPath), - filepath.Join(testContainerRootDir, "volumes")) - if test.expectedMappings != nil { - assert.Equal(t, test.expectedMappings, m.UidMappings) - assert.Equal(t, test.expectedMappings, m.GidMappings) - } - break - } - assert.True(t, found) - } - }) - } -} - -func TestContainerAnnotationPassthroughContainerSpec(t *testing.T) { - switch goruntime.GOOS { - case "darwin": - t.Skip("not implemented on Darwin") - case "freebsd": - t.Skip("not implemented on FreeBSD") - } - - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - - for _, test := range []struct { - desc string - podAnnotations []string - containerAnnotations []string - podConfigChange func(*runtime.PodSandboxConfig) - configChange func(*runtime.ContainerConfig) - specCheck func(*testing.T, *runtimespec.Spec) - }{ - { - desc: "passthrough annotations from pod and container should be passed as an OCI annotation", - podConfigChange: func(p *runtime.PodSandboxConfig) { - p.Annotations["pod.annotation.1"] = "1" - p.Annotations["pod.annotation.2"] = "2" - p.Annotations["pod.annotation.3"] = "3" - }, - configChange: func(c *runtime.ContainerConfig) { - c.Annotations["container.annotation.1"] = "1" - c.Annotations["container.annotation.2"] = "2" - c.Annotations["container.annotation.3"] = "3" - }, - podAnnotations: []string{"pod.annotation.1"}, - containerAnnotations: []string{"container.annotation.1"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - assert.Equal(t, "1", spec.Annotations["container.annotation.1"]) - _, ok := 
spec.Annotations["container.annotation.2"] - assert.False(t, ok) - _, ok = spec.Annotations["container.annotation.3"] - assert.False(t, ok) - assert.Equal(t, "1", spec.Annotations["pod.annotation.1"]) - _, ok = spec.Annotations["pod.annotation.2"] - assert.False(t, ok) - _, ok = spec.Annotations["pod.annotation.3"] - assert.False(t, ok) - }, - }, - { - desc: "passthrough annotations from pod and container should support wildcard", - podConfigChange: func(p *runtime.PodSandboxConfig) { - p.Annotations["pod.annotation.1"] = "1" - p.Annotations["pod.annotation.2"] = "2" - p.Annotations["pod.annotation.3"] = "3" - }, - configChange: func(c *runtime.ContainerConfig) { - c.Annotations["container.annotation.1"] = "1" - c.Annotations["container.annotation.2"] = "2" - c.Annotations["container.annotation.3"] = "3" - }, - podAnnotations: []string{"pod.annotation.*"}, - containerAnnotations: []string{"container.annotation.*"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - assert.Equal(t, "1", spec.Annotations["container.annotation.1"]) - assert.Equal(t, "2", spec.Annotations["container.annotation.2"]) - assert.Equal(t, "3", spec.Annotations["container.annotation.3"]) - assert.Equal(t, "1", spec.Annotations["pod.annotation.1"]) - assert.Equal(t, "2", spec.Annotations["pod.annotation.2"]) - assert.Equal(t, "3", spec.Annotations["pod.annotation.3"]) - }, - }, - { - desc: "annotations should not pass through if no passthrough annotations are configured", - podConfigChange: func(p *runtime.PodSandboxConfig) { - p.Annotations["pod.annotation.1"] = "1" - p.Annotations["pod.annotation.2"] = "2" - p.Annotations["pod.annotation.3"] = "3" - }, - configChange: func(c *runtime.ContainerConfig) { - c.Annotations["container.annotation.1"] = "1" - c.Annotations["container.annotation.2"] = "2" - c.Annotations["container.annotation.3"] = "3" - }, - podAnnotations: []string{}, - containerAnnotations: []string{}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - _, ok := 
spec.Annotations["container.annotation.1"] - assert.False(t, ok) - _, ok = spec.Annotations["container.annotation.2"] - assert.False(t, ok) - _, ok = spec.Annotations["container.annotation.3"] - assert.False(t, ok) - _, ok = spec.Annotations["pod.annotation.1"] - assert.False(t, ok) - _, ok = spec.Annotations["pod.annotation.2"] - assert.False(t, ok) - _, ok = spec.Annotations["pod.annotation.3"] - assert.False(t, ok) - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c := newTestCRIService() - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - if test.configChange != nil { - test.configChange(containerConfig) - } - if test.podConfigChange != nil { - test.podConfigChange(sandboxConfig) - } - ociRuntime := config.Runtime{ - PodAnnotations: test.podAnnotations, - ContainerAnnotations: test.containerAnnotations, - } - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, - containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - assert.NoError(t, err) - assert.NotNil(t, spec) - specCheck(t, testID, testSandboxID, testPid, spec) - if test.specCheck != nil { - test.specCheck(t, spec) - } - }) - } -} - -func TestBaseRuntimeSpec(t *testing.T) { - c := newTestCRIService() - c.baseOCISpecs = map[string]*oci.Spec{ - "/etc/containerd/cri-base.json": { - Version: "1.0.2", - Hostname: "old", - }, - } - - out, err := c.runtimeSpec( - "id1", - platforms.DefaultSpec(), - "/etc/containerd/cri-base.json", - oci.WithHostname("new-host"), - oci.WithDomainname("new-domain"), - ) - assert.NoError(t, err) - - assert.Equal(t, "1.0.2", out.Version) - assert.Equal(t, "new-host", out.Hostname) - assert.Equal(t, "new-domain", out.Domainname) - - // Make sure original base spec not changed - assert.NotEqual(t, out, c.baseOCISpecs["/etc/containerd/cri-base.json"]) - assert.Equal(t, c.baseOCISpecs["/etc/containerd/cri-base.json"].Hostname, "old") - - 
assert.Equal(t, filepath.Join("/", constants.K8sContainerdNamespace, "id1"), out.Linux.CgroupsPath) -} - -func TestLinuxContainerMounts(t *testing.T) { - const testSandboxID = "test-id" - idmap := []*runtime.IDMapping{ - { - ContainerId: 0, - HostId: 100, - Length: 1, - }, - } - - for _, test := range []struct { - desc string - statFn func(string) (os.FileInfo, error) - criMounts []*runtime.Mount - securityContext *runtime.LinuxContainerSecurityContext - expectedMounts []*runtime.Mount - }{ - { - desc: "should setup ro mount when rootfs is read-only", - securityContext: &runtime.LinuxContainerSecurityContext{ - ReadonlyRootfs: true, - }, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), - Readonly: true, - SelinuxRelabel: true, - }, - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: true, - SelinuxRelabel: true, - }, - { - ContainerPath: resolvConfPath, - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - Readonly: true, - SelinuxRelabel: true, - }, - { - ContainerPath: "/dev/shm", - HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), - Readonly: false, - SelinuxRelabel: true, - }, - }, - }, - { - desc: "should setup rw mount when rootfs is read-write", - securityContext: &runtime.LinuxContainerSecurityContext{}, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), - Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: resolvConfPath, - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - Readonly: false, - SelinuxRelabel: true, - 
}, - { - ContainerPath: "/dev/shm", - HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), - Readonly: false, - SelinuxRelabel: true, - }, - }, - }, - { - desc: "should setup uidMappings/gidMappings when userns is used", - securityContext: &runtime.LinuxContainerSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - UsernsOptions: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: idmap, - Gids: idmap, - }, - }, - }, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), - Readonly: false, - SelinuxRelabel: true, - UidMappings: idmap, - GidMappings: idmap, - }, - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: false, - SelinuxRelabel: true, - UidMappings: idmap, - GidMappings: idmap, - }, - { - ContainerPath: resolvConfPath, - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - Readonly: false, - SelinuxRelabel: true, - UidMappings: idmap, - GidMappings: idmap, - }, - { - ContainerPath: "/dev/shm", - HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), - Readonly: false, - SelinuxRelabel: true, - }, - }, - }, - { - desc: "should use host /dev/shm when host ipc is set", - securityContext: &runtime.LinuxContainerSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{Ipc: runtime.NamespaceMode_NODE}, - }, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), - Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: resolvConfPath, - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - 
Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: "/dev/shm", - HostPath: "/dev/shm", - Readonly: false, - }, - }, - }, - { - desc: "should skip container mounts if already mounted by CRI", - criMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: "/test-etc-hostname", - }, - { - ContainerPath: "/etc/hosts", - HostPath: "/test-etc-host", - }, - { - ContainerPath: resolvConfPath, - HostPath: "test-resolv-conf", - }, - { - ContainerPath: "/dev/shm", - HostPath: "test-dev-shm", - }, - }, - securityContext: &runtime.LinuxContainerSecurityContext{}, - expectedMounts: nil, - }, - { - desc: "should skip hostname mount if the old sandbox doesn't have hostname file", - statFn: func(path string) (os.FileInfo, error) { - assert.Equal(t, filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), path) - return nil, errors.New("random error") - }, - securityContext: &runtime.LinuxContainerSecurityContext{}, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: resolvConfPath, - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: "/dev/shm", - HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), - Readonly: false, - SelinuxRelabel: true, - }, - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - config := &runtime.ContainerConfig{ - Metadata: &runtime.ContainerMetadata{ - Name: "test-name", - Attempt: 1, - }, - Mounts: test.criMounts, - Linux: &runtime.LinuxContainerConfig{ - SecurityContext: test.securityContext, - }, - } - c := newTestCRIService() - c.os.(*ostesting.FakeOS).StatFn = test.statFn - mounts := c.linuxContainerMounts(testSandboxID, config) - assert.Equal(t, test.expectedMounts, mounts, test.desc) - }) - } -} diff 
--git a/pkg/cri/sbserver/container_create_windows.go b/pkg/cri/sbserver/container_create_windows.go deleted file mode 100644 index 9beb2a44a..000000000 --- a/pkg/cri/sbserver/container_create_windows.go +++ /dev/null @@ -1,50 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "strconv" - - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/oci" - "github.com/containerd/containerd/snapshots" -) - -// No extra spec options needed for windows. 
-func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { - return nil, nil -} - -// snapshotterOpts returns any Windows specific snapshotter options for the r/w layer -func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) { - var opts []snapshots.Opt - - switch snapshotterName { - case "windows": - rootfsSize := config.GetWindows().GetResources().GetRootfsSizeInBytes() - if rootfsSize != 0 { - labels := map[string]string{ - "containerd.io/snapshot/windows/rootfs.sizebytes": strconv.FormatInt(rootfsSize, 10), - } - opts = append(opts, snapshots.WithLabels(labels)) - } - } - - return opts, nil -} diff --git a/pkg/cri/sbserver/container_create_windows_test.go b/pkg/cri/sbserver/container_create_windows_test.go deleted file mode 100644 index cb20823a4..000000000 --- a/pkg/cri/sbserver/container_create_windows_test.go +++ /dev/null @@ -1,256 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "testing" - - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/stretchr/testify/assert" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/pkg/cri/annotations" - "github.com/containerd/containerd/pkg/cri/config" -) - -func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig, - *imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) { - config := &runtime.ContainerConfig{ - Metadata: &runtime.ContainerMetadata{ - Name: "test-name", - Attempt: 1, - }, - Image: &runtime.ImageSpec{ - Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799", - }, - Command: []string{"test", "command"}, - Args: []string{"test", "args"}, - WorkingDir: "test-cwd", - Envs: []*runtime.KeyValue{ - {Key: "k1", Value: "v1"}, - {Key: "k2", Value: "v2"}, - {Key: "k3", Value: "v3=v3bis"}, - {Key: "k4", Value: "v4=v4bis=foop"}, - }, - Mounts: []*runtime.Mount{ - // everything default - { - ContainerPath: "container-path-1", - HostPath: "host-path-1", - }, - // readOnly - { - ContainerPath: "container-path-2", - HostPath: "host-path-2", - Readonly: true, - }, - }, - Labels: map[string]string{"a": "b"}, - Annotations: map[string]string{"c": "d"}, - Windows: &runtime.WindowsContainerConfig{ - Resources: &runtime.WindowsContainerResources{ - CpuShares: 100, - CpuCount: 200, - CpuMaximum: 300, - MemoryLimitInBytes: 400, - }, - SecurityContext: &runtime.WindowsContainerSecurityContext{ - RunAsUsername: "test-user", - CredentialSpec: "{\"test\": \"spec\"}", - HostProcess: false, - }, - }, - } - sandboxConfig := &runtime.PodSandboxConfig{ - Metadata: &runtime.PodSandboxMetadata{ - Name: "test-sandbox-name", - Uid: "test-sandbox-uid", - Namespace: "test-sandbox-ns", - Attempt: 2, - }, - Windows: &runtime.WindowsPodSandboxConfig{}, - Hostname: "test-hostname", - 
Annotations: map[string]string{"c": "d"}, - } - imageConfig := &imagespec.ImageConfig{ - Env: []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"}, - Entrypoint: []string{"/entrypoint"}, - Cmd: []string{"cmd"}, - WorkingDir: "/workspace", - User: "ContainerUser", - } - specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) { - assert.Nil(t, spec.Root) - assert.Equal(t, "test-hostname", spec.Hostname) - assert.Equal(t, []string{"test", "command", "test", "args"}, spec.Process.Args) - assert.Equal(t, "test-cwd", spec.Process.Cwd) - assert.Contains(t, spec.Process.Env, "k1=v1", "k2=v2", "k3=v3=v3bis", "ik4=iv4=iv4bis=boop") - assert.Contains(t, spec.Process.Env, "ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "k4=v4=v4bis=foop") - - t.Logf("Check bind mount") - checkMount(t, spec.Mounts, "host-path-1", "container-path-1", "", []string{"rw"}, nil) - checkMount(t, spec.Mounts, "host-path-2", "container-path-2", "", []string{"ro"}, nil) - - t.Logf("Check resource limits") - assert.EqualValues(t, *spec.Windows.Resources.CPU.Shares, 100) - assert.EqualValues(t, *spec.Windows.Resources.CPU.Count, 200) - assert.EqualValues(t, *spec.Windows.Resources.CPU.Maximum, 300) - assert.EqualValues(t, *spec.Windows.Resources.CPU.Maximum, 300) - assert.EqualValues(t, *spec.Windows.Resources.Memory.Limit, 400) - - // Also checks if override of the image configs user is behaving. 
- t.Logf("Check username") - assert.Contains(t, spec.Process.User.Username, "test-user") - - t.Logf("Check credential spec") - assert.Contains(t, spec.Windows.CredentialSpec, "{\"test\": \"spec\"}") - - t.Logf("Check PodSandbox annotations") - assert.Contains(t, spec.Annotations, annotations.SandboxID) - assert.EqualValues(t, spec.Annotations[annotations.SandboxID], sandboxID) - - assert.Contains(t, spec.Annotations, annotations.ContainerType) - assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeContainer) - - assert.Contains(t, spec.Annotations, annotations.SandboxNamespace) - assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-sandbox-ns") - - assert.Contains(t, spec.Annotations, annotations.SandboxUID) - assert.EqualValues(t, spec.Annotations[annotations.SandboxUID], "test-sandbox-uid") - - assert.Contains(t, spec.Annotations, annotations.SandboxName) - assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-sandbox-name") - - assert.Contains(t, spec.Annotations, annotations.WindowsHostProcess) - assert.EqualValues(t, spec.Annotations[annotations.WindowsHostProcess], "false") - } - return config, sandboxConfig, imageConfig, specCheck -} - -func TestContainerWindowsNetworkNamespace(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - nsPath := "test-cni" - c := newTestCRIService() - - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) - assert.NoError(t, err) - assert.NotNil(t, spec) - specCheck(t, testID, testSandboxID, testPid, spec) - assert.NotNil(t, spec.Windows) - assert.NotNil(t, spec.Windows.Network) - assert.Equal(t, nsPath, spec.Windows.Network.NetworkNamespace) -} 
- -func TestMountCleanPath(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - nsPath := "test-cni" - c := newTestCRIService() - - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - containerConfig.Mounts = append(containerConfig.Mounts, &runtime.Mount{ - ContainerPath: "c:/test/container-path", - HostPath: "c:/test/host-path", - }) - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) - assert.NoError(t, err) - assert.NotNil(t, spec) - specCheck(t, testID, testSandboxID, testPid, spec) - checkMount(t, spec.Mounts, "c:\\test\\host-path", "c:\\test\\container-path", "", []string{"rw"}, nil) -} - -func TestMountNamedPipe(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - nsPath := "test-cni" - c := newTestCRIService() - - containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - containerConfig.Mounts = append(containerConfig.Mounts, &runtime.Mount{ - ContainerPath: `\\.\pipe\foo`, - HostPath: `\\.\pipe\foo`, - }) - spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) - assert.NoError(t, err) - assert.NotNil(t, spec) - specCheck(t, testID, testSandboxID, testPid, spec) - checkMount(t, spec.Mounts, `\\.\pipe\foo`, `\\.\pipe\foo`, "", []string{"rw"}, nil) -} - -func TestHostProcessRequirements(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() - ociRuntime := config.Runtime{} - c := 
newTestCRIService() - for _, test := range []struct { - desc string - containerHostProcess bool - sandboxHostProcess bool - expectError bool - }{ - { - desc: "hostprocess container in non-hostprocess sandbox should fail", - containerHostProcess: true, - sandboxHostProcess: false, - expectError: true, - }, - { - desc: "hostprocess container in hostprocess sandbox should be fine", - containerHostProcess: true, - sandboxHostProcess: true, - expectError: false, - }, - { - desc: "non-hostprocess container in hostprocess sandbox should fail", - containerHostProcess: false, - sandboxHostProcess: true, - expectError: true, - }, - { - desc: "non-hostprocess container in non-hostprocess sandbox should be fine", - containerHostProcess: false, - sandboxHostProcess: false, - expectError: false, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - containerConfig.Windows.SecurityContext.HostProcess = test.containerHostProcess - sandboxConfig.Windows.SecurityContext = &runtime.WindowsSandboxSecurityContext{ - HostProcess: test.sandboxHostProcess, - } - _, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) - if test.expectError { - assert.Error(t, err) - } else { - assert.NoError(t, err) - } - }) - } -} diff --git a/pkg/cri/sbserver/container_events.go b/pkg/cri/sbserver/container_events.go deleted file mode 100644 index 563cd26c9..000000000 --- a/pkg/cri/sbserver/container_events.go +++ /dev/null @@ -1,33 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func (c *criService) GetContainerEvents(r *runtime.GetEventsRequest, s runtime.RuntimeService_GetContainerEventsServer) error { - // TODO (https://github.com/containerd/containerd/issues/7318): - // replace with a real implementation that broadcasts containerEventsChan - // to all subscribers. - for event := range c.containerEventsChan { - if err := s.Send(&event); err != nil { - return err - } - } - return nil -} diff --git a/pkg/cri/sbserver/container_exec.go b/pkg/cri/sbserver/container_exec.go deleted file mode 100644 index 10d716b27..000000000 --- a/pkg/cri/sbserver/container_exec.go +++ /dev/null @@ -1,37 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "fmt" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// Exec prepares a streaming endpoint to execute a command in the container, and returns the address. 
-func (c *criService) Exec(ctx context.Context, r *runtime.ExecRequest) (*runtime.ExecResponse, error) { - cntr, err := c.containerStore.Get(r.GetContainerId()) - if err != nil { - return nil, fmt.Errorf("failed to find container %q in store: %w", r.GetContainerId(), err) - } - state := cntr.Status.Get().State() - if state != runtime.ContainerState_CONTAINER_RUNNING { - return nil, fmt.Errorf("container is in %s state", criContainerStateToString(state)) - } - return c.streamServer.GetExec(r) -} diff --git a/pkg/cri/sbserver/container_execsync.go b/pkg/cri/sbserver/container_execsync.go deleted file mode 100644 index 8d62888e7..000000000 --- a/pkg/cri/sbserver/container_execsync.go +++ /dev/null @@ -1,308 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "bytes" - "context" - "fmt" - "io" - "syscall" - "time" - - "github.com/containerd/containerd" - containerdio "github.com/containerd/containerd/cio" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/oci" - "github.com/containerd/log" - "k8s.io/client-go/tools/remotecommand" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - cio "github.com/containerd/containerd/pkg/cri/io" - "github.com/containerd/containerd/pkg/cri/util" - cioutil "github.com/containerd/containerd/pkg/ioutil" -) - -type cappedWriter struct { - w io.WriteCloser - remain int -} - -func (cw *cappedWriter) Write(p []byte) (int, error) { - if cw.remain <= 0 { - return len(p), nil - } - - end := cw.remain - if end > len(p) { - end = len(p) - } - written, err := cw.w.Write(p[0:end]) - cw.remain -= written - - if err != nil { - return written, err - } - return len(p), nil -} - -func (cw *cappedWriter) Close() error { - return cw.w.Close() -} - -func (cw *cappedWriter) isFull() bool { - return cw.remain <= 0 -} - -// ExecSync executes a command in the container, and returns the stdout output. -// If command exits with a non-zero exit code, an error is returned. -func (c *criService) ExecSync(ctx context.Context, r *runtime.ExecSyncRequest) (*runtime.ExecSyncResponse, error) { - const maxStreamSize = 1024 * 1024 * 16 - - var stdout, stderr bytes.Buffer - - // cappedWriter truncates the output. In that case, the size of - // the ExecSyncResponse will hit the CRI plugin's gRPC response limit. - // Thus the callers outside of the containerd process (e.g. Kubelet) never see - // the truncated output. 
- cout := &cappedWriter{w: cioutil.NewNopWriteCloser(&stdout), remain: maxStreamSize} - cerr := &cappedWriter{w: cioutil.NewNopWriteCloser(&stderr), remain: maxStreamSize} - - exitCode, err := c.execInContainer(ctx, r.GetContainerId(), execOptions{ - cmd: r.GetCmd(), - stdout: cout, - stderr: cerr, - timeout: time.Duration(r.GetTimeout()) * time.Second, - }) - if err != nil { - return nil, fmt.Errorf("failed to exec in container: %w", err) - } - - return &runtime.ExecSyncResponse{ - Stdout: stdout.Bytes(), - Stderr: stderr.Bytes(), - ExitCode: int32(*exitCode), - }, nil -} - -// execOptions specifies how to execute command in container. -type execOptions struct { - cmd []string - stdin io.Reader - stdout io.WriteCloser - stderr io.WriteCloser - tty bool - resize <-chan remotecommand.TerminalSize - timeout time.Duration -} - -func (c *criService) execInternal(ctx context.Context, container containerd.Container, id string, opts execOptions) (*uint32, error) { - // Cancel the context before returning to ensure goroutines are stopped. - // This is important, because if `Start` returns error, `Wait` will hang - // forever unless we cancel the context. 
- ctx, cancel := context.WithCancel(ctx) - defer cancel() - - var drainExecSyncIOTimeout time.Duration - var err error - - if c.config.DrainExecSyncIOTimeout != "" { - drainExecSyncIOTimeout, err = time.ParseDuration(c.config.DrainExecSyncIOTimeout) - if err != nil { - return nil, fmt.Errorf("failed to parse drain_exec_sync_io_timeout %q: %w", - c.config.DrainExecSyncIOTimeout, err) - } - } - - spec, err := container.Spec(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get container spec: %w", err) - } - task, err := container.Task(ctx, nil) - if err != nil { - return nil, fmt.Errorf("failed to load task: %w", err) - } - pspec := spec.Process - - pspec.Terminal = opts.tty - if opts.tty { - if err := oci.WithEnv([]string{"TERM=xterm"})(ctx, nil, nil, spec); err != nil { - return nil, fmt.Errorf("add TERM env var to spec: %w", err) - } - } - - pspec.Args = opts.cmd - - if opts.stdout == nil { - opts.stdout = cio.NewDiscardLogger() - } - if opts.stderr == nil { - opts.stderr = cio.NewDiscardLogger() - } - execID := util.GenerateID() - log.G(ctx).Debugf("Generated exec id %q for container %q", execID, id) - volatileRootDir := c.getVolatileContainerRootDir(id) - var execIO *cio.ExecIO - process, err := task.Exec(ctx, execID, pspec, - func(id string) (containerdio.IO, error) { - var err error - execIO, err = cio.NewExecIO(id, volatileRootDir, opts.tty, opts.stdin != nil) - return execIO, err - }, - ) - if err != nil { - return nil, fmt.Errorf("failed to create exec %q: %w", execID, err) - } - defer func() { - deferCtx, deferCancel := util.DeferContext() - defer deferCancel() - if _, err := process.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { - log.G(ctx).WithError(err).Errorf("Failed to delete exec process %q for container %q", execID, id) - } - }() - - exitCh, err := process.Wait(ctx) - if err != nil { - return nil, fmt.Errorf("failed to wait for process %q: %w", execID, err) - } - if err := process.Start(ctx); err != 
nil { - return nil, fmt.Errorf("failed to start exec %q: %w", execID, err) - } - - handleResizing(ctx, opts.resize, func(size remotecommand.TerminalSize) { - if err := process.Resize(ctx, uint32(size.Width), uint32(size.Height)); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to resize process %q console for container %q", execID, id) - } - }) - - attachDone := execIO.Attach(cio.AttachOptions{ - Stdin: opts.stdin, - Stdout: opts.stdout, - Stderr: opts.stderr, - Tty: opts.tty, - StdinOnce: true, - CloseStdin: func() error { - return process.CloseIO(ctx, containerd.WithStdinCloser) - }, - }) - - execCtx := ctx - if opts.timeout > 0 { - var execCtxCancel context.CancelFunc - execCtx, execCtxCancel = context.WithTimeout(ctx, opts.timeout) - defer execCtxCancel() - } - - select { - case <-execCtx.Done(): - // Ignore the not found error because the process may exit itself before killing. - if err := process.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("failed to kill exec %q: %w", execID, err) - } - // Wait for the process to be killed. 
- exitRes := <-exitCh - log.G(ctx).Debugf("Timeout received while waiting for exec process kill %q code %d and error %v", - execID, exitRes.ExitCode(), exitRes.Error()) - - if err := drainExecSyncIO(ctx, process, drainExecSyncIOTimeout, attachDone); err != nil { - log.G(ctx).WithError(err).Warnf("failed to drain exec process %q io", execID) - } - - return nil, fmt.Errorf("timeout %v exceeded: %w", opts.timeout, execCtx.Err()) - case exitRes := <-exitCh: - code, _, err := exitRes.Result() - log.G(ctx).Debugf("Exec process %q exits with exit code %d and error %v", execID, code, err) - if err != nil { - return nil, fmt.Errorf("failed while waiting for exec %q: %w", execID, err) - } - - if err := drainExecSyncIO(ctx, process, drainExecSyncIOTimeout, attachDone); err != nil { - return nil, fmt.Errorf("failed to drain exec process %q io: %w", execID, err) - } - return &code, nil - } -} - -// execInContainer executes a command inside the container synchronously, and -// redirects stdio stream properly. -// This function only returns when the exec process exits, this means that: -// 1) As long as the exec process is running, the goroutine in the cri plugin -// will be running and wait for the exit code; -// 2) `kubectl exec -it` will hang until the exec process exits, even after io -// is detached. This is different from dockershim, which leaves the exec process -// running in background after io is detached. -// https://github.com/kubernetes/kubernetes/blob/v1.15.0/pkg/kubelet/dockershim/exec.go#L127 -// For example, if the `kubectl exec -it` process is killed, IO will be closed. In -// this case, the CRI plugin will still have a goroutine waiting for the exec process -// to exit and log the exit code, but dockershim won't. -func (c *criService) execInContainer(ctx context.Context, id string, opts execOptions) (*uint32, error) { - // Get container from our container store. 
- cntr, err := c.containerStore.Get(id) - - if err != nil { - return nil, fmt.Errorf("failed to find container %q in store: %w", id, err) - } - id = cntr.ID - - state := cntr.Status.Get().State() - if state != runtime.ContainerState_CONTAINER_RUNNING { - return nil, fmt.Errorf("container is in %s state", criContainerStateToString(state)) - } - - return c.execInternal(ctx, cntr.Container, id, opts) -} - -// drainExecSyncIO drains process IO with timeout after exec init process exits. -// -// By default, the child processes spawned by exec process will inherit standard -// io file descriptors. The shim server creates a pipe as data channel. Both -// exec process and its children write data into the write end of the pipe. -// And the shim server will read data from the pipe. If the write end is still -// open, the shim server will continue to wait for data from pipe. -// -// If the exec command is like `bash -c "sleep 365d &"`, the exec process -// is bash and quit after create `sleep 365d`. But the `sleep 365d` will hold -// the write end of the pipe for a year! It doesn't make senses that CRI plugin -// should wait for it. -func drainExecSyncIO(ctx context.Context, execProcess containerd.Process, drainExecIOTimeout time.Duration, attachDone <-chan struct{}) error { - var timerCh <-chan time.Time - - if drainExecIOTimeout != 0 { - timer := time.NewTimer(drainExecIOTimeout) - defer timer.Stop() - - timerCh = timer.C - } - - select { - case <-timerCh: - case <-attachDone: - log.G(ctx).Debugf("Stream pipe for exec process %q done", execProcess.ID()) - return nil - } - - log.G(ctx).Debugf("Exec process %q exits but the io is still held by other processes. 
Trying to delete exec process to release io", execProcess.ID()) - _, err := execProcess.Delete(ctx, containerd.WithProcessKill) - if err != nil { - if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to release exec io by deleting exec process %q: %w", - execProcess.ID(), err) - } - } - return fmt.Errorf("failed to drain exec process %q io in %s because io is still held by other processes", - execProcess.ID(), drainExecIOTimeout) -} diff --git a/pkg/cri/sbserver/container_execsync_test.go b/pkg/cri/sbserver/container_execsync_test.go deleted file mode 100644 index 3f3ef274d..000000000 --- a/pkg/cri/sbserver/container_execsync_test.go +++ /dev/null @@ -1,150 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "bytes" - "context" - "os" - "syscall" - "testing" - "time" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/cio" - cioutil "github.com/containerd/containerd/pkg/ioutil" - "github.com/stretchr/testify/assert" -) - -func TestCWWrite(t *testing.T) { - var buf bytes.Buffer - cw := &cappedWriter{w: cioutil.NewNopWriteCloser(&buf), remain: 10} - - n, err := cw.Write([]byte("hello")) - assert.NoError(t, err) - assert.Equal(t, 5, n) - - n, err = cw.Write([]byte("helloworld")) - assert.NoError(t, err, "no errors even it hits the cap") - assert.Equal(t, 10, n, "no indication of partial write") - assert.True(t, cw.isFull()) - assert.Equal(t, []byte("hellohello"), buf.Bytes(), "the underlying writer is capped") - - _, err = cw.Write([]byte("world")) - assert.NoError(t, err) - assert.True(t, cw.isFull()) - assert.Equal(t, []byte("hellohello"), buf.Bytes(), "the underlying writer is capped") -} - -func TestCWClose(t *testing.T) { - var buf bytes.Buffer - cw := &cappedWriter{w: cioutil.NewNopWriteCloser(&buf), remain: 5} - err := cw.Close() - assert.NoError(t, err) -} - -func TestDrainExecSyncIO(t *testing.T) { - ctx := context.TODO() - - t.Run("NoTimeout", func(t *testing.T) { - ep := &fakeExecProcess{ - id: t.Name(), - pid: uint32(os.Getpid()), - } - - attachDoneCh := make(chan struct{}) - time.AfterFunc(2*time.Second, func() { close(attachDoneCh) }) - assert.NoError(t, drainExecSyncIO(ctx, ep, 0, attachDoneCh)) - assert.Equal(t, 0, len(ep.actionEvents)) - }) - - t.Run("With3Seconds", func(t *testing.T) { - ep := &fakeExecProcess{ - id: t.Name(), - pid: uint32(os.Getpid()), - } - - attachDoneCh := make(chan struct{}) - time.AfterFunc(100*time.Second, func() { close(attachDoneCh) }) - assert.Error(t, drainExecSyncIO(ctx, ep, 3*time.Second, attachDoneCh)) - assert.Equal(t, []string{"Delete"}, ep.actionEvents) - }) -} - -type fakeExecProcess struct { - id string - pid uint32 - actionEvents []string -} - -// ID of the 
process -func (p *fakeExecProcess) ID() string { - return p.id -} - -// Pid is the system specific process id -func (p *fakeExecProcess) Pid() uint32 { - return p.pid -} - -// Start starts the process executing the user's defined binary -func (p *fakeExecProcess) Start(context.Context) error { - p.actionEvents = append(p.actionEvents, "Start") - return nil -} - -// Delete removes the process and any resources allocated returning the exit status -func (p *fakeExecProcess) Delete(context.Context, ...containerd.ProcessDeleteOpts) (*containerd.ExitStatus, error) { - p.actionEvents = append(p.actionEvents, "Delete") - return nil, nil -} - -// Kill sends the provided signal to the process -func (p *fakeExecProcess) Kill(context.Context, syscall.Signal, ...containerd.KillOpts) error { - p.actionEvents = append(p.actionEvents, "Kill") - return nil -} - -// Wait asynchronously waits for the process to exit, and sends the exit code to the returned channel -func (p *fakeExecProcess) Wait(context.Context) (<-chan containerd.ExitStatus, error) { - p.actionEvents = append(p.actionEvents, "Wait") - return nil, nil -} - -// CloseIO allows various pipes to be closed on the process -func (p *fakeExecProcess) CloseIO(context.Context, ...containerd.IOCloserOpts) error { - p.actionEvents = append(p.actionEvents, "CloseIO") - return nil -} - -// Resize changes the width and height of the process's terminal -func (p *fakeExecProcess) Resize(ctx context.Context, w, h uint32) error { - p.actionEvents = append(p.actionEvents, "Resize") - return nil -} - -// IO returns the io set for the process -func (p *fakeExecProcess) IO() cio.IO { - p.actionEvents = append(p.actionEvents, "IO") - return nil -} - -// Status returns the executing status of the process -func (p *fakeExecProcess) Status(context.Context) (containerd.Status, error) { - p.actionEvents = append(p.actionEvents, "Status") - return containerd.Status{}, nil -} diff --git a/pkg/cri/sbserver/container_list.go 
b/pkg/cri/sbserver/container_list.go deleted file mode 100644 index 51cb10268..000000000 --- a/pkg/cri/sbserver/container_list.go +++ /dev/null @@ -1,116 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "time" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - containerstore "github.com/containerd/containerd/pkg/cri/store/container" -) - -// ListContainers lists all containers matching the filter. -func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (*runtime.ListContainersResponse, error) { - start := time.Now() - // List all containers from store. - containersInStore := c.containerStore.List() - - var containers []*runtime.Container - for _, container := range containersInStore { - containers = append(containers, toCRIContainer(container)) - } - - containers = c.filterCRIContainers(containers, r.GetFilter()) - - containerListTimer.UpdateSince(start) - return &runtime.ListContainersResponse{Containers: containers}, nil -} - -// toCRIContainer converts internal container object into CRI container. 
-func toCRIContainer(container containerstore.Container) *runtime.Container { - status := container.Status.Get() - return &runtime.Container{ - Id: container.ID, - PodSandboxId: container.SandboxID, - Metadata: container.Config.GetMetadata(), - Image: container.Config.GetImage(), - ImageRef: container.ImageRef, - State: status.State(), - CreatedAt: status.CreatedAt, - Labels: container.Config.GetLabels(), - Annotations: container.Config.GetAnnotations(), - } -} - -func (c *criService) normalizeContainerFilter(filter *runtime.ContainerFilter) { - if cntr, err := c.containerStore.Get(filter.GetId()); err == nil { - filter.Id = cntr.ID - } - if sb, err := c.sandboxStore.Get(filter.GetPodSandboxId()); err == nil { - filter.PodSandboxId = sb.ID - } -} - -// filterCRIContainers filters CRIContainers. -func (c *criService) filterCRIContainers(containers []*runtime.Container, filter *runtime.ContainerFilter) []*runtime.Container { - if filter == nil { - return containers - } - - // The containerd cri plugin supports short ids so long as there is only one - // match. So we do a lookup against the store here if a pod id has been - // included in the filter. 
- sb := filter.GetPodSandboxId() - if sb != "" { - sandbox, err := c.sandboxStore.Get(sb) - if err == nil { - sb = sandbox.ID - } - } - - c.normalizeContainerFilter(filter) - filtered := []*runtime.Container{} - for _, cntr := range containers { - if filter.GetId() != "" && filter.GetId() != cntr.Id { - continue - } - if sb != "" && sb != cntr.PodSandboxId { - continue - } - if filter.GetState() != nil && filter.GetState().GetState() != cntr.State { - continue - } - if filter.GetLabelSelector() != nil { - match := true - for k, v := range filter.GetLabelSelector() { - got, ok := cntr.Labels[k] - if !ok || got != v { - match = false - break - } - } - if !match { - continue - } - } - filtered = append(filtered, cntr) - } - - return filtered -} diff --git a/pkg/cri/sbserver/container_list_test.go b/pkg/cri/sbserver/container_list_test.go deleted file mode 100644 index ce4f59bd1..000000000 --- a/pkg/cri/sbserver/container_list_test.go +++ /dev/null @@ -1,366 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" -) - -func TestToCRIContainer(t *testing.T) { - config := &runtime.ContainerConfig{ - Metadata: &runtime.ContainerMetadata{ - Name: "test-name", - Attempt: 1, - }, - Image: &runtime.ImageSpec{Image: "test-image"}, - Labels: map[string]string{"a": "b"}, - Annotations: map[string]string{"c": "d"}, - } - createdAt := time.Now().UnixNano() - container, err := containerstore.NewContainer( - containerstore.Metadata{ - ID: "test-id", - Name: "test-name", - SandboxID: "test-sandbox-id", - Config: config, - ImageRef: "test-image-ref", - }, - containerstore.WithFakeStatus( - containerstore.Status{ - Pid: 1234, - CreatedAt: createdAt, - StartedAt: time.Now().UnixNano(), - FinishedAt: time.Now().UnixNano(), - ExitCode: 1, - Reason: "test-reason", - Message: "test-message", - }, - ), - ) - assert.NoError(t, err) - expect := &runtime.Container{ - Id: "test-id", - PodSandboxId: "test-sandbox-id", - Metadata: config.GetMetadata(), - Image: config.GetImage(), - ImageRef: "test-image-ref", - State: runtime.ContainerState_CONTAINER_EXITED, - CreatedAt: createdAt, - Labels: config.GetLabels(), - Annotations: config.GetAnnotations(), - } - c := toCRIContainer(container) - assert.Equal(t, expect, c) -} - -func TestFilterContainers(t *testing.T) { - c := newTestCRIService() - - testContainers := []*runtime.Container{ - { - Id: "1", - PodSandboxId: "s-1", - Metadata: &runtime.ContainerMetadata{Name: "name-1", Attempt: 1}, - State: runtime.ContainerState_CONTAINER_RUNNING, - }, - { - Id: "2", - PodSandboxId: "s-2", - Metadata: &runtime.ContainerMetadata{Name: "name-2", Attempt: 2}, - State: runtime.ContainerState_CONTAINER_EXITED, - Labels: 
map[string]string{"a": "b"}, - }, - { - Id: "3", - PodSandboxId: "s-2", - Metadata: &runtime.ContainerMetadata{Name: "name-2", Attempt: 3}, - State: runtime.ContainerState_CONTAINER_CREATED, - Labels: map[string]string{"c": "d"}, - }, - } - for _, test := range []struct { - desc string - filter *runtime.ContainerFilter - expect []*runtime.Container - }{ - { - desc: "no filter", - expect: testContainers, - }, - { - desc: "id filter", - filter: &runtime.ContainerFilter{Id: "2"}, - expect: []*runtime.Container{testContainers[1]}, - }, - { - desc: "state filter", - filter: &runtime.ContainerFilter{ - State: &runtime.ContainerStateValue{ - State: runtime.ContainerState_CONTAINER_EXITED, - }, - }, - expect: []*runtime.Container{testContainers[1]}, - }, - { - desc: "label filter", - filter: &runtime.ContainerFilter{ - LabelSelector: map[string]string{"a": "b"}, - }, - expect: []*runtime.Container{testContainers[1]}, - }, - { - desc: "sandbox id filter", - filter: &runtime.ContainerFilter{PodSandboxId: "s-2"}, - expect: []*runtime.Container{testContainers[1], testContainers[2]}, - }, - { - desc: "mixed filter not matched", - filter: &runtime.ContainerFilter{ - Id: "1", - PodSandboxId: "s-2", - LabelSelector: map[string]string{"a": "b"}, - }, - expect: []*runtime.Container{}, - }, - { - desc: "mixed filter matched", - filter: &runtime.ContainerFilter{ - PodSandboxId: "s-2", - State: &runtime.ContainerStateValue{ - State: runtime.ContainerState_CONTAINER_CREATED, - }, - LabelSelector: map[string]string{"c": "d"}, - }, - expect: []*runtime.Container{testContainers[2]}, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - filtered := c.filterCRIContainers(testContainers, test.filter) - assert.Equal(t, test.expect, filtered, test.desc) - }) - } -} - -// containerForTest is a helper type for test. 
-type containerForTest struct { - metadata containerstore.Metadata - status containerstore.Status -} - -func (c containerForTest) toContainer() (containerstore.Container, error) { - return containerstore.NewContainer( - c.metadata, - containerstore.WithFakeStatus(c.status), - ) -} - -func TestListContainers(t *testing.T) { - c := newTestCRIService() - sandboxesInStore := []sandboxstore.Sandbox{ - sandboxstore.NewSandbox( - sandboxstore.Metadata{ - ID: "s-1abcdef1234", - Name: "sandboxname-1", - Config: &runtime.PodSandboxConfig{Metadata: &runtime.PodSandboxMetadata{Name: "podname-1"}}, - }, - sandboxstore.Status{ - State: sandboxstore.StateReady, - }, - ), - sandboxstore.NewSandbox( - sandboxstore.Metadata{ - ID: "s-2abcdef1234", - Name: "sandboxname-2", - Config: &runtime.PodSandboxConfig{Metadata: &runtime.PodSandboxMetadata{Name: "podname-2"}}, - }, - sandboxstore.Status{ - State: sandboxstore.StateNotReady, - }, - ), - } - createdAt := time.Now().UnixNano() - startedAt := time.Now().UnixNano() - finishedAt := time.Now().UnixNano() - containersInStore := []containerForTest{ - { - metadata: containerstore.Metadata{ - ID: "c-1container", - Name: "name-1", - SandboxID: "s-1abcdef1234", - Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-1"}}, - }, - status: containerstore.Status{CreatedAt: createdAt}, - }, - { - metadata: containerstore.Metadata{ - ID: "c-2container", - Name: "name-2", - SandboxID: "s-1abcdef1234", - Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-2"}}, - }, - status: containerstore.Status{ - CreatedAt: createdAt, - StartedAt: startedAt, - }, - }, - { - metadata: containerstore.Metadata{ - ID: "c-3container", - Name: "name-3", - SandboxID: "s-1abcdef1234", - Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-3"}}, - }, - status: containerstore.Status{ - CreatedAt: createdAt, - StartedAt: startedAt, - FinishedAt: finishedAt, - }, - }, - { - metadata: 
containerstore.Metadata{ - ID: "c-4container", - Name: "name-4", - SandboxID: "s-2abcdef1234", - Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-4"}}, - }, - status: containerstore.Status{ - CreatedAt: createdAt, - }, - }, - } - - expectedContainers := []*runtime.Container{ - { - Id: "c-1container", - PodSandboxId: "s-1abcdef1234", - Metadata: &runtime.ContainerMetadata{Name: "name-1"}, - State: runtime.ContainerState_CONTAINER_CREATED, - CreatedAt: createdAt, - }, - { - Id: "c-2container", - PodSandboxId: "s-1abcdef1234", - Metadata: &runtime.ContainerMetadata{Name: "name-2"}, - State: runtime.ContainerState_CONTAINER_RUNNING, - CreatedAt: createdAt, - }, - { - Id: "c-3container", - PodSandboxId: "s-1abcdef1234", - Metadata: &runtime.ContainerMetadata{Name: "name-3"}, - State: runtime.ContainerState_CONTAINER_EXITED, - CreatedAt: createdAt, - }, - { - Id: "c-4container", - PodSandboxId: "s-2abcdef1234", - Metadata: &runtime.ContainerMetadata{Name: "name-4"}, - State: runtime.ContainerState_CONTAINER_CREATED, - CreatedAt: createdAt, - }, - } - - // Inject test sandbox metadata - for _, sb := range sandboxesInStore { - assert.NoError(t, c.sandboxStore.Add(sb)) - } - - // Inject test container metadata - for _, cntr := range containersInStore { - container, err := cntr.toContainer() - assert.NoError(t, err) - assert.NoError(t, c.containerStore.Add(container)) - } - - for _, testdata := range []struct { - desc string - filter *runtime.ContainerFilter - expect []*runtime.Container - }{ - { - desc: "test without filter", - filter: &runtime.ContainerFilter{}, - expect: expectedContainers, - }, - { - desc: "test filter by sandboxid", - filter: &runtime.ContainerFilter{ - PodSandboxId: "s-1abcdef1234", - }, - expect: expectedContainers[:3], - }, - { - desc: "test filter by truncated sandboxid", - filter: &runtime.ContainerFilter{ - PodSandboxId: "s-1", - }, - expect: expectedContainers[:3], - }, - { - desc: "test filter by containerid", - 
filter: &runtime.ContainerFilter{ - Id: "c-1container", - }, - expect: expectedContainers[:1], - }, - { - desc: "test filter by truncated containerid", - filter: &runtime.ContainerFilter{ - Id: "c-1", - }, - expect: expectedContainers[:1], - }, - { - desc: "test filter by containerid and sandboxid", - filter: &runtime.ContainerFilter{ - Id: "c-1container", - PodSandboxId: "s-1abcdef1234", - }, - expect: expectedContainers[:1], - }, - { - desc: "test filter by truncated containerid and truncated sandboxid", - filter: &runtime.ContainerFilter{ - Id: "c-1", - PodSandboxId: "s-1", - }, - expect: expectedContainers[:1], - }, - } { - testdata := testdata - t.Run(testdata.desc, func(t *testing.T) { - resp, err := c.ListContainers(context.Background(), &runtime.ListContainersRequest{Filter: testdata.filter}) - assert.NoError(t, err) - require.NotNil(t, resp) - containers := resp.GetContainers() - assert.Len(t, containers, len(testdata.expect)) - for _, cntr := range testdata.expect { - assert.Contains(t, containers, cntr) - } - }) - } -} diff --git a/pkg/cri/sbserver/container_log_reopen.go b/pkg/cri/sbserver/container_log_reopen.go deleted file mode 100644 index 96d726c28..000000000 --- a/pkg/cri/sbserver/container_log_reopen.go +++ /dev/null @@ -1,52 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "errors" - "fmt" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// ReopenContainerLog asks the cri plugin to reopen the stdout/stderr log file for the container. -// This is often called after the log file has been rotated. -func (c *criService) ReopenContainerLog(ctx context.Context, r *runtime.ReopenContainerLogRequest) (*runtime.ReopenContainerLogResponse, error) { - container, err := c.containerStore.Get(r.GetContainerId()) - if err != nil { - return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err) - } - - if container.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING { - return nil, errors.New("container is not running") - } - - // Create new container logger and replace the existing ones. - stdoutWC, stderrWC, err := c.createContainerLoggers(container.LogPath, container.Config.GetTty()) - if err != nil { - return nil, err - } - oldStdoutWC, oldStderrWC := container.IO.AddOutput("log", stdoutWC, stderrWC) - if oldStdoutWC != nil { - oldStdoutWC.Close() - } - if oldStderrWC != nil { - oldStderrWC.Close() - } - return &runtime.ReopenContainerLogResponse{}, nil -} diff --git a/pkg/cri/sbserver/container_remove.go b/pkg/cri/sbserver/container_remove.go deleted file mode 100644 index 83cde61a3..000000000 --- a/pkg/cri/sbserver/container_remove.go +++ /dev/null @@ -1,164 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "errors" - "fmt" - "time" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/errdefs" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - "github.com/containerd/log" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// RemoveContainer removes the container. -func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) { - start := time.Now() - ctrID := r.GetContainerId() - container, err := c.containerStore.Get(ctrID) - if err != nil { - if !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("an error occurred when try to find container %q: %w", ctrID, err) - } - // Do not return error if container metadata doesn't exist. - log.G(ctx).Tracef("RemoveContainer called for container %q that does not exist", ctrID) - return &runtime.RemoveContainerResponse{}, nil - } - id := container.ID - i, err := container.Container.Info(ctx) - if err != nil { - if !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("get container info: %w", err) - } - // Since containerd doesn't see the container and criservice's content store does, - // we should try to recover from this state by removing entry for this container - // from the container store as well and return successfully. 
- log.G(ctx).WithError(err).Warn("get container info failed") - c.containerStore.Delete(ctrID) - c.containerNameIndex.ReleaseByKey(ctrID) - return &runtime.RemoveContainerResponse{}, nil - } - - // Forcibly stop the containers if they are in running or unknown state - state := container.Status.Get().State() - if state == runtime.ContainerState_CONTAINER_RUNNING || - state == runtime.ContainerState_CONTAINER_UNKNOWN { - log.L.Infof("Forcibly stopping container %q", id) - if err := c.stopContainer(ctx, container, 0); err != nil { - return nil, fmt.Errorf("failed to forcibly stop container %q: %w", id, err) - } - - } - - // Set removing state to prevent other start/remove operations against this container - // while it's being removed. - if err := setContainerRemoving(container); err != nil { - return nil, fmt.Errorf("failed to set removing state for container %q: %w", id, err) - } - defer func() { - if retErr != nil { - // Reset removing if remove failed. - if err := resetContainerRemoving(container); err != nil { - log.G(ctx).WithError(err).Errorf("failed to reset removing state for container %q", id) - } - } - }() - - sandbox, err := c.sandboxStore.Get(container.SandboxID) - if err != nil { - err = c.nri.RemoveContainer(ctx, nil, &container) - } else { - err = c.nri.RemoveContainer(ctx, &sandbox, &container) - } - if err != nil { - log.G(ctx).WithError(err).Error("NRI failed to remove container") - } - - // NOTE(random-liu): Docker set container to "Dead" state when start removing the - // container so as to avoid start/restart the container again. However, for current - // kubelet implementation, we'll never start a container once we decide to remove it, - // so we don't need the "Dead" state for now. - - // Delete containerd container. 
- if err := container.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil { - if !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("failed to delete containerd container %q: %w", id, err) - } - log.G(ctx).Tracef("Remove called for containerd container %q that does not exist", id) - } - - // Delete container checkpoint. - if err := container.Delete(); err != nil { - return nil, fmt.Errorf("failed to delete container checkpoint for %q: %w", id, err) - } - - containerRootDir := c.getContainerRootDir(id) - if err := ensureRemoveAll(ctx, containerRootDir); err != nil { - return nil, fmt.Errorf("failed to remove container root directory %q: %w", - containerRootDir, err) - } - volatileContainerRootDir := c.getVolatileContainerRootDir(id) - if err := ensureRemoveAll(ctx, volatileContainerRootDir); err != nil { - return nil, fmt.Errorf("failed to remove volatile container root directory %q: %w", - volatileContainerRootDir, err) - } - - c.containerStore.Delete(id) - - c.containerNameIndex.ReleaseByKey(id) - - c.generateAndSendContainerEvent(ctx, id, container.SandboxID, runtime.ContainerEventType_CONTAINER_DELETED_EVENT) - - containerRemoveTimer.WithValues(i.Runtime.Name).UpdateSince(start) - - return &runtime.RemoveContainerResponse{}, nil -} - -// setContainerRemoving sets the container into removing state. In removing state, the -// container will not be started or removed again. -func setContainerRemoving(container containerstore.Container) error { - return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) { - // Do not remove container if it's still running or unknown. 
- if status.State() == runtime.ContainerState_CONTAINER_RUNNING { - return status, errors.New("container is still running, to stop first") - } - if status.State() == runtime.ContainerState_CONTAINER_UNKNOWN { - return status, errors.New("container state is unknown, to stop first") - } - if status.Starting { - return status, errors.New("container is in starting state, can't be removed") - } - if status.Removing { - return status, errors.New("container is already in removing state") - } - status.Removing = true - return status, nil - }) -} - -// resetContainerRemoving resets the container removing state on remove failure. So -// that we could remove the container again. -func resetContainerRemoving(container containerstore.Container) error { - return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) { - status.Removing = false - return status, nil - }) -} diff --git a/pkg/cri/sbserver/container_remove_test.go b/pkg/cri/sbserver/container_remove_test.go deleted file mode 100644 index e00278b82..000000000 --- a/pkg/cri/sbserver/container_remove_test.go +++ /dev/null @@ -1,92 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "testing" - "time" - - "github.com/stretchr/testify/assert" - - containerstore "github.com/containerd/containerd/pkg/cri/store/container" -) - -// TestSetContainerRemoving tests setContainerRemoving sets removing -// state correctly. 
-func TestSetContainerRemoving(t *testing.T) { - testID := "test-id" - for _, test := range []struct { - desc string - status containerstore.Status - expectErr bool - }{ - { - desc: "should return error when container is in running state", - status: containerstore.Status{ - CreatedAt: time.Now().UnixNano(), - StartedAt: time.Now().UnixNano(), - }, - expectErr: true, - }, - { - desc: "should return error when container is in starting state", - status: containerstore.Status{ - CreatedAt: time.Now().UnixNano(), - Starting: true, - }, - expectErr: true, - }, - { - desc: "should return error when container is in removing state", - status: containerstore.Status{ - CreatedAt: time.Now().UnixNano(), - StartedAt: time.Now().UnixNano(), - FinishedAt: time.Now().UnixNano(), - Removing: true, - }, - expectErr: true, - }, - { - desc: "should not return error when container is not running and removing", - status: containerstore.Status{ - CreatedAt: time.Now().UnixNano(), - StartedAt: time.Now().UnixNano(), - FinishedAt: time.Now().UnixNano(), - }, - expectErr: false, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - container, err := containerstore.NewContainer( - containerstore.Metadata{ID: testID}, - containerstore.WithFakeStatus(test.status), - ) - assert.NoError(t, err) - err = setContainerRemoving(container) - if test.expectErr { - assert.Error(t, err) - assert.Equal(t, test.status, container.Status.Get(), "metadata should not be updated") - } else { - assert.NoError(t, err) - assert.True(t, container.Status.Get().Removing, "removing should be set") - assert.NoError(t, resetContainerRemoving(container)) - assert.False(t, container.Status.Get().Removing, "removing should be reset") - } - }) - } -} diff --git a/pkg/cri/sbserver/container_start.go b/pkg/cri/sbserver/container_start.go deleted file mode 100644 index ddc6b1122..000000000 --- a/pkg/cri/sbserver/container_start.go +++ /dev/null @@ -1,252 +0,0 @@ -/* - Copyright The containerd Authors. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "errors" - "fmt" - "io" - "time" - - "github.com/containerd/containerd" - containerdio "github.com/containerd/containerd/cio" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/log" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - cio "github.com/containerd/containerd/pkg/cri/io" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - ctrdutil "github.com/containerd/containerd/pkg/cri/util" - cioutil "github.com/containerd/containerd/pkg/ioutil" -) - -// StartContainer starts the container. -func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) { - start := time.Now() - cntr, err := c.containerStore.Get(r.GetContainerId()) - if err != nil { - return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err) - } - - info, err := cntr.Container.Info(ctx) - if err != nil { - return nil, fmt.Errorf("get container info: %w", err) - } - - id := cntr.ID - meta := cntr.Metadata - container := cntr.Container - config := meta.Config - - // Set starting state to prevent other start/remove operations against this container - // while it's being started. 
- if err := setContainerStarting(cntr); err != nil { - return nil, fmt.Errorf("failed to set starting state for container %q: %w", id, err) - } - defer func() { - if retErr != nil { - // Set container to exited if fail to start. - if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { - status.Pid = 0 - status.FinishedAt = time.Now().UnixNano() - status.ExitCode = errorStartExitCode - status.Reason = errorStartReason - status.Message = retErr.Error() - return status, nil - }); err != nil { - log.G(ctx).WithError(err).Errorf("failed to set start failure state for container %q", id) - } - } - if err := resetContainerStarting(cntr); err != nil { - log.G(ctx).WithError(err).Errorf("failed to reset starting state for container %q", id) - } - }() - - // Get sandbox config from sandbox store. - sandbox, err := c.sandboxStore.Get(meta.SandboxID) - if err != nil { - return nil, fmt.Errorf("sandbox %q not found: %w", meta.SandboxID, err) - } - sandboxID := meta.SandboxID - if sandbox.Status.Get().State != sandboxstore.StateReady { - return nil, fmt.Errorf("sandbox container %q is not running", sandboxID) - } - - // Recheck target container validity in Linux namespace options. 
- if linux := config.GetLinux(); linux != nil { - nsOpts := linux.GetSecurityContext().GetNamespaceOptions() - if nsOpts.GetPid() == runtime.NamespaceMode_TARGET { - _, err := c.validateTargetContainer(sandboxID, nsOpts.TargetId) - if err != nil { - return nil, fmt.Errorf("invalid target container: %w", err) - } - } - } - - ioCreation := func(id string) (_ containerdio.IO, err error) { - stdoutWC, stderrWC, err := c.createContainerLoggers(meta.LogPath, config.GetTty()) - if err != nil { - return nil, fmt.Errorf("failed to create container loggers: %w", err) - } - cntr.IO.AddOutput("log", stdoutWC, stderrWC) - cntr.IO.Pipe() - return cntr.IO, nil - } - - ociRuntime, err := c.getSandboxRuntime(sandbox.Config, sandbox.Metadata.RuntimeHandler) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox runtime: %w", err) - } - - var taskOpts []containerd.NewTaskOpts - if ociRuntime.Path != "" { - taskOpts = append(taskOpts, containerd.WithRuntimePath(ociRuntime.Path)) - } - task, err := container.NewTask(ctx, ioCreation, taskOpts...) - if err != nil { - return nil, fmt.Errorf("failed to create containerd task: %w", err) - } - defer func() { - if retErr != nil { - deferCtx, deferCancel := ctrdutil.DeferContext() - defer deferCancel() - // It's possible that task is deleted by event monitor. - if _, err := task.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { - log.G(ctx).WithError(err).Errorf("Failed to delete containerd task %q", id) - } - } - }() - - // wait is a long running background request, no timeout needed. 
- exitCh, err := task.Wait(ctrdutil.NamespacedContext()) - if err != nil { - return nil, fmt.Errorf("failed to wait for containerd task: %w", err) - } - - defer func() { - if retErr != nil { - deferCtx, deferCancel := ctrdutil.DeferContext() - defer deferCancel() - err = c.nri.StopContainer(deferCtx, &sandbox, &cntr) - if err != nil { - log.G(ctx).WithError(err).Errorf("NRI stop failed for failed container %q", id) - } - } - }() - - err = c.nri.StartContainer(ctx, &sandbox, &cntr) - if err != nil { - log.G(ctx).WithError(err).Errorf("NRI container start failed") - return nil, fmt.Errorf("NRI container start failed: %w", err) - } - - // Start containerd task. - if err := task.Start(ctx); err != nil { - return nil, fmt.Errorf("failed to start containerd task %q: %w", id, err) - } - - // Update container start timestamp. - if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { - status.Pid = task.Pid() - status.StartedAt = time.Now().UnixNano() - return status, nil - }); err != nil { - return nil, fmt.Errorf("failed to update container %q state: %w", id, err) - } - - // It handles the TaskExit event and update container state after this. - c.eventMonitor.startContainerExitMonitor(context.Background(), id, task.Pid(), exitCh) - - c.generateAndSendContainerEvent(ctx, id, sandboxID, runtime.ContainerEventType_CONTAINER_STARTED_EVENT) - - err = c.nri.PostStartContainer(ctx, &sandbox, &cntr) - if err != nil { - log.G(ctx).WithError(err).Errorf("NRI post-start notification failed") - } - - containerStartTimer.WithValues(info.Runtime.Name).UpdateSince(start) - - return &runtime.StartContainerResponse{}, nil -} - -// setContainerStarting sets the container into starting state. In starting state, the -// container will not be removed or started again. 
-func setContainerStarting(container containerstore.Container) error { - return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) { - // Return error if container is not in created state. - if status.State() != runtime.ContainerState_CONTAINER_CREATED { - return status, fmt.Errorf("container is in %s state", criContainerStateToString(status.State())) - } - // Do not start the container when there is a removal in progress. - if status.Removing { - return status, errors.New("container is in removing state, can't be started") - } - if status.Starting { - return status, errors.New("container is already in starting state") - } - status.Starting = true - return status, nil - }) -} - -// resetContainerStarting resets the container starting state on start failure. So -// that we could remove the container later. -func resetContainerStarting(container containerstore.Container) error { - return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) { - status.Starting = false - return status, nil - }) -} - -// createContainerLoggers creates container loggers and return write closer for stdout and stderr. -func (c *criService) createContainerLoggers(logPath string, tty bool) (stdout io.WriteCloser, stderr io.WriteCloser, err error) { - if logPath != "" { - // Only generate container log when log path is specified. - f, err := openLogFile(logPath) - if err != nil { - return nil, nil, fmt.Errorf("failed to create and open log file: %w", err) - } - defer func() { - if err != nil { - f.Close() - } - }() - var stdoutCh, stderrCh <-chan struct{} - wc := cioutil.NewSerialWriteCloser(f) - stdout, stdoutCh = cio.NewCRILogger(logPath, wc, cio.Stdout, c.config.MaxContainerLogLineSize) - // Only redirect stderr when there is no tty. 
- if !tty { - stderr, stderrCh = cio.NewCRILogger(logPath, wc, cio.Stderr, c.config.MaxContainerLogLineSize) - } - go func() { - if stdoutCh != nil { - <-stdoutCh - } - if stderrCh != nil { - <-stderrCh - } - log.L.Debugf("Finish redirecting log file %q, closing it", logPath) - f.Close() - }() - } else { - stdout = cio.NewDiscardLogger() - stderr = cio.NewDiscardLogger() - } - return -} diff --git a/pkg/cri/sbserver/container_start_test.go b/pkg/cri/sbserver/container_start_test.go deleted file mode 100644 index 79340e921..000000000 --- a/pkg/cri/sbserver/container_start_test.go +++ /dev/null @@ -1,106 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "testing" - "time" - - "github.com/stretchr/testify/assert" - - containerstore "github.com/containerd/containerd/pkg/cri/store/container" -) - -// TestSetContainerStarting tests setContainerStarting sets removing -// state correctly. 
-func TestSetContainerStarting(t *testing.T) { - testID := "test-id" - for _, test := range []struct { - desc string - status containerstore.Status - expectErr bool - }{ - { - desc: "should not return error when container is in created state", - status: containerstore.Status{ - CreatedAt: time.Now().UnixNano(), - }, - expectErr: false, - }, - { - desc: "should return error when container is in running state", - status: containerstore.Status{ - CreatedAt: time.Now().UnixNano(), - StartedAt: time.Now().UnixNano(), - }, - expectErr: true, - }, - { - desc: "should return error when container is in exited state", - status: containerstore.Status{ - CreatedAt: time.Now().UnixNano(), - StartedAt: time.Now().UnixNano(), - FinishedAt: time.Now().UnixNano(), - }, - expectErr: true, - }, - { - desc: "should return error when container is in unknown state", - status: containerstore.Status{ - CreatedAt: 0, - StartedAt: 0, - FinishedAt: 0, - }, - expectErr: true, - }, - { - desc: "should return error when container is in starting state", - status: containerstore.Status{ - CreatedAt: time.Now().UnixNano(), - Starting: true, - }, - expectErr: true, - }, - { - desc: "should return error when container is in removing state", - status: containerstore.Status{ - CreatedAt: time.Now().UnixNano(), - Removing: true, - }, - expectErr: true, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - container, err := containerstore.NewContainer( - containerstore.Metadata{ID: testID}, - containerstore.WithFakeStatus(test.status), - ) - assert.NoError(t, err) - err = setContainerStarting(container) - if test.expectErr { - assert.Error(t, err) - assert.Equal(t, test.status, container.Status.Get(), "metadata should not be updated") - } else { - assert.NoError(t, err) - assert.True(t, container.Status.Get().Starting, "starting should be set") - assert.NoError(t, resetContainerStarting(container)) - assert.False(t, container.Status.Get().Starting, "starting should be reset") - } - }) - } 
-} diff --git a/pkg/cri/sbserver/container_stats.go b/pkg/cri/sbserver/container_stats.go deleted file mode 100644 index 6b643f88f..000000000 --- a/pkg/cri/sbserver/container_stats.go +++ /dev/null @@ -1,53 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "fmt" - - "github.com/containerd/containerd/api/services/tasks/v1" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// ContainerStats returns stats of the container. If the container does not -// exist, the call returns an error. 
-func (c *criService) ContainerStats(ctx context.Context, in *runtime.ContainerStatsRequest) (*runtime.ContainerStatsResponse, error) { - cntr, err := c.containerStore.Get(in.GetContainerId()) - if err != nil { - return nil, fmt.Errorf("failed to find container: %w", err) - } - request := &tasks.MetricsRequest{Filters: []string{"id==" + cntr.ID}} - resp, err := c.client.TaskService().Metrics(ctx, request) - if err != nil { - return nil, fmt.Errorf("failed to fetch metrics for task: %w", err) - } - if len(resp.Metrics) != 1 { - return nil, fmt.Errorf("unexpected metrics response: %+v", resp.Metrics) - } - - handler, err := c.getMetricsHandler(ctx, cntr.SandboxID) - if err != nil { - return nil, err - } - - cs, err := handler(cntr.Metadata, resp.Metrics[0]) - if err != nil { - return nil, fmt.Errorf("failed to decode container metrics: %w", err) - } - return &runtime.ContainerStatsResponse{Stats: cs}, nil -} diff --git a/pkg/cri/sbserver/container_stats_list.go b/pkg/cri/sbserver/container_stats_list.go deleted file mode 100644 index 72f441441..000000000 --- a/pkg/cri/sbserver/container_stats_list.go +++ /dev/null @@ -1,500 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "errors" - "fmt" - "reflect" - "time" - - wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" - cg1 "github.com/containerd/cgroups/v3/cgroup1/stats" - cg2 "github.com/containerd/cgroups/v3/cgroup2/stats" - "github.com/containerd/containerd/api/services/tasks/v1" - "github.com/containerd/containerd/api/types" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/pkg/cri/store/stats" - "github.com/containerd/containerd/protobuf" - "github.com/containerd/log" - "github.com/containerd/typeurl/v2" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - containerstore "github.com/containerd/containerd/pkg/cri/store/container" -) - -// ListContainerStats returns stats of all running containers. -func (c *criService) ListContainerStats( - ctx context.Context, - in *runtime.ListContainerStatsRequest, -) (*runtime.ListContainerStatsResponse, error) { - request, containers, err := c.buildTaskMetricsRequest(in) - if err != nil { - return nil, fmt.Errorf("failed to build metrics request: %w", err) - } - resp, err := c.client.TaskService().Metrics(ctx, request) - if err != nil { - return nil, fmt.Errorf("failed to fetch metrics for tasks: %w", err) - } - criStats, err := c.toCRIContainerStats(ctx, resp.Metrics, containers) - if err != nil { - return nil, fmt.Errorf("failed to convert to cri containerd stats format: %w", err) - } - return criStats, nil -} - -type metricsHandler func(containerstore.Metadata, *types.Metric) (*runtime.ContainerStats, error) - -// Returns a function to be used for transforming container metrics into the right format. -// Uses the platform the given sandbox advertises to implement its logic. If the platform is -// unsupported for metrics this will return a wrapped [errdefs.ErrNotImplemented]. 
-func (c *criService) getMetricsHandler(ctx context.Context, sandboxID string) (metricsHandler, error) { - sandbox, err := c.sandboxStore.Get(sandboxID) - if err != nil { - return nil, fmt.Errorf("failed to find sandbox id %q: %w", sandboxID, err) - } - controller, err := c.getSandboxController(sandbox.Config, sandbox.RuntimeHandler) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox controller: %w", err) - } - // Grab the platform that this containers sandbox advertises. Reason being, even if - // the host may be {insert platform}, if it virtualizes or emulates a different platform - // it will return stats in that format, and we need to handle the conversion logic based - // off of this info. - p, err := controller.Platform(ctx, sandboxID) - if err != nil { - return nil, err - } - - switch p.OS { - case "windows": - return c.windowsContainerMetrics, nil - case "linux": - return c.linuxContainerMetrics, nil - default: - return nil, fmt.Errorf("container metrics for platform %+v: %w", p, errdefs.ErrNotImplemented) - } -} - -func (c *criService) toCRIContainerStats( - ctx context.Context, - stats []*types.Metric, - containers []containerstore.Container, -) (*runtime.ListContainerStatsResponse, error) { - statsMap := make(map[string]*types.Metric) - for _, stat := range stats { - statsMap[stat.ID] = stat - } - containerStats := new(runtime.ListContainerStatsResponse) - - // Unfortunately if no filter was passed we're asking for every containers stats which - // generally belong to multiple different pods, who all might have different platforms. - // To avoid recalculating the right metricsHandler to invoke, if we've already calculated - // the platform and handler for a given sandbox just pull it from our map here. 
- var ( - err error - handler metricsHandler - ) - sandboxToMetricsHandler := make(map[string]metricsHandler) - for _, cntr := range containers { - h, ok := sandboxToMetricsHandler[cntr.SandboxID] - if !ok { - handler, err = c.getMetricsHandler(ctx, cntr.SandboxID) - if err != nil { - // If the sandbox is not found, it may have been removed. we need to check container whether it is still exist - if errdefs.IsNotFound(err) { - _, err = c.containerStore.Get(cntr.ID) - if err != nil && errdefs.IsNotFound(err) { - log.G(ctx).Warnf("container %q is not found, skip it", cntr.ID) - continue - } - } - return nil, fmt.Errorf("failed to get metrics handler for container %q: %w", cntr.ID, err) - } - sandboxToMetricsHandler[cntr.SandboxID] = handler - } else { - handler = h - } - - cs, err := handler(cntr.Metadata, statsMap[cntr.ID]) - if err != nil { - return nil, fmt.Errorf("failed to decode container metrics for %q: %w", cntr.ID, err) - } - - if cs.Cpu != nil && cs.Cpu.UsageCoreNanoSeconds != nil { - // this is a calculated value and should be computed for all OSes - nanoUsage, err := c.getUsageNanoCores(cntr.Metadata.ID, false, cs.Cpu.UsageCoreNanoSeconds.Value, time.Unix(0, cs.Cpu.Timestamp)) - if err != nil { - return nil, fmt.Errorf("failed to get usage nano cores, containerID: %s: %w", cntr.Metadata.ID, err) - } - cs.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoUsage} - } - containerStats.Stats = append(containerStats.Stats, cs) - } - return containerStats, nil -} - -func (c *criService) getUsageNanoCores(containerID string, isSandbox bool, currentUsageCoreNanoSeconds uint64, currentTimestamp time.Time) (uint64, error) { - var oldStats *stats.ContainerStats - - if isSandbox { - sandbox, err := c.sandboxStore.Get(containerID) - if err != nil { - return 0, fmt.Errorf("failed to get sandbox container: %s: %w", containerID, err) - } - oldStats = sandbox.Stats - } else { - container, err := c.containerStore.Get(containerID) - if err != nil { - return 0, 
fmt.Errorf("failed to get container ID: %s: %w", containerID, err) - } - oldStats = container.Stats - } - - if oldStats == nil { - newStats := &stats.ContainerStats{ - UsageCoreNanoSeconds: currentUsageCoreNanoSeconds, - Timestamp: currentTimestamp, - } - if isSandbox { - err := c.sandboxStore.UpdateContainerStats(containerID, newStats) - if err != nil { - return 0, fmt.Errorf("failed to update sandbox stats container ID: %s: %w", containerID, err) - } - } else { - err := c.containerStore.UpdateContainerStats(containerID, newStats) - if err != nil { - return 0, fmt.Errorf("failed to update container stats ID: %s: %w", containerID, err) - } - } - return 0, nil - } - - nanoSeconds := currentTimestamp.UnixNano() - oldStats.Timestamp.UnixNano() - - // zero or negative interval - if nanoSeconds <= 0 { - return 0, nil - } - - newUsageNanoCores := uint64(float64(currentUsageCoreNanoSeconds-oldStats.UsageCoreNanoSeconds) / - float64(nanoSeconds) * float64(time.Second/time.Nanosecond)) - - newStats := &stats.ContainerStats{ - UsageCoreNanoSeconds: currentUsageCoreNanoSeconds, - Timestamp: currentTimestamp, - } - if isSandbox { - err := c.sandboxStore.UpdateContainerStats(containerID, newStats) - if err != nil { - return 0, fmt.Errorf("failed to update sandbox container stats: %s: %w", containerID, err) - } - } else { - err := c.containerStore.UpdateContainerStats(containerID, newStats) - if err != nil { - return 0, fmt.Errorf("failed to update container stats ID: %s: %w", containerID, err) - } - } - - return newUsageNanoCores, nil -} - -func (c *criService) normalizeContainerStatsFilter(filter *runtime.ContainerStatsFilter) { - if cntr, err := c.containerStore.Get(filter.GetId()); err == nil { - filter.Id = cntr.ID - } - if sb, err := c.sandboxStore.Get(filter.GetPodSandboxId()); err == nil { - filter.PodSandboxId = sb.ID - } -} - -// buildTaskMetricsRequest constructs a tasks.MetricsRequest based on -// the information in the stats request and the containerStore -func (c 
*criService) buildTaskMetricsRequest( - r *runtime.ListContainerStatsRequest, -) (*tasks.MetricsRequest, []containerstore.Container, error) { - req := &tasks.MetricsRequest{} - if r.GetFilter() == nil { - return req, c.containerStore.List(), nil - } - c.normalizeContainerStatsFilter(r.GetFilter()) - var containers []containerstore.Container - for _, cntr := range c.containerStore.List() { - if r.GetFilter().GetId() != "" && cntr.ID != r.GetFilter().GetId() { - continue - } - if r.GetFilter().GetPodSandboxId() != "" && cntr.SandboxID != r.GetFilter().GetPodSandboxId() { - continue - } - if r.GetFilter().GetLabelSelector() != nil && - !matchLabelSelector(r.GetFilter().GetLabelSelector(), cntr.Config.GetLabels()) { - continue - } - containers = append(containers, cntr) - req.Filters = append(req.Filters, "id=="+cntr.ID) - } - return req, containers, nil -} - -func matchLabelSelector(selector, labels map[string]string) bool { - for k, v := range selector { - if val, ok := labels[k]; ok { - if v != val { - return false - } - } else { - return false - } - } - return true -} - -func (c *criService) windowsContainerMetrics( - meta containerstore.Metadata, - stats *types.Metric, -) (*runtime.ContainerStats, error) { - var cs runtime.ContainerStats - var usedBytes, inodesUsed uint64 - sn, err := c.GetSnapshot(meta.ID) - // If snapshotstore doesn't have cached snapshot information - // set WritableLayer usage to zero - if err == nil { - usedBytes = sn.Size - inodesUsed = sn.Inodes - } - cs.WritableLayer = &runtime.FilesystemUsage{ - Timestamp: sn.Timestamp, - FsId: &runtime.FilesystemIdentifier{ - Mountpoint: c.imageFSPath, - }, - UsedBytes: &runtime.UInt64Value{Value: usedBytes}, - InodesUsed: &runtime.UInt64Value{Value: inodesUsed}, - } - cs.Attributes = &runtime.ContainerAttributes{ - Id: meta.ID, - Metadata: meta.Config.GetMetadata(), - Labels: meta.Config.GetLabels(), - Annotations: meta.Config.GetAnnotations(), - } - - if stats != nil { - s, err := 
typeurl.UnmarshalAny(stats.Data) - if err != nil { - return nil, fmt.Errorf("failed to extract container metrics: %w", err) - } - wstats := s.(*wstats.Statistics).GetWindows() - if wstats == nil { - return nil, errors.New("windows stats is empty") - } - if wstats.Processor != nil { - cs.Cpu = &runtime.CpuUsage{ - Timestamp: (protobuf.FromTimestamp(wstats.Timestamp)).UnixNano(), - UsageCoreNanoSeconds: &runtime.UInt64Value{Value: wstats.Processor.TotalRuntimeNS}, - } - } - if wstats.Memory != nil { - cs.Memory = &runtime.MemoryUsage{ - Timestamp: (protobuf.FromTimestamp(wstats.Timestamp)).UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{ - Value: wstats.Memory.MemoryUsagePrivateWorkingSetBytes, - }, - } - } - } - return &cs, nil -} - -func (c *criService) linuxContainerMetrics( - meta containerstore.Metadata, - stats *types.Metric, -) (*runtime.ContainerStats, error) { - var cs runtime.ContainerStats - var usedBytes, inodesUsed uint64 - sn, err := c.GetSnapshot(meta.ID) - // If snapshotstore doesn't have cached snapshot information - // set WritableLayer usage to zero - if err == nil { - usedBytes = sn.Size - inodesUsed = sn.Inodes - } - cs.WritableLayer = &runtime.FilesystemUsage{ - Timestamp: sn.Timestamp, - FsId: &runtime.FilesystemIdentifier{ - Mountpoint: c.imageFSPath, - }, - UsedBytes: &runtime.UInt64Value{Value: usedBytes}, - InodesUsed: &runtime.UInt64Value{Value: inodesUsed}, - } - cs.Attributes = &runtime.ContainerAttributes{ - Id: meta.ID, - Metadata: meta.Config.GetMetadata(), - Labels: meta.Config.GetLabels(), - Annotations: meta.Config.GetAnnotations(), - } - - if stats != nil { - var data interface{} - switch { - case typeurl.Is(stats.Data, (*cg1.Metrics)(nil)): - data = &cg1.Metrics{} - case typeurl.Is(stats.Data, (*cg2.Metrics)(nil)): - data = &cg2.Metrics{} - case typeurl.Is(stats.Data, (*wstats.Statistics)(nil)): - data = &wstats.Statistics{} - default: - return nil, errors.New("cannot convert metric data to cgroups.Metrics or 
windows.Statistics") - } - - if err := typeurl.UnmarshalTo(stats.Data, data); err != nil { - return nil, fmt.Errorf("failed to extract container metrics: %w", err) - } - - cpuStats, err := c.cpuContainerStats(meta.ID, false /* isSandbox */, data, protobuf.FromTimestamp(stats.Timestamp)) - if err != nil { - return nil, fmt.Errorf("failed to obtain cpu stats: %w", err) - } - cs.Cpu = cpuStats - - memoryStats, err := c.memoryContainerStats(meta.ID, data, protobuf.FromTimestamp(stats.Timestamp)) - if err != nil { - return nil, fmt.Errorf("failed to obtain memory stats: %w", err) - } - cs.Memory = memoryStats - } - - return &cs, nil -} - -// getWorkingSet calculates workingset memory from cgroup memory stats. -// The caller should make sure memory is not nil. -// workingset = usage - total_inactive_file -func getWorkingSet(memory *cg1.MemoryStat) uint64 { - if memory.Usage == nil { - return 0 - } - var workingSet uint64 - if memory.TotalInactiveFile < memory.Usage.Usage { - workingSet = memory.Usage.Usage - memory.TotalInactiveFile - } - return workingSet -} - -// getWorkingSetV2 calculates workingset memory from cgroupv2 memory stats. -// The caller should make sure memory is not nil. -// workingset = usage - inactive_file -func getWorkingSetV2(memory *cg2.MemoryStat) uint64 { - var workingSet uint64 - if memory.InactiveFile < memory.Usage { - workingSet = memory.Usage - memory.InactiveFile - } - return workingSet -} - -func isMemoryUnlimited(v uint64) bool { - // Size after which we consider memory to be "unlimited". This is not - // MaxInt64 due to rounding by the kernel. 
- // TODO: k8s or cadvisor should export this https://github.com/google/cadvisor/blob/2b6fbacac7598e0140b5bc8428e3bdd7d86cf5b9/metrics/prometheus.go#L1969-L1971 - const maxMemorySize = uint64(1 << 62) - - return v > maxMemorySize -} - -// https://github.com/kubernetes/kubernetes/blob/b47f8263e18c7b13dba33fba23187e5e0477cdbd/pkg/kubelet/stats/helper.go#L68-L71 -func getAvailableBytes(memory *cg1.MemoryStat, workingSetBytes uint64) uint64 { - // memory limit - working set bytes - if !isMemoryUnlimited(memory.Usage.Limit) { - return memory.Usage.Limit - workingSetBytes - } - return 0 -} - -func getAvailableBytesV2(memory *cg2.MemoryStat, workingSetBytes uint64) uint64 { - // memory limit (memory.max) for cgroupv2 - working set bytes - if !isMemoryUnlimited(memory.UsageLimit) { - return memory.UsageLimit - workingSetBytes - } - return 0 -} - -func (c *criService) cpuContainerStats(ID string, isSandbox bool, stats interface{}, timestamp time.Time) (*runtime.CpuUsage, error) { - switch metrics := stats.(type) { - case *cg1.Metrics: - metrics.GetCPU().GetUsage() - if metrics.CPU != nil && metrics.CPU.Usage != nil { - return &runtime.CpuUsage{ - Timestamp: timestamp.UnixNano(), - UsageCoreNanoSeconds: &runtime.UInt64Value{Value: metrics.CPU.Usage.Total}, - }, nil - } - case *cg2.Metrics: - if metrics.CPU != nil { - // convert to nano seconds - usageCoreNanoSeconds := metrics.CPU.UsageUsec * 1000 - - return &runtime.CpuUsage{ - Timestamp: timestamp.UnixNano(), - UsageCoreNanoSeconds: &runtime.UInt64Value{Value: usageCoreNanoSeconds}, - }, nil - } - default: - return nil, fmt.Errorf("unexpected metrics type: %T from %s", metrics, reflect.TypeOf(metrics).Elem().PkgPath()) - } - return nil, nil -} - -func (c *criService) memoryContainerStats(ID string, stats interface{}, timestamp time.Time) (*runtime.MemoryUsage, error) { - switch metrics := stats.(type) { - case *cg1.Metrics: - if metrics.Memory != nil && metrics.Memory.Usage != nil { - workingSetBytes := 
getWorkingSet(metrics.Memory) - - return &runtime.MemoryUsage{ - Timestamp: timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{ - Value: workingSetBytes, - }, - AvailableBytes: &runtime.UInt64Value{Value: getAvailableBytes(metrics.Memory, workingSetBytes)}, - UsageBytes: &runtime.UInt64Value{Value: metrics.Memory.Usage.Usage}, - RssBytes: &runtime.UInt64Value{Value: metrics.Memory.TotalRSS}, - PageFaults: &runtime.UInt64Value{Value: metrics.Memory.TotalPgFault}, - MajorPageFaults: &runtime.UInt64Value{Value: metrics.Memory.TotalPgMajFault}, - }, nil - } - case *cg2.Metrics: - if metrics.Memory != nil { - workingSetBytes := getWorkingSetV2(metrics.Memory) - - return &runtime.MemoryUsage{ - Timestamp: timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{ - Value: workingSetBytes, - }, - AvailableBytes: &runtime.UInt64Value{Value: getAvailableBytesV2(metrics.Memory, workingSetBytes)}, - UsageBytes: &runtime.UInt64Value{Value: metrics.Memory.Usage}, - // Use Anon memory for RSS as cAdvisor on cgroupv2 - // see https://github.com/google/cadvisor/blob/a9858972e75642c2b1914c8d5428e33e6392c08a/container/libcontainer/handler.go#L799 - RssBytes: &runtime.UInt64Value{Value: metrics.Memory.Anon}, - PageFaults: &runtime.UInt64Value{Value: metrics.Memory.Pgfault}, - MajorPageFaults: &runtime.UInt64Value{Value: metrics.Memory.Pgmajfault}, - }, nil - } - default: - return nil, fmt.Errorf("unexpected metrics type: %T from %s", metrics, reflect.TypeOf(metrics).Elem().PkgPath()) - } - return nil, nil -} diff --git a/pkg/cri/sbserver/container_stats_list_test.go b/pkg/cri/sbserver/container_stats_list_test.go deleted file mode 100644 index 0a7bf877d..000000000 --- a/pkg/cri/sbserver/container_stats_list_test.go +++ /dev/null @@ -1,437 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "math" - "reflect" - "testing" - "time" - - v1 "github.com/containerd/cgroups/v3/cgroup1/stats" - v2 "github.com/containerd/cgroups/v3/cgroup2/stats" - "github.com/containerd/containerd/api/types" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - "github.com/stretchr/testify/assert" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func TestContainerMetricsCPUNanoCoreUsage(t *testing.T) { - c := newTestCRIService() - timestamp := time.Now() - secondAfterTimeStamp := timestamp.Add(time.Second) - ID := "ID" - - for _, test := range []struct { - desc string - firstCPUValue uint64 - secondCPUValue uint64 - expectedNanoCoreUsageFirst uint64 - expectedNanoCoreUsageSecond uint64 - }{ - { - desc: "metrics", - firstCPUValue: 50, - secondCPUValue: 500, - expectedNanoCoreUsageFirst: 0, - expectedNanoCoreUsageSecond: 450, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - container, err := containerstore.NewContainer( - containerstore.Metadata{ID: ID}, - ) - assert.NoError(t, err) - assert.Nil(t, container.Stats) - err = c.containerStore.Add(container) - assert.NoError(t, err) - - cpuUsage, err := c.getUsageNanoCores(ID, false, test.firstCPUValue, timestamp) - assert.NoError(t, err) - - container, err = c.containerStore.Get(ID) - assert.NoError(t, err) - assert.NotNil(t, container.Stats) - - assert.Equal(t, test.expectedNanoCoreUsageFirst, cpuUsage) - - cpuUsage, err = c.getUsageNanoCores(ID, false, 
test.secondCPUValue, secondAfterTimeStamp) - assert.NoError(t, err) - assert.Equal(t, test.expectedNanoCoreUsageSecond, cpuUsage) - - container, err = c.containerStore.Get(ID) - assert.NoError(t, err) - assert.NotNil(t, container.Stats) - }) - } -} - -func TestGetWorkingSet(t *testing.T) { - for _, test := range []struct { - desc string - memory *v1.MemoryStat - expected uint64 - }{ - { - desc: "nil memory usage", - memory: &v1.MemoryStat{}, - expected: 0, - }, - { - desc: "memory usage higher than inactive_total_file", - memory: &v1.MemoryStat{ - TotalInactiveFile: 1000, - Usage: &v1.MemoryEntry{Usage: 2000}, - }, - expected: 1000, - }, - { - desc: "memory usage lower than inactive_total_file", - memory: &v1.MemoryStat{ - TotalInactiveFile: 2000, - Usage: &v1.MemoryEntry{Usage: 1000}, - }, - expected: 0, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - got := getWorkingSet(test.memory) - assert.Equal(t, test.expected, got) - }) - } -} - -func TestGetWorkingSetV2(t *testing.T) { - for _, test := range []struct { - desc string - memory *v2.MemoryStat - expected uint64 - }{ - { - desc: "nil memory usage", - memory: &v2.MemoryStat{}, - expected: 0, - }, - { - desc: "memory usage higher than inactive_total_file", - memory: &v2.MemoryStat{ - InactiveFile: 1000, - Usage: 2000, - }, - expected: 1000, - }, - { - desc: "memory usage lower than inactive_total_file", - memory: &v2.MemoryStat{ - InactiveFile: 2000, - Usage: 1000, - }, - expected: 0, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - got := getWorkingSetV2(test.memory) - assert.Equal(t, test.expected, got) - }) - } -} - -func TestGetAvailableBytes(t *testing.T) { - for _, test := range []struct { - desc string - memory *v1.MemoryStat - workingSetBytes uint64 - expected uint64 - }{ - { - desc: "no limit", - memory: &v1.MemoryStat{ - Usage: &v1.MemoryEntry{ - Limit: math.MaxUint64, // no limit - Usage: 1000, - }, - }, - workingSetBytes: 500, - expected: 0, - }, - { - desc: 
"with limit", - memory: &v1.MemoryStat{ - Usage: &v1.MemoryEntry{ - Limit: 5000, - Usage: 1000, - }, - }, - workingSetBytes: 500, - expected: 5000 - 500, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - got := getAvailableBytes(test.memory, test.workingSetBytes) - assert.Equal(t, test.expected, got) - }) - } -} - -func TestGetAvailableBytesV2(t *testing.T) { - for _, test := range []struct { - desc string - memory *v2.MemoryStat - workingSetBytes uint64 - expected uint64 - }{ - { - desc: "no limit", - memory: &v2.MemoryStat{ - UsageLimit: math.MaxUint64, // no limit - Usage: 1000, - }, - workingSetBytes: 500, - expected: 0, - }, - { - desc: "with limit", - memory: &v2.MemoryStat{ - UsageLimit: 5000, - Usage: 1000, - }, - workingSetBytes: 500, - expected: 5000 - 500, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - got := getAvailableBytesV2(test.memory, test.workingSetBytes) - assert.Equal(t, test.expected, got) - }) - } -} - -func TestContainerMetricsMemory(t *testing.T) { - c := newTestCRIService() - timestamp := time.Now() - - for _, test := range []struct { - desc string - metrics interface{} - expected *runtime.MemoryUsage - }{ - { - desc: "v1 metrics - no memory limit", - metrics: &v1.Metrics{ - Memory: &v1.MemoryStat{ - Usage: &v1.MemoryEntry{ - Limit: math.MaxUint64, // no limit - Usage: 1000, - }, - TotalRSS: 10, - TotalPgFault: 11, - TotalPgMajFault: 12, - TotalInactiveFile: 500, - }, - }, - expected: &runtime.MemoryUsage{ - Timestamp: timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{Value: 500}, - AvailableBytes: &runtime.UInt64Value{Value: 0}, - UsageBytes: &runtime.UInt64Value{Value: 1000}, - RssBytes: &runtime.UInt64Value{Value: 10}, - PageFaults: &runtime.UInt64Value{Value: 11}, - MajorPageFaults: &runtime.UInt64Value{Value: 12}, - }, - }, - { - desc: "v1 metrics - memory limit", - metrics: &v1.Metrics{ - Memory: &v1.MemoryStat{ - Usage: &v1.MemoryEntry{ - Limit: 5000, - Usage: 1000, - }, - TotalRSS: 
10, - TotalPgFault: 11, - TotalPgMajFault: 12, - TotalInactiveFile: 500, - }, - }, - expected: &runtime.MemoryUsage{ - Timestamp: timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{Value: 500}, - AvailableBytes: &runtime.UInt64Value{Value: 4500}, - UsageBytes: &runtime.UInt64Value{Value: 1000}, - RssBytes: &runtime.UInt64Value{Value: 10}, - PageFaults: &runtime.UInt64Value{Value: 11}, - MajorPageFaults: &runtime.UInt64Value{Value: 12}, - }, - }, - { - desc: "v2 metrics - memory limit", - metrics: &v2.Metrics{ - Memory: &v2.MemoryStat{ - Usage: 1000, - UsageLimit: 5000, - InactiveFile: 0, - Pgfault: 11, - Pgmajfault: 12, - }, - }, - expected: &runtime.MemoryUsage{ - Timestamp: timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{Value: 1000}, - AvailableBytes: &runtime.UInt64Value{Value: 4000}, - UsageBytes: &runtime.UInt64Value{Value: 1000}, - RssBytes: &runtime.UInt64Value{Value: 0}, - PageFaults: &runtime.UInt64Value{Value: 11}, - MajorPageFaults: &runtime.UInt64Value{Value: 12}, - }, - }, - { - desc: "v2 metrics - no memory limit", - metrics: &v2.Metrics{ - Memory: &v2.MemoryStat{ - Usage: 1000, - UsageLimit: math.MaxUint64, // no limit - InactiveFile: 0, - Pgfault: 11, - Pgmajfault: 12, - }, - }, - expected: &runtime.MemoryUsage{ - Timestamp: timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{Value: 1000}, - AvailableBytes: &runtime.UInt64Value{Value: 0}, - UsageBytes: &runtime.UInt64Value{Value: 1000}, - RssBytes: &runtime.UInt64Value{Value: 0}, - PageFaults: &runtime.UInt64Value{Value: 11}, - MajorPageFaults: &runtime.UInt64Value{Value: 12}, - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - got, err := c.memoryContainerStats("ID", test.metrics, timestamp) - assert.NoError(t, err) - assert.Equal(t, test.expected, got) - }) - } -} - -func TestListContainerStats(t *testing.T) { - c := newTestCRIService() - type args struct { - ctx context.Context - stats []*types.Metric - containers []containerstore.Container 
- } - tests := []struct { - name string - args args - before func() - after func() - want *runtime.ListContainerStatsResponse - wantErr bool - }{ - { - name: "args containers having c1,but containerStore not found c1, so filter c1", - args: args{ - ctx: context.Background(), - stats: []*types.Metric{ - { - ID: "c1", - }, - }, - containers: []containerstore.Container{ - { - Metadata: containerstore.Metadata{ - ID: "c1", - SandboxID: "s1", - }, - }, - }, - }, - want: &runtime.ListContainerStatsResponse{}, - }, - { - name: "args containers having c1,c2, but containerStore not found c1, so filter c1", - args: args{ - ctx: context.Background(), - stats: []*types.Metric{ - { - ID: "c1", - }, - { - ID: "c2", - }, - }, - containers: []containerstore.Container{ - { - Metadata: containerstore.Metadata{ - ID: "c1", - SandboxID: "s1", - }, - }, - { - Metadata: containerstore.Metadata{ - ID: "c2", - SandboxID: "s2", - }, - }, - }, - }, - before: func() { - c.containerStore.Add(containerstore.Container{ - Metadata: containerstore.Metadata{ - ID: "c2", - }, - }) - c.sandboxStore.Add(sandboxstore.Sandbox{ - Metadata: sandboxstore.Metadata{ - ID: "s2", - }, - }) - }, - wantErr: true, - want: nil, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if tt.before != nil { - tt.before() - } - got, err := c.toCRIContainerStats(tt.args.ctx, tt.args.stats, tt.args.containers) - if tt.after != nil { - tt.after() - } - if (err != nil) != tt.wantErr { - t.Errorf("ListContainerStats() error = %v, wantErr %v", err, tt.wantErr) - return - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("ListContainerStats() = %v, want %v", got, tt.want) - } - }) - } - -} diff --git a/pkg/cri/sbserver/container_status.go b/pkg/cri/sbserver/container_status.go deleted file mode 100644 index b9bcf7058..000000000 --- a/pkg/cri/sbserver/container_status.go +++ /dev/null @@ -1,185 +0,0 @@ -/* - Copyright The containerd Authors. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "encoding/json" - "fmt" - - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/pkg/cri/sbserver/images" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// ContainerStatus inspects the container and returns the status. -func (c *criService) ContainerStatus(ctx context.Context, r *runtime.ContainerStatusRequest) (*runtime.ContainerStatusResponse, error) { - container, err := c.containerStore.Get(r.GetContainerId()) - if err != nil { - return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err) - } - - // TODO(random-liu): Clean up the following logic in CRI. - // Current assumption: - // * ImageSpec in container config is image ID. - // * ImageSpec in container status is image tag. - // * ImageRef in container status is repo digest. - spec := container.Config.GetImage() - imageRef := container.ImageRef - image, err := c.GetImage(imageRef) - if err != nil { - if !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("failed to get image %q: %w", imageRef, err) - } - } else { - repoTags, repoDigests := images.ParseImageReferences(image.References) - if len(repoTags) > 0 { - // Based on current behavior of dockershim, this field should be - // image tag. 
- spec = &runtime.ImageSpec{Image: repoTags[0]} - } - if len(repoDigests) > 0 { - // Based on the CRI definition, this field will be consumed by user. - imageRef = repoDigests[0] - } - } - status := toCRIContainerStatus(container, spec, imageRef) - if status.GetCreatedAt() == 0 { - // CRI doesn't allow CreatedAt == 0. - info, err := container.Container.Info(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get CreatedAt in %q state: %w", status.State, err) - } - status.CreatedAt = info.CreatedAt.UnixNano() - } - - info, err := toCRIContainerInfo(ctx, container, r.GetVerbose()) - if err != nil { - return nil, fmt.Errorf("failed to get verbose container info: %w", err) - } - - return &runtime.ContainerStatusResponse{ - Status: status, - Info: info, - }, nil -} - -// toCRIContainerStatus converts internal container object to CRI container status. -func toCRIContainerStatus(container containerstore.Container, spec *runtime.ImageSpec, imageRef string) *runtime.ContainerStatus { - meta := container.Metadata - status := container.Status.Get() - reason := status.Reason - if status.State() == runtime.ContainerState_CONTAINER_EXITED && reason == "" { - if status.ExitCode == 0 { - reason = completeExitReason - } else { - reason = errorExitReason - } - } - - // If container is in the created state, not set started and finished unix timestamps - var st, ft int64 - switch status.State() { - case runtime.ContainerState_CONTAINER_RUNNING: - // If container is in the running state, set started unix timestamps - st = status.StartedAt - case runtime.ContainerState_CONTAINER_EXITED, runtime.ContainerState_CONTAINER_UNKNOWN: - st, ft = status.StartedAt, status.FinishedAt - } - - return &runtime.ContainerStatus{ - Id: meta.ID, - Metadata: meta.Config.GetMetadata(), - State: status.State(), - CreatedAt: status.CreatedAt, - StartedAt: st, - FinishedAt: ft, - ExitCode: status.ExitCode, - Image: spec, - ImageRef: imageRef, - Reason: reason, - Message: status.Message, - Labels: 
meta.Config.GetLabels(), - Annotations: meta.Config.GetAnnotations(), - Mounts: meta.Config.GetMounts(), - LogPath: meta.LogPath, - Resources: status.Resources, - } -} - -// ContainerInfo is extra information for a container. -type ContainerInfo struct { - // TODO(random-liu): Add sandboxID in CRI container status. - SandboxID string `json:"sandboxID"` - Pid uint32 `json:"pid"` - Removing bool `json:"removing"` - SnapshotKey string `json:"snapshotKey"` - Snapshotter string `json:"snapshotter"` - RuntimeType string `json:"runtimeType"` - RuntimeOptions interface{} `json:"runtimeOptions"` - Config *runtime.ContainerConfig `json:"config"` - RuntimeSpec *runtimespec.Spec `json:"runtimeSpec"` -} - -// toCRIContainerInfo converts internal container object information to CRI container status response info map. -func toCRIContainerInfo(ctx context.Context, container containerstore.Container, verbose bool) (map[string]string, error) { - if !verbose { - return nil, nil - } - - meta := container.Metadata - status := container.Status.Get() - - // TODO(random-liu): Change CRI status info to use array instead of map. 
- ci := &ContainerInfo{ - SandboxID: container.SandboxID, - Pid: status.Pid, - Removing: status.Removing, - Config: meta.Config, - } - - var err error - ci.RuntimeSpec, err = container.Container.Spec(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get container runtime spec: %w", err) - } - - ctrInfo, err := container.Container.Info(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get container info: %w", err) - } - ci.SnapshotKey = ctrInfo.SnapshotKey - ci.Snapshotter = ctrInfo.Snapshotter - - runtimeOptions, err := getRuntimeOptions(ctrInfo) - if err != nil { - return nil, fmt.Errorf("failed to get runtime options: %w", err) - } - ci.RuntimeType = ctrInfo.Runtime.Name - ci.RuntimeOptions = runtimeOptions - - infoBytes, err := json.Marshal(ci) - if err != nil { - return nil, fmt.Errorf("failed to marshal info %v: %w", ci, err) - } - return map[string]string{ - "info": string(infoBytes), - }, nil -} diff --git a/pkg/cri/sbserver/container_status_test.go b/pkg/cri/sbserver/container_status_test.go deleted file mode 100644 index 88e6c02b9..000000000 --- a/pkg/cri/sbserver/container_status_test.go +++ /dev/null @@ -1,297 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "errors" - "testing" - "time" - - criconfig "github.com/containerd/containerd/pkg/cri/config" - snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot" - "github.com/stretchr/testify/assert" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - imagestore "github.com/containerd/containerd/pkg/cri/store/image" -) - -func getContainerStatusTestData() (*containerstore.Metadata, *containerstore.Status, - *imagestore.Image, *runtime.ContainerStatus) { - imageID := "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" - testID := "test-id" - config := &runtime.ContainerConfig{ - Metadata: &runtime.ContainerMetadata{ - Name: "test-name", - Attempt: 1, - }, - Image: &runtime.ImageSpec{Image: "test-image"}, - Mounts: []*runtime.Mount{{ - ContainerPath: "test-container-path", - HostPath: "test-host-path", - }}, - Labels: map[string]string{"a": "b"}, - Annotations: map[string]string{"c": "d"}, - } - - createdAt := time.Now().UnixNano() - - metadata := &containerstore.Metadata{ - ID: testID, - Name: "test-long-name", - SandboxID: "test-sandbox-id", - Config: config, - ImageRef: imageID, - LogPath: "test-log-path", - } - status := &containerstore.Status{ - Pid: 1234, - CreatedAt: createdAt, - } - image := &imagestore.Image{ - ID: imageID, - References: []string{ - "gcr.io/library/busybox:latest", - "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", - }, - } - expected := &runtime.ContainerStatus{ - Id: testID, - Metadata: config.GetMetadata(), - State: runtime.ContainerState_CONTAINER_CREATED, - CreatedAt: createdAt, - Image: &runtime.ImageSpec{Image: "gcr.io/library/busybox:latest"}, - ImageRef: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", - Reason: completeExitReason, - Labels: config.GetLabels(), - Annotations: 
config.GetAnnotations(), - Mounts: config.GetMounts(), - LogPath: "test-log-path", - } - - return metadata, status, image, expected -} - -func TestToCRIContainerStatus(t *testing.T) { - for _, test := range []struct { - desc string - startedAt int64 - finishedAt int64 - exitCode int32 - reason string - message string - expectedState runtime.ContainerState - expectedReason string - }{ - { - desc: "container created", - expectedState: runtime.ContainerState_CONTAINER_CREATED, - }, - { - desc: "container running", - startedAt: time.Now().UnixNano(), - expectedState: runtime.ContainerState_CONTAINER_RUNNING, - }, - { - desc: "container exited with reason", - startedAt: time.Now().UnixNano(), - finishedAt: time.Now().UnixNano(), - exitCode: 1, - reason: "test-reason", - message: "test-message", - expectedState: runtime.ContainerState_CONTAINER_EXITED, - expectedReason: "test-reason", - }, - { - desc: "container exited with exit code 0 without reason", - startedAt: time.Now().UnixNano(), - finishedAt: time.Now().UnixNano(), - exitCode: 0, - message: "test-message", - expectedState: runtime.ContainerState_CONTAINER_EXITED, - expectedReason: completeExitReason, - }, - { - desc: "container exited with non-zero exit code without reason", - startedAt: time.Now().UnixNano(), - finishedAt: time.Now().UnixNano(), - exitCode: 1, - message: "test-message", - expectedState: runtime.ContainerState_CONTAINER_EXITED, - expectedReason: errorExitReason, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - - metadata, status, _, expected := getContainerStatusTestData() - // Update status with test case. - status.StartedAt = test.startedAt - status.FinishedAt = test.finishedAt - status.ExitCode = test.exitCode - status.Reason = test.reason - status.Message = test.message - container, err := containerstore.NewContainer( - *metadata, - containerstore.WithFakeStatus(*status), - ) - assert.NoError(t, err) - // Set expectation based on test case. 
- expected.Reason = test.expectedReason - expected.StartedAt = test.startedAt - expected.FinishedAt = test.finishedAt - expected.ExitCode = test.exitCode - expected.Message = test.message - patchExceptedWithState(expected, test.expectedState) - containerStatus := toCRIContainerStatus(container, - expected.Image, - expected.ImageRef) - assert.Equal(t, expected, containerStatus, test.desc) - }) - } -} - -// TODO(mikebrow): add a fake containerd container.Container.Spec client api so we can test verbose is true option -func TestToCRIContainerInfo(t *testing.T) { - metadata, status, _, _ := getContainerStatusTestData() - container, err := containerstore.NewContainer( - *metadata, - containerstore.WithFakeStatus(*status), - ) - assert.NoError(t, err) - - info, err := toCRIContainerInfo(context.Background(), - container, - false) - assert.NoError(t, err) - assert.Nil(t, info) -} - -func TestContainerStatus(t *testing.T) { - for _, test := range []struct { - desc string - exist bool - imageExist bool - startedAt int64 - finishedAt int64 - reason string - expectedState runtime.ContainerState - expectErr bool - }{ - { - desc: "container created", - exist: true, - imageExist: true, - expectedState: runtime.ContainerState_CONTAINER_CREATED, - }, - { - desc: "container running", - exist: true, - imageExist: true, - startedAt: time.Now().UnixNano(), - expectedState: runtime.ContainerState_CONTAINER_RUNNING, - }, - { - desc: "container exited", - exist: true, - imageExist: true, - startedAt: time.Now().UnixNano(), - finishedAt: time.Now().UnixNano(), - reason: "test-reason", - expectedState: runtime.ContainerState_CONTAINER_EXITED, - }, - { - desc: "container not exist", - exist: false, - imageExist: true, - expectErr: true, - }, - { - desc: "image not exist", - exist: false, - imageExist: false, - expectErr: true, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c := newTestCRIService() - metadata, status, image, expected := getContainerStatusTestData() - // 
Update status with test case. - status.StartedAt = test.startedAt - status.FinishedAt = test.finishedAt - status.Reason = test.reason - container, err := containerstore.NewContainer( - *metadata, - containerstore.WithFakeStatus(*status), - ) - assert.NoError(t, err) - if test.exist { - assert.NoError(t, c.containerStore.Add(container)) - } - if test.imageExist { - imageStore, err := imagestore.NewFakeStore([]imagestore.Image{*image}) - assert.NoError(t, err) - c.imageService = &fakeImageService{imageStore: imageStore} - } - resp, err := c.ContainerStatus(context.Background(), &runtime.ContainerStatusRequest{ContainerId: container.ID}) - if test.expectErr { - assert.Error(t, err) - assert.Nil(t, resp) - return - } - // Set expectation based on test case. - expected.StartedAt = test.startedAt - expected.FinishedAt = test.finishedAt - expected.Reason = test.reason - patchExceptedWithState(expected, test.expectedState) - assert.Equal(t, expected, resp.GetStatus()) - }) - } -} - -type fakeImageService struct { - runtime.ImageServiceServer - imageStore *imagestore.Store -} - -func (s *fakeImageService) RuntimeSnapshotter(ctx context.Context, ociRuntime criconfig.Runtime) string { - return "" -} - -func (s *fakeImageService) UpdateImage(ctx context.Context, r string) error { return nil } - -func (s *fakeImageService) GetImage(id string) (imagestore.Image, error) { return s.imageStore.Get(id) } - -func (s *fakeImageService) GetSnapshot(key string) (snapshotstore.Snapshot, error) { - return snapshotstore.Snapshot{}, errors.New("not implemented") -} - -func (s *fakeImageService) LocalResolve(refOrID string) (imagestore.Image, error) { - return imagestore.Image{}, errors.New("not implemented") -} - -func patchExceptedWithState(expected *runtime.ContainerStatus, state runtime.ContainerState) { - expected.State = state - switch state { - case runtime.ContainerState_CONTAINER_CREATED: - expected.StartedAt, expected.FinishedAt = 0, 0 - case 
runtime.ContainerState_CONTAINER_RUNNING: - expected.FinishedAt = 0 - } -} diff --git a/pkg/cri/sbserver/container_stop.go b/pkg/cri/sbserver/container_stop.go deleted file mode 100644 index 8a83f48ba..000000000 --- a/pkg/cri/sbserver/container_stop.go +++ /dev/null @@ -1,219 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "fmt" - "sync/atomic" - "syscall" - "time" - - eventtypes "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/errdefs" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - ctrdutil "github.com/containerd/containerd/pkg/cri/util" - "github.com/containerd/containerd/protobuf" - "github.com/containerd/log" - - "github.com/moby/sys/signal" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// StopContainer stops a running container with a grace period (i.e., timeout). -func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) { - start := time.Now() - // Get container config from container store. 
- container, err := c.containerStore.Get(r.GetContainerId()) - if err != nil { - return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err) - } - - if err := c.stopContainer(ctx, container, time.Duration(r.GetTimeout())*time.Second); err != nil { - return nil, err - } - - sandbox, err := c.sandboxStore.Get(container.SandboxID) - if err != nil { - err = c.nri.StopContainer(ctx, nil, &container) - } else { - err = c.nri.StopContainer(ctx, &sandbox, &container) - } - if err != nil { - log.G(ctx).WithError(err).Error("NRI failed to stop container") - } - - i, err := container.Container.Info(ctx) - if err != nil { - return nil, fmt.Errorf("get container info: %w", err) - } - - containerStopTimer.WithValues(i.Runtime.Name).UpdateSince(start) - - return &runtime.StopContainerResponse{}, nil -} - -// stopContainer stops a container based on the container metadata. -func (c *criService) stopContainer(ctx context.Context, container containerstore.Container, timeout time.Duration) error { - id := container.ID - sandboxID := container.SandboxID - - // Return without error if container is not running. This makes sure that - // stop only takes real action after the container is started. - state := container.Status.Get().State() - if state != runtime.ContainerState_CONTAINER_RUNNING && - state != runtime.ContainerState_CONTAINER_UNKNOWN { - log.G(ctx).Infof("Container to stop %q must be in running or unknown state, current state %q", - id, criContainerStateToString(state)) - return nil - } - - task, err := container.Container.Task(ctx, nil) - if err != nil { - if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to get task for container %q: %w", id, err) - } - // Don't return for unknown state, some cleanup needs to be done. - if state == runtime.ContainerState_CONTAINER_UNKNOWN { - return cleanupUnknownContainer(ctx, id, container, sandboxID, c) - } - return nil - } - - // Handle unknown state. 
- if state == runtime.ContainerState_CONTAINER_UNKNOWN { - // Start an exit handler for containers in unknown state. - waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext()) - defer waitCancel() - exitCh, err := task.Wait(waitCtx) - if err != nil { - if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to wait for task for %q: %w", id, err) - } - return cleanupUnknownContainer(ctx, id, container, sandboxID, c) - } - - exitCtx, exitCancel := context.WithCancel(context.Background()) - stopCh := c.eventMonitor.startContainerExitMonitor(exitCtx, id, task.Pid(), exitCh) - defer func() { - exitCancel() - // This ensures that exit monitor is stopped before - // `Wait` is cancelled, so no exit event is generated - // because of the `Wait` cancellation. - <-stopCh - }() - } - - // We only need to kill the task. The event handler will Delete the - // task from containerd after it handles the Exited event. - if timeout > 0 { - stopSignal := "SIGTERM" - if container.StopSignal != "" { - stopSignal = container.StopSignal - } else { - // The image may have been deleted, and the `StopSignal` field is - // just introduced to handle that. - // However, for containers created before the `StopSignal` field is - // introduced, still try to get the stop signal from the image config. - // If the image has been deleted, logging an error and using the - // default SIGTERM is still better than returning error and leaving - // the container unstoppable. (See issue #990) - // TODO(random-liu): Remove this logic when containerd 1.2 is deprecated. 
- image, err := c.GetImage(container.ImageRef) - if err != nil { - if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to get image %q: %w", container.ImageRef, err) - } - log.G(ctx).Warningf("Image %q not found, stop container with signal %q", container.ImageRef, stopSignal) - } else { - if image.ImageSpec.Config.StopSignal != "" { - stopSignal = image.ImageSpec.Config.StopSignal - } - } - } - sig, err := signal.ParseSignal(stopSignal) - if err != nil { - return fmt.Errorf("failed to parse stop signal %q: %w", stopSignal, err) - } - - var sswt bool - if container.IsStopSignaledWithTimeout == nil { - log.G(ctx).Infof("unable to ensure stop signal %v was not sent twice to container %v", sig, id) - sswt = true - } else { - sswt = atomic.CompareAndSwapUint32(container.IsStopSignaledWithTimeout, 0, 1) - } - - if sswt { - log.G(ctx).Infof("Stop container %q with signal %v", id, sig) - if err = task.Kill(ctx, sig); err != nil && !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to stop container %q: %w", id, err) - } - } else { - log.G(ctx).Infof("Skipping the sending of signal %v to container %q because a prior stop with timeout>0 request already sent the signal", sig, id) - } - - sigTermCtx, sigTermCtxCancel := context.WithTimeout(ctx, timeout) - defer sigTermCtxCancel() - err = c.waitContainerStop(sigTermCtx, container) - if err == nil { - // Container stopped on first signal no need for SIGKILL - return nil - } - // If the parent context was cancelled or exceeded return immediately - if ctx.Err() != nil { - return ctx.Err() - } - // sigTermCtx was exceeded. Send SIGKILL - log.G(ctx).Debugf("Stop container %q with signal %v timed out", id, sig) - } - - log.G(ctx).Infof("Kill container %q", id) - if err = task.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to kill container %q: %w", id, err) - } - - // Wait for a fixed timeout until container stop is observed by event monitor. 
- err = c.waitContainerStop(ctx, container) - if err != nil { - return fmt.Errorf("an error occurs during waiting for container %q to be killed: %w", id, err) - } - return nil -} - -// waitContainerStop waits for container to be stopped until context is -// cancelled or the context deadline is exceeded. -func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container) error { - select { - case <-ctx.Done(): - return fmt.Errorf("wait container %q: %w", container.ID, ctx.Err()) - case <-container.Stopped(): - return nil - } -} - -// cleanupUnknownContainer cleanup stopped container in unknown state. -func cleanupUnknownContainer(ctx context.Context, id string, cntr containerstore.Container, sandboxID string, c *criService) error { - // Reuse handleContainerExit to do the cleanup. - return handleContainerExit(ctx, &eventtypes.TaskExit{ - ContainerID: id, - ID: id, - Pid: 0, - ExitStatus: unknownExitCode, - ExitedAt: protobuf.ToTimestamp(time.Now()), - }, cntr, sandboxID, c) -} diff --git a/pkg/cri/sbserver/container_stop_test.go b/pkg/cri/sbserver/container_stop_test.go deleted file mode 100644 index 8dbee0d37..000000000 --- a/pkg/cri/sbserver/container_stop_test.go +++ /dev/null @@ -1,92 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "testing" - "time" - - "github.com/stretchr/testify/assert" - - containerstore "github.com/containerd/containerd/pkg/cri/store/container" -) - -func TestWaitContainerStop(t *testing.T) { - id := "test-id" - for _, test := range []struct { - desc string - status *containerstore.Status - cancel bool - timeout time.Duration - expectErr bool - }{ - { - desc: "should return error if timeout exceeds", - status: &containerstore.Status{ - CreatedAt: time.Now().UnixNano(), - StartedAt: time.Now().UnixNano(), - }, - timeout: 200 * time.Millisecond, - expectErr: true, - }, - { - desc: "should return error if context is cancelled", - status: &containerstore.Status{ - CreatedAt: time.Now().UnixNano(), - StartedAt: time.Now().UnixNano(), - }, - timeout: time.Hour, - cancel: true, - expectErr: true, - }, - { - desc: "should not return error if container is stopped before timeout", - status: &containerstore.Status{ - CreatedAt: time.Now().UnixNano(), - StartedAt: time.Now().UnixNano(), - FinishedAt: time.Now().UnixNano(), - }, - timeout: time.Hour, - expectErr: false, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c := newTestCRIService() - container, err := containerstore.NewContainer( - containerstore.Metadata{ID: id}, - containerstore.WithFakeStatus(*test.status), - ) - assert.NoError(t, err) - assert.NoError(t, c.containerStore.Add(container)) - ctx := context.Background() - if test.cancel { - cancelledCtx, cancel := context.WithCancel(ctx) - cancel() - ctx = cancelledCtx - } - if test.timeout > 0 { - timeoutCtx, cancel := context.WithTimeout(ctx, test.timeout) - defer cancel() - ctx = timeoutCtx - } - err = c.waitContainerStop(ctx, container) - assert.Equal(t, test.expectErr, err != nil, test.desc) - }) - } -} diff --git a/pkg/cri/sbserver/container_update_resources.go b/pkg/cri/sbserver/container_update_resources.go deleted file mode 100644 index 8e80bc29d..000000000 --- 
a/pkg/cri/sbserver/container_update_resources.go +++ /dev/null @@ -1,157 +0,0 @@ -//go:build !darwin && !freebsd - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - gocontext "context" - "fmt" - - "github.com/containerd/typeurl/v2" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/containers" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/log" - - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - ctrdutil "github.com/containerd/containerd/pkg/cri/util" -) - -// UpdateContainerResources updates ContainerConfig of the container. 
-func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (retRes *runtime.UpdateContainerResourcesResponse, retErr error) { - container, err := c.containerStore.Get(r.GetContainerId()) - if err != nil { - return nil, fmt.Errorf("failed to find container: %w", err) - } - - sandbox, err := c.sandboxStore.Get(container.SandboxID) - if err != nil { - return nil, err - } - - resources := r.GetLinux() - updated, err := c.nri.UpdateContainerResources(ctx, &sandbox, &container, resources) - if err != nil { - return nil, fmt.Errorf("NRI container update failed: %w", err) - } - if updated != nil { - *resources = *updated - } - - // Update resources in status update transaction, so that: - // 1) There won't be race condition with container start. - // 2) There won't be concurrent resource update to the same container. - if err := container.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { - return c.updateContainerResources(ctx, container, r, status) - }); err != nil { - return nil, fmt.Errorf("failed to update resources: %w", err) - } - - err = c.nri.PostUpdateContainerResources(ctx, &sandbox, &container) - if err != nil { - log.G(ctx).WithError(err).Errorf("NRI post-update notification failed") - } - - return &runtime.UpdateContainerResourcesResponse{}, nil -} - -func (c *criService) updateContainerResources(ctx context.Context, - cntr containerstore.Container, - r *runtime.UpdateContainerResourcesRequest, - status containerstore.Status) (newStatus containerstore.Status, retErr error) { - - newStatus = status - id := cntr.ID - // Do not update the container when there is a removal in progress. - if status.Removing { - return newStatus, fmt.Errorf("container %q is in removing state", id) - } - - // Update container spec. 
If the container is not started yet, updating - // spec makes sure that the resource limits are correct when start; - // if the container is already started, updating spec is still required, - // the spec will become our source of truth for resource limits. - oldSpec, err := cntr.Container.Spec(ctx) - if err != nil { - return newStatus, fmt.Errorf("failed to get container spec: %w", err) - } - newSpec, err := updateOCIResource(ctx, oldSpec, r, c.config) - if err != nil { - return newStatus, fmt.Errorf("failed to update resource in spec: %w", err) - } - - if err := updateContainerSpec(ctx, cntr.Container, newSpec); err != nil { - return newStatus, err - } - defer func() { - if retErr != nil { - deferCtx, deferCancel := ctrdutil.DeferContext() - defer deferCancel() - // Reset spec on error. - if err := updateContainerSpec(deferCtx, cntr.Container, oldSpec); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to update spec %+v for container %q", oldSpec, id) - } - } else { - // Update container status only when the spec is updated - newStatus = copyResourcesToStatus(newSpec, status) - } - }() - - // If container is not running, only update spec is enough, new resource - // limit will be applied when container start. - if status.State() != runtime.ContainerState_CONTAINER_RUNNING { - return newStatus, nil - } - - task, err := cntr.Container.Task(ctx, nil) - if err != nil { - if errdefs.IsNotFound(err) { - // Task exited already. - return newStatus, nil - } - return newStatus, fmt.Errorf("failed to get task: %w", err) - } - // newSpec.Linux / newSpec.Windows won't be nil - if err := task.Update(ctx, containerd.WithResources(getResources(newSpec))); err != nil { - if errdefs.IsNotFound(err) { - // Task exited already. - return newStatus, nil - } - return newStatus, fmt.Errorf("failed to update resources: %w", err) - } - return newStatus, nil -} - -// updateContainerSpec updates container spec. 
-func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error { - s, err := typeurl.MarshalAny(spec) - if err != nil { - return fmt.Errorf("failed to marshal spec %+v: %w", spec, err) - } - if err := cntr.Update(ctx, func(ctx gocontext.Context, client *containerd.Client, c *containers.Container) error { - c.Spec = s - return nil - }); err != nil { - return fmt.Errorf("failed to update container spec: %w", err) - } - return nil -} diff --git a/pkg/cri/sbserver/container_update_resources_linux.go b/pkg/cri/sbserver/container_update_resources_linux.go deleted file mode 100644 index 04186ac11..000000000 --- a/pkg/cri/sbserver/container_update_resources_linux.go +++ /dev/null @@ -1,51 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "fmt" - - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - criconfig "github.com/containerd/containerd/pkg/cri/config" - "github.com/containerd/containerd/pkg/cri/opts" - "github.com/containerd/containerd/pkg/cri/util" -) - -// updateOCIResource updates container resource limit. -func updateOCIResource(ctx context.Context, spec *runtimespec.Spec, r *runtime.UpdateContainerResourcesRequest, - config criconfig.Config) (*runtimespec.Spec, error) { - - // Copy to make sure old spec is not changed. 
- var cloned runtimespec.Spec - if err := util.DeepCopy(&cloned, spec); err != nil { - return nil, fmt.Errorf("failed to deep copy: %w", err) - } - if cloned.Linux == nil { - cloned.Linux = &runtimespec.Linux{} - } - if err := opts.WithResources(r.GetLinux(), config.TolerateMissingHugetlbController, config.DisableHugetlbController)(ctx, nil, nil, &cloned); err != nil { - return nil, fmt.Errorf("unable to set linux container resources: %w", err) - } - return &cloned, nil -} - -func getResources(spec *runtimespec.Spec) interface{} { - return spec.Linux.Resources -} diff --git a/pkg/cri/sbserver/container_update_resources_linux_test.go b/pkg/cri/sbserver/container_update_resources_linux_test.go deleted file mode 100644 index 6cff864e8..000000000 --- a/pkg/cri/sbserver/container_update_resources_linux_test.go +++ /dev/null @@ -1,256 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "testing" - - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/stretchr/testify/assert" - "google.golang.org/protobuf/proto" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - criconfig "github.com/containerd/containerd/pkg/cri/config" - criopts "github.com/containerd/containerd/pkg/cri/opts" -) - -func TestUpdateOCILinuxResource(t *testing.T) { - oomscoreadj := new(int) - *oomscoreadj = -500 - expectedSwap := func(swap int64) *int64 { - if criopts.SwapControllerAvailable() { - return &swap - } - return nil - } - - for _, test := range []struct { - desc string - spec *runtimespec.Spec - request *runtime.UpdateContainerResourcesRequest - expected *runtimespec.Spec - expectErr bool - }{ - { - desc: "should be able to update each resource", - spec: &runtimespec.Spec{ - Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, - Linux: &runtimespec.Linux{ - Resources: &runtimespec.LinuxResources{ - Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)}, - CPU: &runtimespec.LinuxCPU{ - Shares: proto.Uint64(1111), - Quota: proto.Int64(2222), - Period: proto.Uint64(3333), - Cpus: "0-1", - Mems: "2-3", - }, - Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}, - }, - }, - }, - request: &runtime.UpdateContainerResourcesRequest{ - Linux: &runtime.LinuxContainerResources{ - CpuPeriod: 6666, - CpuQuota: 5555, - CpuShares: 4444, - MemoryLimitInBytes: 54321, - OomScoreAdj: 500, - CpusetCpus: "4-5", - CpusetMems: "6-7", - Unified: map[string]string{"memory.min": "1507328", "memory.swap.max": "0"}, - }, - }, - expected: &runtimespec.Spec{ - Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, - Linux: &runtimespec.Linux{ - Resources: &runtimespec.LinuxResources{ - Memory: &runtimespec.LinuxMemory{ - Limit: proto.Int64(54321), - Swap: expectedSwap(54321), - }, - CPU: &runtimespec.LinuxCPU{ - Shares: proto.Uint64(4444), - Quota: proto.Int64(5555), - Period: 
proto.Uint64(6666), - Cpus: "4-5", - Mems: "6-7", - }, - Unified: map[string]string{"memory.min": "1507328", "memory.swap.max": "0"}, - }, - }, - }, - }, - { - desc: "should skip empty fields", - spec: &runtimespec.Spec{ - Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, - Linux: &runtimespec.Linux{ - Resources: &runtimespec.LinuxResources{ - Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)}, - CPU: &runtimespec.LinuxCPU{ - Shares: proto.Uint64(1111), - Quota: proto.Int64(2222), - Period: proto.Uint64(3333), - Cpus: "0-1", - Mems: "2-3", - }, - Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}, - }, - }, - }, - request: &runtime.UpdateContainerResourcesRequest{ - Linux: &runtime.LinuxContainerResources{ - CpuQuota: 5555, - CpuShares: 4444, - MemoryLimitInBytes: 54321, - OomScoreAdj: 500, - CpusetMems: "6-7", - }, - }, - expected: &runtimespec.Spec{ - Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, - Linux: &runtimespec.Linux{ - Resources: &runtimespec.LinuxResources{ - Memory: &runtimespec.LinuxMemory{ - Limit: proto.Int64(54321), - Swap: expectedSwap(54321), - }, - CPU: &runtimespec.LinuxCPU{ - Shares: proto.Uint64(4444), - Quota: proto.Int64(5555), - Period: proto.Uint64(3333), - Cpus: "0-1", - Mems: "6-7", - }, - Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}, - }, - }, - }, - }, - { - desc: "should be able to fill empty fields", - spec: &runtimespec.Spec{ - Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, - Linux: &runtimespec.Linux{ - Resources: &runtimespec.LinuxResources{ - Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)}, - }, - }, - }, - request: &runtime.UpdateContainerResourcesRequest{ - Linux: &runtime.LinuxContainerResources{ - CpuPeriod: 6666, - CpuQuota: 5555, - CpuShares: 4444, - MemoryLimitInBytes: 54321, - OomScoreAdj: 500, - CpusetCpus: "4-5", - CpusetMems: "6-7", - Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}, - 
}, - }, - expected: &runtimespec.Spec{ - Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, - Linux: &runtimespec.Linux{ - Resources: &runtimespec.LinuxResources{ - Memory: &runtimespec.LinuxMemory{ - Limit: proto.Int64(54321), - Swap: expectedSwap(54321), - }, - CPU: &runtimespec.LinuxCPU{ - Shares: proto.Uint64(4444), - Quota: proto.Int64(5555), - Period: proto.Uint64(6666), - Cpus: "4-5", - Mems: "6-7", - }, - Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"}, - }, - }, - }, - }, - { - desc: "should be able to patch the unified map", - spec: &runtimespec.Spec{ - Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, - Linux: &runtimespec.Linux{ - Resources: &runtimespec.LinuxResources{ - Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)}, - CPU: &runtimespec.LinuxCPU{ - Shares: proto.Uint64(1111), - Quota: proto.Int64(2222), - Period: proto.Uint64(3333), - Cpus: "0-1", - Mems: "2-3", - }, - Unified: map[string]string{"memory.min": "65536", "memory.max": "1507328"}, - }, - }, - }, - request: &runtime.UpdateContainerResourcesRequest{ - Linux: &runtime.LinuxContainerResources{ - CpuPeriod: 6666, - CpuQuota: 5555, - CpuShares: 4444, - MemoryLimitInBytes: 54321, - OomScoreAdj: 500, - CpusetCpus: "4-5", - CpusetMems: "6-7", - Unified: map[string]string{"memory.min": "1507328", "memory.swap.max": "1024"}, - }, - }, - expected: &runtimespec.Spec{ - Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj}, - Linux: &runtimespec.Linux{ - Resources: &runtimespec.LinuxResources{ - Memory: &runtimespec.LinuxMemory{ - Limit: proto.Int64(54321), - Swap: expectedSwap(54321), - }, - CPU: &runtimespec.LinuxCPU{ - Shares: proto.Uint64(4444), - Quota: proto.Int64(5555), - Period: proto.Uint64(6666), - Cpus: "4-5", - Mems: "6-7", - }, - Unified: map[string]string{"memory.min": "1507328", "memory.max": "1507328", "memory.swap.max": "1024"}, - }, - }, - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - config := 
criconfig.Config{ - PluginConfig: criconfig.PluginConfig{ - TolerateMissingHugetlbController: true, - DisableHugetlbController: false, - }, - } - got, err := updateOCIResource(context.Background(), test.spec, test.request, config) - if test.expectErr { - assert.Error(t, err) - } else { - assert.NoError(t, err) - } - assert.Equal(t, test.expected, got) - }) - } -} diff --git a/pkg/cri/sbserver/container_update_resources_other.go b/pkg/cri/sbserver/container_update_resources_other.go deleted file mode 100644 index 7fa3f29df..000000000 --- a/pkg/cri/sbserver/container_update_resources_other.go +++ /dev/null @@ -1,45 +0,0 @@ -//go:build !windows && !linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "fmt" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - containerstore "github.com/containerd/containerd/pkg/cri/store/container" -) - -// UpdateContainerResources updates ContainerConfig of the container. -func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (retRes *runtime.UpdateContainerResourcesResponse, retErr error) { - container, err := c.containerStore.Get(r.GetContainerId()) - if err != nil { - return nil, fmt.Errorf("failed to find container: %w", err) - } - // Update resources in status update transaction, so that: - // 1) There won't be race condition with container start. 
- // 2) There won't be concurrent resource update to the same container. - if err := container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) { - return status, nil - }); err != nil { - return nil, fmt.Errorf("failed to update resources: %w", err) - } - return &runtime.UpdateContainerResourcesResponse{}, nil -} diff --git a/pkg/cri/sbserver/container_update_resources_windows.go b/pkg/cri/sbserver/container_update_resources_windows.go deleted file mode 100644 index adbad53fd..000000000 --- a/pkg/cri/sbserver/container_update_resources_windows.go +++ /dev/null @@ -1,51 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "fmt" - - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - criconfig "github.com/containerd/containerd/pkg/cri/config" - "github.com/containerd/containerd/pkg/cri/opts" - "github.com/containerd/containerd/pkg/cri/util" -) - -// updateOCIResource updates container resource limit. -func updateOCIResource(ctx context.Context, spec *runtimespec.Spec, r *runtime.UpdateContainerResourcesRequest, - config criconfig.Config) (*runtimespec.Spec, error) { - - // Copy to make sure old spec is not changed. 
- var cloned runtimespec.Spec - if err := util.DeepCopy(&cloned, spec); err != nil { - return nil, fmt.Errorf("failed to deep copy: %w", err) - } - if cloned.Windows == nil { - cloned.Windows = &runtimespec.Windows{} - } - if err := opts.WithWindowsResources(r.GetWindows())(ctx, nil, nil, &cloned); err != nil { - return nil, fmt.Errorf("unable to set windows container resources: %w", err) - } - return &cloned, nil -} - -func getResources(spec *runtimespec.Spec) interface{} { - return spec.Windows.Resources -} diff --git a/pkg/cri/sbserver/events.go b/pkg/cri/sbserver/events.go deleted file mode 100644 index 1352e056e..000000000 --- a/pkg/cri/sbserver/events.go +++ /dev/null @@ -1,589 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "errors" - "fmt" - "sync" - "time" - - "github.com/containerd/containerd" - eventtypes "github.com/containerd/containerd/api/events" - apitasks "github.com/containerd/containerd/api/services/tasks/v1" - containerdio "github.com/containerd/containerd/cio" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/events" - "github.com/containerd/containerd/pkg/cri/constants" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - ctrdutil "github.com/containerd/containerd/pkg/cri/util" - "github.com/containerd/containerd/protobuf" - "github.com/containerd/log" - "github.com/containerd/typeurl/v2" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - "k8s.io/utils/clock" -) - -const ( - backOffInitDuration = 1 * time.Second - backOffMaxDuration = 5 * time.Minute - backOffExpireCheckDuration = 1 * time.Second - - // handleEventTimeout is the timeout for handling 1 event. Event monitor - // handles events in serial, if one event blocks the event monitor, no - // other events can be handled. - // Add a timeout for each event handling, events that timeout will be requeued and - // handled again in the future. - handleEventTimeout = 10 * time.Second -) - -// eventMonitor monitors containerd event and updates internal state correspondingly. -type eventMonitor struct { - c *criService - ch <-chan *events.Envelope - errCh <-chan error - ctx context.Context - cancel context.CancelFunc - backOff *backOff -} - -type backOff struct { - // queuePoolMu is mutex used to protect the queuePool map - queuePoolMu sync.Mutex - - queuePool map[string]*backOffQueue - // tickerMu is mutex used to protect the ticker. 
- tickerMu sync.Mutex - ticker *time.Ticker - minDuration time.Duration - maxDuration time.Duration - checkDuration time.Duration - clock clock.Clock -} - -type backOffQueue struct { - events []interface{} - expireTime time.Time - duration time.Duration - clock clock.Clock -} - -// Create new event monitor. New event monitor will start subscribing containerd event. All events -// happen after it should be monitored. -func newEventMonitor(c *criService) *eventMonitor { - ctx, cancel := context.WithCancel(context.Background()) - return &eventMonitor{ - c: c, - ctx: ctx, - cancel: cancel, - backOff: newBackOff(), - } -} - -// subscribe starts to subscribe containerd events. -func (em *eventMonitor) subscribe(subscriber events.Subscriber) { - // note: filters are any match, if you want any match but not in namespace foo - // then you have to manually filter namespace foo - filters := []string{ - `topic=="/tasks/oom"`, - `topic~="/images/"`, - } - em.ch, em.errCh = subscriber.Subscribe(em.ctx, filters...) -} - -// startSandboxExitMonitor starts an exit monitor for a given sandbox. 
-func (em *eventMonitor) startSandboxExitMonitor(ctx context.Context, id string, pid uint32, exitCh <-chan containerd.ExitStatus) <-chan struct{} { - stopCh := make(chan struct{}) - go func() { - defer close(stopCh) - select { - case exitRes := <-exitCh: - exitStatus, exitedAt, err := exitRes.Result() - if err != nil { - log.L.WithError(err).Errorf("failed to get task exit status for %q", id) - exitStatus = unknownExitCode - exitedAt = time.Now() - } - - e := &eventtypes.SandboxExit{ - SandboxID: id, - ExitStatus: exitStatus, - ExitedAt: protobuf.ToTimestamp(exitedAt), - } - - log.L.Debugf("received exit event %+v", e) - - err = func() error { - dctx := ctrdutil.NamespacedContext() - dctx, dcancel := context.WithTimeout(dctx, handleEventTimeout) - defer dcancel() - - sb, err := em.c.sandboxStore.Get(e.GetSandboxID()) - if err == nil { - if err := handleSandboxExit(dctx, sb, e.ExitStatus, e.ExitedAt.AsTime(), em.c); err != nil { - return err - } - return nil - } else if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to get sandbox %s: %w", e.SandboxID, err) - } - return nil - }() - if err != nil { - log.L.WithError(err).Errorf("failed to handle sandbox TaskExit event %+v", e) - em.backOff.enBackOff(id, e) - } - return - case <-ctx.Done(): - } - }() - return stopCh -} - -// startContainerExitMonitor starts an exit monitor for a given container. 
-func (em *eventMonitor) startContainerExitMonitor(ctx context.Context, id string, pid uint32, exitCh <-chan containerd.ExitStatus) <-chan struct{} { - stopCh := make(chan struct{}) - go func() { - defer close(stopCh) - select { - case exitRes := <-exitCh: - exitStatus, exitedAt, err := exitRes.Result() - if err != nil { - log.L.WithError(err).Errorf("failed to get task exit status for %q", id) - exitStatus = unknownExitCode - exitedAt = time.Now() - } - - e := &eventtypes.TaskExit{ - ContainerID: id, - ID: id, - Pid: pid, - ExitStatus: exitStatus, - ExitedAt: protobuf.ToTimestamp(exitedAt), - } - - log.L.Debugf("received exit event %+v", e) - - err = func() error { - dctx := ctrdutil.NamespacedContext() - dctx, dcancel := context.WithTimeout(dctx, handleEventTimeout) - defer dcancel() - - cntr, err := em.c.containerStore.Get(e.ID) - if err == nil { - if err := handleContainerExit(dctx, e, cntr, cntr.SandboxID, em.c); err != nil { - return err - } - return nil - } else if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to get container %s: %w", e.ID, err) - } - return nil - }() - if err != nil { - log.L.WithError(err).Errorf("failed to handle container TaskExit event %+v", e) - em.backOff.enBackOff(id, e) - } - return - case <-ctx.Done(): - } - }() - return stopCh -} - -func convertEvent(e typeurl.Any) (string, interface{}, error) { - id := "" - evt, err := typeurl.UnmarshalAny(e) - if err != nil { - return "", nil, fmt.Errorf("failed to unmarshalany: %w", err) - } - - switch e := evt.(type) { - case *eventtypes.TaskOOM: - id = e.ContainerID - case *eventtypes.SandboxExit: - id = e.SandboxID - case *eventtypes.ImageCreate: - id = e.Name - case *eventtypes.ImageUpdate: - id = e.Name - case *eventtypes.ImageDelete: - id = e.Name - default: - return "", nil, errors.New("unsupported event") - } - return id, evt, nil -} - -// start starts the event monitor which monitors and handles all subscribed events. 
-// It returns an error channel for the caller to wait for stop errors from the -// event monitor. -// -// NOTE: -// 1. start must be called after subscribe. -// 2. The task exit event has been handled in individual startSandboxExitMonitor -// or startContainerExitMonitor goroutine at the first. If the goroutine fails, -// it puts the event into backoff retry queue and event monitor will handle -// it later. -func (em *eventMonitor) start() <-chan error { - errCh := make(chan error) - if em.ch == nil || em.errCh == nil { - panic("event channel is nil") - } - backOffCheckCh := em.backOff.start() - go func() { - defer close(errCh) - for { - select { - case e := <-em.ch: - log.L.Debugf("Received containerd event timestamp - %v, namespace - %q, topic - %q", e.Timestamp, e.Namespace, e.Topic) - if e.Namespace != constants.K8sContainerdNamespace { - log.L.Debugf("Ignoring events in namespace - %q", e.Namespace) - break - } - id, evt, err := convertEvent(e.Event) - if err != nil { - log.L.WithError(err).Errorf("Failed to convert event %+v", e) - break - } - if em.backOff.isInBackOff(id) { - log.L.Infof("Events for %q is in backoff, enqueue event %+v", id, evt) - em.backOff.enBackOff(id, evt) - break - } - if err := em.handleEvent(evt); err != nil { - log.L.WithError(err).Errorf("Failed to handle event %+v for %s", evt, id) - em.backOff.enBackOff(id, evt) - } - case err := <-em.errCh: - // Close errCh in defer directly if there is no error. 
- if err != nil { - log.L.WithError(err).Error("Failed to handle event stream") - errCh <- err - } - return - case <-backOffCheckCh: - ids := em.backOff.getExpiredIDs() - for _, id := range ids { - queue := em.backOff.deBackOff(id) - for i, evt := range queue.events { - if err := em.handleEvent(evt); err != nil { - log.L.WithError(err).Errorf("Failed to handle backOff event %+v for %s", evt, id) - em.backOff.reBackOff(id, queue.events[i:], queue.duration) - break - } - } - } - } - } - }() - return errCh -} - -// stop stops the event monitor. It will close the event channel. -// Once event monitor is stopped, it can't be started. -func (em *eventMonitor) stop() { - em.backOff.stop() - em.cancel() -} - -// handleEvent handles a containerd event. -func (em *eventMonitor) handleEvent(any interface{}) error { - ctx := ctrdutil.NamespacedContext() - ctx, cancel := context.WithTimeout(ctx, handleEventTimeout) - defer cancel() - - switch e := any.(type) { - case *eventtypes.TaskExit: - log.L.Infof("TaskExit event %+v", e) - // Use ID instead of ContainerID to rule out TaskExit event for exec. 
- cntr, err := em.c.containerStore.Get(e.ID) - if err == nil { - if err := handleContainerExit(ctx, e, cntr, cntr.SandboxID, em.c); err != nil { - return fmt.Errorf("failed to handle container TaskExit event: %w", err) - } - return nil - } else if !errdefs.IsNotFound(err) { - return fmt.Errorf("can't find container for TaskExit event: %w", err) - } - sb, err := em.c.sandboxStore.Get(e.ID) - if err == nil { - if err := handleSandboxExit(ctx, sb, e.ExitStatus, e.ExitedAt.AsTime(), em.c); err != nil { - return fmt.Errorf("failed to handle sandbox TaskExit event: %w", err) - } - return nil - } else if !errdefs.IsNotFound(err) { - return fmt.Errorf("can't find sandbox for TaskExit event: %w", err) - } - return nil - case *eventtypes.SandboxExit: - log.L.Infof("SandboxExit event %+v", e) - sb, err := em.c.sandboxStore.Get(e.GetSandboxID()) - if err == nil { - if err := handleSandboxExit(ctx, sb, e.ExitStatus, e.ExitedAt.AsTime(), em.c); err != nil { - return fmt.Errorf("failed to handle sandbox TaskExit event: %w", err) - } - return nil - } else if !errdefs.IsNotFound(err) { - return fmt.Errorf("can't find sandbox for TaskExit event: %w", err) - } - return nil - case *eventtypes.TaskOOM: - log.L.Infof("TaskOOM event %+v", e) - // For TaskOOM, we only care which container it belongs to. 
- cntr, err := em.c.containerStore.Get(e.ContainerID) - if err != nil { - if !errdefs.IsNotFound(err) { - return fmt.Errorf("can't find container for TaskOOM event: %w", err) - } - return nil - } - err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { - status.Reason = oomExitReason - return status, nil - }) - if err != nil { - return fmt.Errorf("failed to update container status for TaskOOM event: %w", err) - } - case *eventtypes.ImageCreate: - log.L.Infof("ImageCreate event %+v", e) - return em.c.UpdateImage(ctx, e.Name) - case *eventtypes.ImageUpdate: - log.L.Infof("ImageUpdate event %+v", e) - return em.c.UpdateImage(ctx, e.Name) - case *eventtypes.ImageDelete: - log.L.Infof("ImageDelete event %+v", e) - return em.c.UpdateImage(ctx, e.Name) - } - - return nil -} - -// handleContainerExit handles TaskExit event for container. -func handleContainerExit(ctx context.Context, e *eventtypes.TaskExit, cntr containerstore.Container, sandboxID string, c *criService) error { - // Attach container IO so that `Delete` could cleanup the stream properly. - task, err := cntr.Container.Task(ctx, - func(*containerdio.FIFOSet) (containerdio.IO, error) { - // We can't directly return cntr.IO here, because - // even if cntr.IO is nil, the cio.IO interface - // is not. - // See https://tour.golang.org/methods/12: - // Note that an interface value that holds a nil - // concrete value is itself non-nil. 
- if cntr.IO != nil { - return cntr.IO, nil - } - return nil, nil - }, - ) - if err != nil { - if !errdefs.IsNotFound(err) && !errdefs.IsUnavailable(err) { - return fmt.Errorf("failed to load task for container: %w", err) - } - } else { - // TODO(random-liu): [P1] This may block the loop, we may want to spawn a worker - if _, err = task.Delete(ctx, c.nri.WithContainerExit(&cntr), containerd.WithProcessKill); err != nil { - if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to stop container: %w", err) - } - // Move on to make sure container status is updated. - } - } - - // NOTE: Both sb.Container.Task and task.Delete interface always ensures - // that the status of target task. However, the interfaces return - // ErrNotFound, which doesn't mean that the shim instance doesn't exist. - // - // There are two caches for task in containerd: - // - // 1. io.containerd.service.v1.tasks-service - // 2. io.containerd.runtime.v2.task - // - // First one is to maintain the shim connection and shutdown the shim - // in Delete API. And the second one is to maintain the lifecycle of - // task in shim server. - // - // So, if the shim instance is running and task has been deleted in shim - // server, the sb.Container.Task and task.Delete will receive the - // ErrNotFound. If we don't delete the shim instance in io.containerd.service.v1.tasks-service, - // shim will be leaky. - // - // Based on containerd/containerd#7496 issue, when host is under IO - // pressure, the umount2 syscall will take more than 10 seconds so that - // the CRI plugin will cancel this task.Delete call. However, the shim - // server isn't aware about this. After return from umount2 syscall, the - // shim server continue delete the task record. And then CRI plugin - // retries to delete task and retrieves ErrNotFound and marks it as - // stopped. Therefore, The shim is leaky. - // - // It's hard to handle the connection lost or request canceled cases in - // shim server. 
We should call Delete API to io.containerd.service.v1.tasks-service - // to ensure that shim instance is shutdown. - // - // REF: - // 1. https://github.com/containerd/containerd/issues/7496#issuecomment-1671100968 - // 2. https://github.com/containerd/containerd/issues/8931 - if errdefs.IsNotFound(err) { - _, err = c.client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: cntr.Container.ID()}) - if err != nil { - err = errdefs.FromGRPC(err) - if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to cleanup container %s in task-service: %w", cntr.Container.ID(), err) - } - } - log.L.Infof("Ensure that container %s in task-service has been cleanup successfully", cntr.Container.ID()) - } - - err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { - if status.FinishedAt == 0 { - status.Pid = 0 - status.FinishedAt = protobuf.FromTimestamp(e.ExitedAt).UnixNano() - status.ExitCode = int32(e.ExitStatus) - } - - // Unknown state can only transit to EXITED state, so we need - // to handle unknown state here. - if status.Unknown { - log.L.Debugf("Container %q transited from UNKNOWN to EXITED", cntr.ID) - status.Unknown = false - } - return status, nil - }) - if err != nil { - return fmt.Errorf("failed to update container state: %w", err) - } - // Using channel to propagate the information of container stop - cntr.Stop() - c.generateAndSendContainerEvent(ctx, cntr.ID, sandboxID, runtime.ContainerEventType_CONTAINER_STOPPED_EVENT) - return nil -} - -// handleSandboxExit handles sandbox exit event. 
-func handleSandboxExit(ctx context.Context, sb sandboxstore.Sandbox, exitStatus uint32, exitTime time.Time, c *criService) error { - if err := sb.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) { - status.State = sandboxstore.StateNotReady - status.Pid = 0 - status.ExitStatus = exitStatus - status.ExitedAt = exitTime - return status, nil - }); err != nil { - return fmt.Errorf("failed to update sandbox state: %w", err) - } - - // Using channel to propagate the information of sandbox stop - sb.Stop() - c.generateAndSendContainerEvent(ctx, sb.ID, sb.ID, runtime.ContainerEventType_CONTAINER_STOPPED_EVENT) - return nil -} - -func newBackOff() *backOff { - return &backOff{ - queuePool: map[string]*backOffQueue{}, - minDuration: backOffInitDuration, - maxDuration: backOffMaxDuration, - checkDuration: backOffExpireCheckDuration, - clock: clock.RealClock{}, - } -} - -func (b *backOff) getExpiredIDs() []string { - b.queuePoolMu.Lock() - defer b.queuePoolMu.Unlock() - - var ids []string - for id, q := range b.queuePool { - if q.isExpire() { - ids = append(ids, id) - } - } - return ids -} - -func (b *backOff) isInBackOff(key string) bool { - b.queuePoolMu.Lock() - defer b.queuePoolMu.Unlock() - - if _, ok := b.queuePool[key]; ok { - return true - } - return false -} - -// enBackOff start to backOff and put event to the tail of queue -func (b *backOff) enBackOff(key string, evt interface{}) { - b.queuePoolMu.Lock() - defer b.queuePoolMu.Unlock() - - if queue, ok := b.queuePool[key]; ok { - queue.events = append(queue.events, evt) - return - } - b.queuePool[key] = newBackOffQueue([]interface{}{evt}, b.minDuration, b.clock) -} - -// enBackOff get out the whole queue -func (b *backOff) deBackOff(key string) *backOffQueue { - b.queuePoolMu.Lock() - defer b.queuePoolMu.Unlock() - - queue := b.queuePool[key] - delete(b.queuePool, key) - return queue -} - -// enBackOff start to backOff again and put events to the queue -func (b *backOff) reBackOff(key 
string, events []interface{}, oldDuration time.Duration) { - b.queuePoolMu.Lock() - defer b.queuePoolMu.Unlock() - - duration := 2 * oldDuration - if duration > b.maxDuration { - duration = b.maxDuration - } - b.queuePool[key] = newBackOffQueue(events, duration, b.clock) -} - -func (b *backOff) start() <-chan time.Time { - b.tickerMu.Lock() - defer b.tickerMu.Unlock() - b.ticker = time.NewTicker(b.checkDuration) - return b.ticker.C -} - -func (b *backOff) stop() { - b.tickerMu.Lock() - defer b.tickerMu.Unlock() - if b.ticker != nil { - b.ticker.Stop() - } -} - -func newBackOffQueue(events []interface{}, init time.Duration, c clock.Clock) *backOffQueue { - return &backOffQueue{ - events: events, - duration: init, - expireTime: c.Now().Add(init), - clock: c, - } -} - -func (q *backOffQueue) isExpire() bool { - // return time.Now >= expireTime - return !q.clock.Now().Before(q.expireTime) -} diff --git a/pkg/cri/sbserver/events_test.go b/pkg/cri/sbserver/events_test.go deleted file mode 100644 index e5d2d01eb..000000000 --- a/pkg/cri/sbserver/events_test.go +++ /dev/null @@ -1,136 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "testing" - "time" - - eventtypes "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/protobuf" - "github.com/containerd/typeurl/v2" - "github.com/google/go-cmp/cmp" - "github.com/stretchr/testify/assert" - testingclock "k8s.io/utils/clock/testing" -) - -// TestBackOff tests the logic of backOff struct. -func TestBackOff(t *testing.T) { - testStartTime := time.Now() - testClock := testingclock.NewFakeClock(testStartTime) - inputQueues := map[string]*backOffQueue{ - "container1": { - events: []interface{}{ - &eventtypes.TaskOOM{ContainerID: "container1"}, - &eventtypes.TaskOOM{ContainerID: "container1"}, - }, - }, - "container2": { - events: []interface{}{ - &eventtypes.TaskOOM{ContainerID: "container2"}, - &eventtypes.TaskOOM{ContainerID: "container2"}, - }, - }, - } - expectedQueues := map[string]*backOffQueue{ - "container2": { - events: []interface{}{ - &eventtypes.TaskOOM{ContainerID: "container2"}, - &eventtypes.TaskOOM{ContainerID: "container2"}, - }, - expireTime: testClock.Now().Add(backOffInitDuration), - duration: backOffInitDuration, - clock: testClock, - }, - "container1": { - events: []interface{}{ - &eventtypes.TaskOOM{ContainerID: "container1"}, - &eventtypes.TaskOOM{ContainerID: "container1"}, - }, - expireTime: testClock.Now().Add(backOffInitDuration), - duration: backOffInitDuration, - clock: testClock, - }, - } - - t.Logf("Should be able to backOff a event") - actual := newBackOff() - actual.clock = testClock - for k, queue := range inputQueues { - for _, event := range queue.events { - actual.enBackOff(k, event) - } - } - assert.Equal(t, actual.queuePool, expectedQueues) - - t.Logf("Should be able to check if the container is in backOff state") - for k, queue := range inputQueues { - for _, e := range queue.events { - evt, err := typeurl.MarshalAny(e) - assert.NoError(t, err) - key, _, err := convertEvent(evt) - assert.NoError(t, err) - assert.Equal(t, k, key) - assert.Equal(t, 
actual.isInBackOff(key), true) - } - } - - t.Logf("Should be able to check that a container isn't in backOff state") - notExistKey := "containerNotExist" - assert.Equal(t, actual.isInBackOff(notExistKey), false) - - t.Logf("No containers should be expired") - assert.Empty(t, actual.getExpiredIDs()) - - t.Logf("Should be able to get all keys which are expired for backOff") - testClock.Sleep(backOffInitDuration) - actKeyList := actual.getExpiredIDs() - assert.Equal(t, len(inputQueues), len(actKeyList)) - for k := range inputQueues { - assert.Contains(t, actKeyList, k) - } - - t.Logf("Should be able to get out all backOff events") - doneQueues := map[string]*backOffQueue{} - for k := range inputQueues { - actQueue := actual.deBackOff(k) - doneQueues[k] = actQueue - assert.True(t, cmp.Equal(actQueue.events, expectedQueues[k].events, protobuf.Compare)) - } - - t.Logf("Should not get out the event again after having got out the backOff event") - for k := range inputQueues { - var expect *backOffQueue - actQueue := actual.deBackOff(k) - assert.Equal(t, actQueue, expect) - } - - t.Logf("Should be able to reBackOff") - for k, queue := range doneQueues { - failEventIndex := 1 - events := queue.events[failEventIndex:] - actual.reBackOff(k, events, queue.duration) - actQueue := actual.deBackOff(k) - expQueue := &backOffQueue{ - events: events, - expireTime: testClock.Now().Add(2 * queue.duration), - duration: 2 * queue.duration, - clock: testClock, - } - assert.Equal(t, actQueue, expQueue) - } -} diff --git a/pkg/cri/sbserver/fuzz.go b/pkg/cri/sbserver/fuzz.go deleted file mode 100644 index 362817803..000000000 --- a/pkg/cri/sbserver/fuzz.go +++ /dev/null @@ -1,34 +0,0 @@ -//go:build gofuzz - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "fmt" - - "github.com/containerd/containerd/pkg/cri/server" - "github.com/containerd/containerd/pkg/cri/store/sandbox" -) - -func SandboxStore(cs server.CRIService) (*sandbox.Store, error) { - s, ok := cs.(*criService) - if !ok { - return nil, fmt.Errorf("%+v is not sbserver.criService", cs) - } - return s.sandboxStore, nil -} diff --git a/pkg/cri/sbserver/helpers.go b/pkg/cri/sbserver/helpers.go deleted file mode 100644 index 71da2e711..000000000 --- a/pkg/cri/sbserver/helpers.go +++ /dev/null @@ -1,693 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "fmt" - "path" - "path/filepath" - "regexp" - goruntime "runtime" - "strconv" - "strings" - "time" - - runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" - "github.com/containerd/typeurl/v2" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pelletier/go-toml/v2" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/containers" - "github.com/containerd/containerd/errdefs" - clabels "github.com/containerd/containerd/labels" - criconfig "github.com/containerd/containerd/pkg/cri/config" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - imagestore "github.com/containerd/containerd/pkg/cri/store/image" - runtimeoptions "github.com/containerd/containerd/pkg/runtimeoptions/v1" - "github.com/containerd/containerd/plugin" - runcoptions "github.com/containerd/containerd/runtime/v2/runc/options" - "github.com/containerd/log" -) - -// TODO: Move common helpers for sbserver and podsandbox to a dedicated package once basic services are functinal. - -const ( - // errorStartReason is the exit reason when fails to start container. - errorStartReason = "StartError" - // errorStartExitCode is the exit code when fails to start container. - // 128 is the same with Docker's behavior. - // TODO(windows): Figure out what should be used for windows. - errorStartExitCode = 128 - // completeExitReason is the exit reason when container exits with code 0. - completeExitReason = "Completed" - // errorExitReason is the exit reason when container exits with code non-zero. - errorExitReason = "Error" - // oomExitReason is the exit reason when process in container is oom killed. - oomExitReason = "OOMKilled" - - // sandboxesDir contains all sandbox root. A sandbox root is the running - // directory of the sandbox, all files created for the sandbox will be - // placed under this directory. 
- sandboxesDir = "sandboxes" - // containersDir contains all container root. - containersDir = "containers" - // Delimiter used to construct container/sandbox names. - nameDelimiter = "_" - - // criContainerdPrefix is common prefix for cri-containerd - criContainerdPrefix = "io.cri-containerd" - // containerKindLabel is a label key indicating container is sandbox container or application container - containerKindLabel = criContainerdPrefix + ".kind" - // containerKindSandbox is a label value indicating container is sandbox container - containerKindSandbox = "sandbox" - // containerKindContainer is a label value indicating container is application container - containerKindContainer = "container" - - // containerMetadataExtension is an extension name that identify metadata of container in CreateContainerRequest - containerMetadataExtension = criContainerdPrefix + ".container.metadata" - - // defaultIfName is the default network interface for the pods - defaultIfName = "eth0" - - // runtimeRunhcsV1 is the runtime type for runhcs. - runtimeRunhcsV1 = "io.containerd.runhcs.v1" - - // devShm is the default path of /dev/shm. - devShm = "/dev/shm" - // etcHosts is the default path of /etc/hosts file. - etcHosts = "/etc/hosts" - // etcHostname is the default path of /etc/hostname file. - etcHostname = "/etc/hostname" - // resolvConfPath is the abs path of resolv.conf on host or container. - resolvConfPath = "/etc/resolv.conf" -) - -// getSandboxRootDir returns the root directory for managing sandbox files, -// e.g. hosts files. -func (c *criService) getSandboxRootDir(id string) string { - return filepath.Join(c.config.RootDir, sandboxesDir, id) -} - -// getVolatileSandboxRootDir returns the root directory for managing volatile sandbox files, -// e.g. named pipes. 
-func (c *criService) getVolatileSandboxRootDir(id string) string { - return filepath.Join(c.config.StateDir, sandboxesDir, id) -} - -// getSandboxHostname returns the hostname file path inside the sandbox root directory. -func (c *criService) getSandboxHostname(id string) string { - return filepath.Join(c.getSandboxRootDir(id), "hostname") -} - -// getSandboxHosts returns the hosts file path inside the sandbox root directory. -func (c *criService) getSandboxHosts(id string) string { - return filepath.Join(c.getSandboxRootDir(id), "hosts") -} - -// getResolvPath returns resolv.conf filepath for specified sandbox. -func (c *criService) getResolvPath(id string) string { - return filepath.Join(c.getSandboxRootDir(id), "resolv.conf") -} - -// getSandboxDevShm returns the shm file path inside the sandbox root directory. -func (c *criService) getSandboxDevShm(id string) string { - return filepath.Join(c.getVolatileSandboxRootDir(id), "shm") -} - -// makeSandboxName generates sandbox name from sandbox metadata. The name -// generated is unique as long as sandbox metadata is unique. -func makeSandboxName(s *runtime.PodSandboxMetadata) string { - return strings.Join([]string{ - s.Name, // 0 - s.Namespace, // 1 - s.Uid, // 2 - strconv.FormatUint(uint64(s.Attempt), 10), // 3 - }, nameDelimiter) -} - -// makeContainerName generates container name from sandbox and container metadata. -// The name generated is unique as long as the sandbox container combination is -// unique. -func makeContainerName(c *runtime.ContainerMetadata, s *runtime.PodSandboxMetadata) string { - return strings.Join([]string{ - c.Name, // 0: container name - s.Name, // 1: pod name - s.Namespace, // 2: pod namespace - s.Uid, // 3: pod uid - strconv.FormatUint(uint64(c.Attempt), 10), // 4: attempt number of creating the container - }, nameDelimiter) -} - -// getContainerRootDir returns the root directory for managing container files, -// e.g. state checkpoint. 
-func (c *criService) getContainerRootDir(id string) string { - return filepath.Join(c.config.RootDir, containersDir, id) -} - -// getVolatileContainerRootDir returns the root directory for managing volatile container files, -// e.g. named pipes. -func (c *criService) getVolatileContainerRootDir(id string) string { - return filepath.Join(c.config.StateDir, containersDir, id) -} - -// criContainerStateToString formats CRI container state to string. -func criContainerStateToString(state runtime.ContainerState) string { - return runtime.ContainerState_name[int32(state)] -} - -// toContainerdImage converts an image object in image store to containerd image handler. -func (c *criService) toContainerdImage(ctx context.Context, image imagestore.Image) (containerd.Image, error) { - // image should always have at least one reference. - if len(image.References) == 0 { - return nil, fmt.Errorf("invalid image with no reference %q", image.ID) - } - return c.client.GetImage(ctx, image.References[0]) -} - -// getUserFromImage gets uid or user name of the image user. -// If user is numeric, it will be treated as uid; or else, it is treated as user name. -func getUserFromImage(user string) (*int64, string) { - // return both empty if user is not specified in the image. - if user == "" { - return nil, "" - } - // split instances where the id may contain user:group - user = strings.Split(user, ":")[0] - // user could be either uid or user name. Try to interpret as numeric uid. - uid, err := strconv.ParseInt(user, 10, 64) - if err != nil { - // If user is non numeric, assume it's user name. - return nil, user - } - // If user is a numeric uid. - return &uid, "" -} - -// validateTargetContainer checks that a container is a valid -// target for a container using PID NamespaceMode_TARGET. -// The target container must be in the same sandbox and must be running. -// Returns the target container for convenience. 
-func (c *criService) validateTargetContainer(sandboxID, targetContainerID string) (containerstore.Container, error) { - targetContainer, err := c.containerStore.Get(targetContainerID) - if err != nil { - return containerstore.Container{}, fmt.Errorf("container %q does not exist: %w", targetContainerID, err) - } - - targetSandboxID := targetContainer.Metadata.SandboxID - if targetSandboxID != sandboxID { - return containerstore.Container{}, - fmt.Errorf("container %q (sandbox %s) does not belong to sandbox %s", targetContainerID, targetSandboxID, sandboxID) - } - - status := targetContainer.Status.Get() - if state := status.State(); state != runtime.ContainerState_CONTAINER_RUNNING { - return containerstore.Container{}, fmt.Errorf("container %q is not running - in state %s", targetContainerID, state) - } - - return targetContainer, nil -} - -// isInCRIMounts checks whether a destination is in CRI mount list. -func isInCRIMounts(dst string, mounts []*runtime.Mount) bool { - for _, m := range mounts { - if filepath.Clean(m.ContainerPath) == filepath.Clean(dst) { - return true - } - } - return false -} - -// filterLabel returns a label filter. Use `%q` here because containerd -// filter needs extra quote to work properly. -func filterLabel(k, v string) string { - return fmt.Sprintf("labels.%q==%q", k, v) -} - -// buildLabel builds the labels from config to be passed to containerd -func buildLabels(configLabels, imageConfigLabels map[string]string, containerType string) map[string]string { - labels := make(map[string]string) - - for k, v := range imageConfigLabels { - if err := clabels.Validate(k, v); err == nil { - labels[k] = v - } else { - // In case the image label is invalid, we output a warning and skip adding it to the - // container. 
- log.L.WithError(err).Warnf("unable to add image label with key %s to the container", k) - } - } - // labels from the CRI request (config) will override labels in the image config - for k, v := range configLabels { - labels[k] = v - } - labels[containerKindLabel] = containerType - return labels -} - -// generateRuntimeOptions generates runtime options from cri plugin config. -func generateRuntimeOptions(r criconfig.Runtime) (interface{}, error) { - if r.Options == nil { - return nil, nil - } - - b, err := toml.Marshal(r.Options) - if err != nil { - return nil, fmt.Errorf("failed to marshal TOML blob for runtime %q: %w", r.Type, err) - } - - options := getRuntimeOptionsType(r.Type) - if err := toml.Unmarshal(b, options); err != nil { - return nil, err - } - - // For generic configuration, if no config path specified (preserving old behavior), pass - // the whole TOML configuration section to the runtime. - if runtimeOpts, ok := options.(*runtimeoptions.Options); ok && runtimeOpts.ConfigPath == "" { - runtimeOpts.ConfigBody = b - } - - return options, nil -} - -// getRuntimeOptionsType gets empty runtime options by the runtime type name. -func getRuntimeOptionsType(t string) interface{} { - switch t { - case plugin.RuntimeRuncV2: - return &runcoptions.Options{} - case runtimeRunhcsV1: - return &runhcsoptions.Options{} - default: - return &runtimeoptions.Options{} - } -} - -// getRuntimeOptions get runtime options from container metadata. -func getRuntimeOptions(c containers.Container) (interface{}, error) { - from := c.Runtime.Options - if from == nil || from.GetValue() == nil { - return nil, nil - } - opts, err := typeurl.UnmarshalAny(from) - if err != nil { - return nil, err - } - return opts, nil -} - -const ( - // unknownExitCode is the exit code when exit reason is unknown. - unknownExitCode = 255 - // unknownExitReason is the exit reason when exit reason is unknown. 
- unknownExitReason = "Unknown" -) - -// unknownContainerStatus returns the default container status when its status is unknown. -func unknownContainerStatus() containerstore.Status { - return containerstore.Status{ - CreatedAt: 0, - StartedAt: 0, - FinishedAt: 0, - ExitCode: unknownExitCode, - Reason: unknownExitReason, - Unknown: true, - } -} - -// getPassthroughAnnotations filters requested pod annotations by comparing -// against permitted annotations for the given runtime. -func getPassthroughAnnotations(podAnnotations map[string]string, - runtimePodAnnotations []string) (passthroughAnnotations map[string]string) { - passthroughAnnotations = make(map[string]string) - - for podAnnotationKey, podAnnotationValue := range podAnnotations { - for _, pattern := range runtimePodAnnotations { - // Use path.Match instead of filepath.Match here. - // filepath.Match treated `\\` as path separator - // on windows, which is not what we want. - if ok, _ := path.Match(pattern, podAnnotationKey); ok { - passthroughAnnotations[podAnnotationKey] = podAnnotationValue - } - } - } - return passthroughAnnotations -} - -// copyResourcesToStatus copys container resource contraints from spec to -// container status. -// This will need updates when new fields are added to ContainerResources. 
-func copyResourcesToStatus(spec *runtimespec.Spec, status containerstore.Status) containerstore.Status { - status.Resources = &runtime.ContainerResources{} - if spec.Linux != nil { - status.Resources.Linux = &runtime.LinuxContainerResources{} - - if spec.Process != nil && spec.Process.OOMScoreAdj != nil { - status.Resources.Linux.OomScoreAdj = int64(*spec.Process.OOMScoreAdj) - } - - if spec.Linux.Resources == nil { - return status - } - - if spec.Linux.Resources.CPU != nil { - if spec.Linux.Resources.CPU.Period != nil { - status.Resources.Linux.CpuPeriod = int64(*spec.Linux.Resources.CPU.Period) - } - if spec.Linux.Resources.CPU.Quota != nil { - status.Resources.Linux.CpuQuota = *spec.Linux.Resources.CPU.Quota - } - if spec.Linux.Resources.CPU.Shares != nil { - status.Resources.Linux.CpuShares = int64(*spec.Linux.Resources.CPU.Shares) - } - status.Resources.Linux.CpusetCpus = spec.Linux.Resources.CPU.Cpus - status.Resources.Linux.CpusetMems = spec.Linux.Resources.CPU.Mems - } - - if spec.Linux.Resources.Memory != nil { - if spec.Linux.Resources.Memory.Limit != nil { - status.Resources.Linux.MemoryLimitInBytes = *spec.Linux.Resources.Memory.Limit - } - if spec.Linux.Resources.Memory.Swap != nil { - status.Resources.Linux.MemorySwapLimitInBytes = *spec.Linux.Resources.Memory.Swap - } - } - - if spec.Linux.Resources.HugepageLimits != nil { - hugepageLimits := make([]*runtime.HugepageLimit, 0, len(spec.Linux.Resources.HugepageLimits)) - for _, l := range spec.Linux.Resources.HugepageLimits { - hugepageLimits = append(hugepageLimits, &runtime.HugepageLimit{ - PageSize: l.Pagesize, - Limit: l.Limit, - }) - } - status.Resources.Linux.HugepageLimits = hugepageLimits - } - - if spec.Linux.Resources.Unified != nil { - status.Resources.Linux.Unified = spec.Linux.Resources.Unified - } - } - - if spec.Windows != nil { - status.Resources.Windows = &runtime.WindowsContainerResources{} - if spec.Windows.Resources == nil { - return status - } - - if spec.Windows.Resources.CPU != 
nil { - if spec.Windows.Resources.CPU.Shares != nil { - status.Resources.Windows.CpuShares = int64(*spec.Windows.Resources.CPU.Shares) - } - if spec.Windows.Resources.CPU.Count != nil { - status.Resources.Windows.CpuCount = int64(*spec.Windows.Resources.CPU.Count) - } - if spec.Windows.Resources.CPU.Maximum != nil { - status.Resources.Windows.CpuMaximum = int64(*spec.Windows.Resources.CPU.Maximum) - } - } - - if spec.Windows.Resources.Memory != nil { - if spec.Windows.Resources.Memory.Limit != nil { - status.Resources.Windows.MemoryLimitInBytes = int64(*spec.Windows.Resources.Memory.Limit) - } - } - - // TODO: Figure out how to get RootfsSizeInBytes - } - return status -} - -func (c *criService) generateAndSendContainerEvent(ctx context.Context, containerID string, sandboxID string, eventType runtime.ContainerEventType) { - podSandboxStatus, err := c.getPodSandboxStatus(ctx, sandboxID) - if err != nil { - log.G(ctx).Warnf("Failed to get podSandbox status for container event for sandboxID %q: %v. 
Sending the event with nil podSandboxStatus.", sandboxID, err) - podSandboxStatus = nil - } - containerStatuses, err := c.getContainerStatuses(ctx, sandboxID) - if err != nil { - log.G(ctx).Errorf("Failed to get container statuses for container event for sandboxID %q: %v", sandboxID, err) - } - - event := runtime.ContainerEventResponse{ - ContainerId: containerID, - ContainerEventType: eventType, - CreatedAt: time.Now().UnixNano(), - PodSandboxStatus: podSandboxStatus, - ContainersStatuses: containerStatuses, - } - - // TODO(ruiwen-zhao): write events to a cache, storage, or increase the size of the channel - select { - case c.containerEventsChan <- event: - default: - containerEventsDroppedCount.Inc() - log.G(ctx).Debugf("containerEventsChan is full, discarding event %+v", event) - } -} - -func (c *criService) getPodSandboxStatus(ctx context.Context, podSandboxID string) (*runtime.PodSandboxStatus, error) { - request := &runtime.PodSandboxStatusRequest{PodSandboxId: podSandboxID} - response, err := c.PodSandboxStatus(ctx, request) - if err != nil { - return nil, err - } - return response.GetStatus(), nil -} - -func (c *criService) getContainerStatuses(ctx context.Context, podSandboxID string) ([]*runtime.ContainerStatus, error) { - response, err := c.ListContainers(ctx, &runtime.ListContainersRequest{ - Filter: &runtime.ContainerFilter{ - PodSandboxId: podSandboxID, - }, - }) - if err != nil { - return nil, err - } - containerStatuses := []*runtime.ContainerStatus{} - for _, container := range response.Containers { - statusResp, err := c.ContainerStatus(ctx, &runtime.ContainerStatusRequest{ - ContainerId: container.Id, - Verbose: false, - }) - if err != nil { - if errdefs.IsNotFound(err) { - continue - } - return nil, err - } - containerStatuses = append(containerStatuses, statusResp.GetStatus()) - } - return containerStatuses, nil -} - -// hostNetwork handles checking if host networking was requested. 
-func hostNetwork(config *runtime.PodSandboxConfig) bool { - var hostNet bool - switch goruntime.GOOS { - case "windows": - // Windows HostProcess pods can only run on the host network - hostNet = config.GetWindows().GetSecurityContext().GetHostProcess() - case "darwin": - // No CNI on Darwin yet. - hostNet = true - default: - // Even on other platforms, the logic containerd uses is to check if NamespaceMode == NODE. - // So this handles Linux, as well as any other platforms not governed by the cases above - // that have special quirks. - hostNet = config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE - } - return hostNet -} - -// getCgroupsPath generates container cgroups path. -func getCgroupsPath(cgroupsParent, id string) string { - base := path.Base(cgroupsParent) - if strings.HasSuffix(base, ".slice") { - // For a.slice/b.slice/c.slice, base is c.slice. - // runc systemd cgroup path format is "slice:prefix:name". - return strings.Join([]string{base, "cri-containerd", id}, ":") - } - return filepath.Join(cgroupsParent, id) -} - -func toLabel(selinuxOptions *runtime.SELinuxOption) ([]string, error) { - var labels []string - - if selinuxOptions == nil { - return nil, nil - } - if err := checkSelinuxLevel(selinuxOptions.Level); err != nil { - return nil, err - } - if selinuxOptions.User != "" { - labels = append(labels, "user:"+selinuxOptions.User) - } - if selinuxOptions.Role != "" { - labels = append(labels, "role:"+selinuxOptions.Role) - } - if selinuxOptions.Type != "" { - labels = append(labels, "type:"+selinuxOptions.Type) - } - if selinuxOptions.Level != "" { - labels = append(labels, "level:"+selinuxOptions.Level) - } - - return labels, nil -} - -func checkSelinuxLevel(level string) error { - if len(level) == 0 { - return nil - } - - matched, err := regexp.MatchString(`^s\d(-s\d)??(:c\d{1,4}(\.c\d{1,4})?(,c\d{1,4}(\.c\d{1,4})?)*)?$`, level) - if err != nil { - return fmt.Errorf("the format of 'level' %q is 
not correct: %w", level, err) - } - if !matched { - return fmt.Errorf("the format of 'level' %q is not correct", level) - } - return nil -} - -func parseUsernsIDMap(runtimeIDMap []*runtime.IDMapping) ([]runtimespec.LinuxIDMapping, error) { - var m []runtimespec.LinuxIDMapping - - if len(runtimeIDMap) == 0 { - return m, nil - } - - if len(runtimeIDMap) > 1 { - // We only accept 1 line, because containerd.WithRemappedSnapshot() only supports that. - return m, fmt.Errorf("only one mapping line supported, got %v mapping lines", len(runtimeIDMap)) - } - - // We know len is 1 now. - if runtimeIDMap[0] == nil { - return m, nil - } - uidMap := *runtimeIDMap[0] - - if uidMap.Length < 1 { - return m, fmt.Errorf("invalid mapping length: %v", uidMap.Length) - } - - m = []runtimespec.LinuxIDMapping{ - { - ContainerID: uidMap.ContainerId, - HostID: uidMap.HostId, - Size: uidMap.Length, - }, - } - - return m, nil -} - -func parseUsernsIDs(userns *runtime.UserNamespace) (uids, gids []runtimespec.LinuxIDMapping, retErr error) { - if userns == nil { - // If userns is not set, the kubelet doesn't support this option - // and we should just fallback to no userns. This is completely - // valid. - return nil, nil, nil - } - - uids, err := parseUsernsIDMap(userns.GetUids()) - if err != nil { - return nil, nil, fmt.Errorf("UID mapping: %w", err) - } - - gids, err = parseUsernsIDMap(userns.GetGids()) - if err != nil { - return nil, nil, fmt.Errorf("GID mapping: %w", err) - } - - switch mode := userns.GetMode(); mode { - case runtime.NamespaceMode_NODE: - if len(uids) != 0 || len(gids) != 0 { - return nil, nil, fmt.Errorf("can't use user namespace mode %q with mappings. Got %v UID mappings and %v GID mappings", mode, len(uids), len(gids)) - } - case runtime.NamespaceMode_POD: - // This is valid, we will handle it in WithPodNamespaces(). 
- if len(uids) == 0 || len(gids) == 0 { - return nil, nil, fmt.Errorf("can't use user namespace mode %q without UID and GID mappings", mode) - } - default: - return nil, nil, fmt.Errorf("unsupported user namespace mode: %q", mode) - } - - return uids, gids, nil -} - -// sameUsernsConfig checks if the userns configs are the same. If the mappings -// on each config are the same but in different order, it returns false. -// XXX: If the runtime.UserNamespace struct changes, we should update this -// function accordingly. -func sameUsernsConfig(a, b *runtime.UserNamespace) bool { - // If both are nil, they are the same. - if a == nil && b == nil { - return true - } - // If only one is nil, they are different. - if a == nil || b == nil { - return false - } - // At this point, a is not nil nor b. - - if a.GetMode() != b.GetMode() { - return false - } - - aUids, aGids, err := parseUsernsIDs(a) - if err != nil { - return false - } - bUids, bGids, err := parseUsernsIDs(b) - if err != nil { - return false - } - - if !sameMapping(aUids, bUids) { - return false - } - if !sameMapping(aGids, bGids) { - return false - } - return true -} - -// sameMapping checks if the mappings are the same. If the mappings are the same -// but in different order, it returns false. -func sameMapping(a, b []runtimespec.LinuxIDMapping) bool { - if len(a) != len(b) { - return false - } - - for x := range a { - if a[x].ContainerID != b[x].ContainerID { - return false - } - if a[x].HostID != b[x].HostID { - return false - } - if a[x].Size != b[x].Size { - return false - } - } - return true -} diff --git a/pkg/cri/sbserver/helpers_linux.go b/pkg/cri/sbserver/helpers_linux.go deleted file mode 100644 index 36b161fda..000000000 --- a/pkg/cri/sbserver/helpers_linux.go +++ /dev/null @@ -1,209 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "fmt" - "os" - "path/filepath" - "sort" - "strings" - "syscall" - "time" - - "github.com/containerd/cgroups/v3" - "github.com/moby/sys/mountinfo" - "github.com/opencontainers/runtime-spec/specs-go" - "golang.org/x/sys/unix" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/pkg/apparmor" - "github.com/containerd/containerd/pkg/seccomp" - "github.com/containerd/containerd/pkg/seutil" - "github.com/containerd/containerd/snapshots" - "github.com/containerd/log" -) - -// apparmorEnabled returns true if apparmor is enabled, supported by the host, -// if apparmor_parser is installed, and if we are not running docker-in-docker. -func (c *criService) apparmorEnabled() bool { - if c.config.DisableApparmor { - return false - } - return apparmor.HostSupports() -} - -func (c *criService) seccompEnabled() bool { - return seccomp.IsEnabled() -} - -// openLogFile opens/creates a container log file. -func openLogFile(path string) (*os.File, error) { - if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { - return nil, err - } - return os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0640) -} - -// unmountRecursive unmounts the target and all mounts underneath, starting with -// the deepest mount first. 
-func unmountRecursive(ctx context.Context, target string) error { - target, err := mount.CanonicalizePath(target) - if err != nil { - return err - } - - toUnmount, err := mountinfo.GetMounts(mountinfo.PrefixFilter(target)) - if err != nil { - return err - } - - // Make the deepest mount be first - sort.Slice(toUnmount, func(i, j int) bool { - return len(toUnmount[i].Mountpoint) > len(toUnmount[j].Mountpoint) - }) - - for i, m := range toUnmount { - if err := mount.UnmountAll(m.Mountpoint, unix.MNT_DETACH); err != nil { - if i == len(toUnmount)-1 { // last mount - return err - } - // This is some submount, we can ignore this error for now, the final unmount will fail if this is a real problem - log.G(ctx).WithError(err).Debugf("failed to unmount submount %s", m.Mountpoint) - } - } - return nil -} - -// ensureRemoveAll wraps `os.RemoveAll` to check for specific errors that can -// often be remedied. -// Only use `ensureRemoveAll` if you really want to make every effort to remove -// a directory. -// -// Because of the way `os.Remove` (and by extension `os.RemoveAll`) works, there -// can be a race between reading directory entries and then actually attempting -// to remove everything in the directory. -// These types of errors do not need to be returned since it's ok for the dir to -// be gone we can just retry the remove operation. -// -// This should not return a `os.ErrNotExist` kind of error under any circumstances -func ensureRemoveAll(ctx context.Context, dir string) error { - notExistErr := make(map[string]bool) - - // track retries - exitOnErr := make(map[string]int) - maxRetry := 50 - - // Attempt to unmount anything beneath this dir first. 
- if err := unmountRecursive(ctx, dir); err != nil { - log.G(ctx).WithError(err).Debugf("failed to do initial unmount of %s", dir) - } - - for { - err := os.RemoveAll(dir) - if err == nil { - return nil - } - - pe, ok := err.(*os.PathError) - if !ok { - return err - } - - if os.IsNotExist(err) { - if notExistErr[pe.Path] { - return err - } - notExistErr[pe.Path] = true - - // There is a race where some subdir can be removed but after the - // parent dir entries have been read. - // So the path could be from `os.Remove(subdir)` - // If the reported non-existent path is not the passed in `dir` we - // should just retry, but otherwise return with no error. - if pe.Path == dir { - return nil - } - continue - } - - if pe.Err != syscall.EBUSY { - return err - } - if e := mount.Unmount(pe.Path, unix.MNT_DETACH); e != nil { - return fmt.Errorf("error while removing %s: %w", dir, e) - } - - if exitOnErr[pe.Path] == maxRetry { - return err - } - exitOnErr[pe.Path]++ - time.Sleep(100 * time.Millisecond) - } -} - -var vmbasedRuntimes = []string{ - "io.containerd.kata", -} - -func isVMBasedRuntime(runtimeType string) bool { - for _, rt := range vmbasedRuntimes { - if strings.Contains(runtimeType, rt) { - return true - } - } - return false -} - -func modifyProcessLabel(runtimeType string, spec *specs.Spec) error { - if !isVMBasedRuntime(runtimeType) { - return nil - } - l, err := seutil.ChangeToKVM(spec.Process.SelinuxLabel) - if err != nil { - return fmt.Errorf("failed to get selinux kvm label: %w", err) - } - spec.Process.SelinuxLabel = l - return nil -} - -// getCgroupsMode returns cgropu mode. 
-// TODO: add build constraints to cgroups package and remove this helper -func isUnifiedCgroupsMode() bool { - return cgroups.Mode() == cgroups.Unified -} - -func snapshotterRemapOpts(nsOpts *runtime.NamespaceOption) ([]snapshots.Opt, error) { - snapshotOpt := []snapshots.Opt{} - usernsOpts := nsOpts.GetUsernsOptions() - if usernsOpts == nil { - return snapshotOpt, nil - } - - uids, gids, err := parseUsernsIDs(usernsOpts) - if err != nil { - return nil, fmt.Errorf("user namespace configuration: %w", err) - } - - if usernsOpts.GetMode() == runtime.NamespaceMode_POD { - snapshotOpt = append(snapshotOpt, containerd.WithRemapperLabels(0, uids[0].HostID, 0, gids[0].HostID, uids[0].Size)) - } - return snapshotOpt, nil -} diff --git a/pkg/cri/sbserver/helpers_other.go b/pkg/cri/sbserver/helpers_other.go deleted file mode 100644 index aef880153..000000000 --- a/pkg/cri/sbserver/helpers_other.go +++ /dev/null @@ -1,47 +0,0 @@ -//go:build !windows && !linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "os" - - "github.com/opencontainers/runtime-spec/specs-go" -) - -// openLogFile opens/creates a container log file. -func openLogFile(path string) (*os.File, error) { - return os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0640) -} - -// ensureRemoveAll wraps `os.RemoveAll` to check for specific errors that can -// often be remedied. 
-// Only use `ensureRemoveAll` if you really want to make every effort to remove -// a directory. -func ensureRemoveAll(ctx context.Context, dir string) error { - return os.RemoveAll(dir) -} - -func modifyProcessLabel(runtimeType string, spec *specs.Spec) error { - return nil -} - -func isUnifiedCgroupsMode() bool { - return false -} diff --git a/pkg/cri/sbserver/helpers_test.go b/pkg/cri/sbserver/helpers_test.go deleted file mode 100644 index 8088850d6..000000000 --- a/pkg/cri/sbserver/helpers_test.go +++ /dev/null @@ -1,557 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "os" - goruntime "runtime" - "strings" - "testing" - "time" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/containers" - "github.com/containerd/containerd/oci" - criconfig "github.com/containerd/containerd/pkg/cri/config" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - "github.com/containerd/containerd/plugin" - "github.com/containerd/containerd/protobuf/types" - runcoptions "github.com/containerd/containerd/runtime/v2/runc/options" - "github.com/containerd/typeurl/v2" - - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pelletier/go-toml/v2" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -// TestGetUserFromImage tests the logic of getting image uid or user name of image user. 
-func TestGetUserFromImage(t *testing.T) { - newI64 := func(i int64) *int64 { return &i } - for _, test := range []struct { - desc string - user string - uid *int64 - name string - }{ - { - desc: "no gid", - user: "0", - uid: newI64(0), - }, - { - desc: "uid/gid", - user: "0:1", - uid: newI64(0), - }, - { - desc: "empty user", - user: "", - }, - { - desc: "multiple separators", - user: "1:2:3", - uid: newI64(1), - }, - { - desc: "root username", - user: "root:root", - name: "root", - }, - { - desc: "username", - user: "test:test", - name: "test", - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - actualUID, actualName := getUserFromImage(test.user) - assert.Equal(t, test.uid, actualUID) - assert.Equal(t, test.name, actualName) - }) - } -} - -func TestBuildLabels(t *testing.T) { - imageConfigLabels := map[string]string{ - "a": "z", - "d": "y", - "long-label": strings.Repeat("example", 10000), - } - configLabels := map[string]string{ - "a": "b", - "c": "d", - } - newLabels := buildLabels(configLabels, imageConfigLabels, containerKindSandbox) - assert.Len(t, newLabels, 4) - assert.Equal(t, "b", newLabels["a"]) - assert.Equal(t, "d", newLabels["c"]) - assert.Equal(t, "y", newLabels["d"]) - assert.Equal(t, containerKindSandbox, newLabels[containerKindLabel]) - assert.NotContains(t, newLabels, "long-label") - - newLabels["a"] = "e" - assert.Empty(t, configLabels[containerKindLabel], "should not add new labels into original label") - assert.Equal(t, "b", configLabels["a"], "change in new labels should not affect original label") -} - -func TestGenerateRuntimeOptions(t *testing.T) { - nilOpts := ` -systemd_cgroup = true -[containerd] - no_pivot = true - default_runtime_name = "default" -[containerd.runtimes.runcv2] - runtime_type = "` + plugin.RuntimeRuncV2 + `" -` - nonNilOpts := ` -systemd_cgroup = true -[containerd] - no_pivot = true - default_runtime_name = "default" -[containerd.runtimes.legacy.options] - Runtime = "legacy" - RuntimeRoot = "/legacy" 
-[containerd.runtimes.runc.options] - BinaryName = "runc" - Root = "/runc" - NoNewKeyring = true -[containerd.runtimes.runcv2] - runtime_type = "` + plugin.RuntimeRuncV2 + `" -[containerd.runtimes.runcv2.options] - BinaryName = "runc" - Root = "/runcv2" - NoNewKeyring = true -` - var nilOptsConfig, nonNilOptsConfig criconfig.Config - err := toml.Unmarshal([]byte(nilOpts), &nilOptsConfig) - require.NoError(t, err) - require.Len(t, nilOptsConfig.Runtimes, 1) - - err = toml.Unmarshal([]byte(nonNilOpts), &nonNilOptsConfig) - require.NoError(t, err) - require.Len(t, nonNilOptsConfig.Runtimes, 3) - - for _, test := range []struct { - desc string - r criconfig.Runtime - c criconfig.Config - expectedOptions interface{} - }{ - { - desc: "when options is nil, should return nil option for io.containerd.runc.v2", - r: nilOptsConfig.Runtimes["runcv2"], - c: nilOptsConfig, - expectedOptions: nil, - }, - { - desc: "when options is not nil, should be able to decode for io.containerd.runc.v2", - r: nonNilOptsConfig.Runtimes["runcv2"], - c: nonNilOptsConfig, - expectedOptions: &runcoptions.Options{ - BinaryName: "runc", - Root: "/runcv2", - NoNewKeyring: true, - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - opts, err := generateRuntimeOptions(test.r) - assert.NoError(t, err) - assert.Equal(t, test.expectedOptions, opts) - }) - } -} - -func TestEnvDeduplication(t *testing.T) { - for _, test := range []struct { - desc string - existing []string - kv [][2]string - expected []string - }{ - { - desc: "single env", - kv: [][2]string{ - {"a", "b"}, - }, - expected: []string{"a=b"}, - }, - { - desc: "multiple envs", - kv: [][2]string{ - {"a", "b"}, - {"c", "d"}, - {"e", "f"}, - }, - expected: []string{ - "a=b", - "c=d", - "e=f", - }, - }, - { - desc: "env override", - kv: [][2]string{ - {"k1", "v1"}, - {"k2", "v2"}, - {"k3", "v3"}, - {"k3", "v4"}, - {"k1", "v5"}, - {"k4", "v6"}, - }, - expected: []string{ - "k1=v5", - "k2=v2", - "k3=v4", - "k4=v6", - }, - }, - { - 
desc: "existing env", - existing: []string{ - "k1=v1", - "k2=v2", - "k3=v3", - }, - kv: [][2]string{ - {"k3", "v4"}, - {"k2", "v5"}, - {"k4", "v6"}, - }, - expected: []string{ - "k1=v1", - "k2=v5", - "k3=v4", - "k4=v6", - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - var spec runtimespec.Spec - if len(test.existing) > 0 { - spec.Process = &runtimespec.Process{ - Env: test.existing, - } - } - for _, kv := range test.kv { - oci.WithEnv([]string{kv[0] + "=" + kv[1]})(context.Background(), nil, nil, &spec) - } - assert.Equal(t, test.expected, spec.Process.Env) - }) - } -} - -func TestPassThroughAnnotationsFilter(t *testing.T) { - for _, test := range []struct { - desc string - podAnnotations map[string]string - runtimePodAnnotations []string - passthroughAnnotations map[string]string - }{ - { - desc: "should support direct match", - podAnnotations: map[string]string{"c": "d", "d": "e"}, - runtimePodAnnotations: []string{"c"}, - passthroughAnnotations: map[string]string{"c": "d"}, - }, - { - desc: "should support wildcard match", - podAnnotations: map[string]string{ - "t.f": "j", - "z.g": "o", - "z": "o", - "y.ca": "b", - "y": "b", - }, - runtimePodAnnotations: []string{"*.f", "z*g", "y.c*"}, - passthroughAnnotations: map[string]string{ - "t.f": "j", - "z.g": "o", - "y.ca": "b", - }, - }, - { - desc: "should support wildcard match all", - podAnnotations: map[string]string{ - "t.f": "j", - "z.g": "o", - "z": "o", - "y.ca": "b", - "y": "b", - }, - runtimePodAnnotations: []string{"*"}, - passthroughAnnotations: map[string]string{ - "t.f": "j", - "z.g": "o", - "z": "o", - "y.ca": "b", - "y": "b", - }, - }, - { - desc: "should support match including path separator", - podAnnotations: map[string]string{ - "matchend.com/end": "1", - "matchend.com/end1": "2", - "matchend.com/1end": "3", - "matchmid.com/mid": "4", - "matchmid.com/mi1d": "5", - "matchmid.com/mid1": "6", - "matchhead.com/head": "7", - "matchhead.com/1head": "8", - "matchhead.com/head1": 
"9", - "matchall.com/abc": "10", - "matchall.com/def": "11", - "end/matchend": "12", - "end1/matchend": "13", - "1end/matchend": "14", - "mid/matchmid": "15", - "mi1d/matchmid": "16", - "mid1/matchmid": "17", - "head/matchhead": "18", - "1head/matchhead": "19", - "head1/matchhead": "20", - "abc/matchall": "21", - "def/matchall": "22", - "match1/match2": "23", - "nomatch/nomatch": "24", - }, - runtimePodAnnotations: []string{ - "matchend.com/end*", - "matchmid.com/mi*d", - "matchhead.com/*head", - "matchall.com/*", - "end*/matchend", - "mi*d/matchmid", - "*head/matchhead", - "*/matchall", - "match*/match*", - }, - passthroughAnnotations: map[string]string{ - "matchend.com/end": "1", - "matchend.com/end1": "2", - "matchmid.com/mid": "4", - "matchmid.com/mi1d": "5", - "matchhead.com/head": "7", - "matchhead.com/1head": "8", - "matchall.com/abc": "10", - "matchall.com/def": "11", - "end/matchend": "12", - "end1/matchend": "13", - "mid/matchmid": "15", - "mi1d/matchmid": "16", - "head/matchhead": "18", - "1head/matchhead": "19", - "abc/matchall": "21", - "def/matchall": "22", - "match1/match2": "23", - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - passthroughAnnotations := getPassthroughAnnotations(test.podAnnotations, test.runtimePodAnnotations) - assert.Equal(t, test.passthroughAnnotations, passthroughAnnotations) - }) - } -} - -func TestEnsureRemoveAllNotExist(t *testing.T) { - // should never return an error for a non-existent path - if err := ensureRemoveAll(context.Background(), "/non/existent/path"); err != nil { - t.Fatal(err) - } -} - -func TestEnsureRemoveAllWithDir(t *testing.T) { - dir := t.TempDir() - if err := ensureRemoveAll(context.Background(), dir); err != nil { - t.Fatal(err) - } -} - -func TestEnsureRemoveAllWithFile(t *testing.T) { - tmp, err := os.CreateTemp("", "test-ensure-removeall-with-dir") - if err != nil { - t.Fatal(err) - } - tmp.Close() - if err := ensureRemoveAll(context.Background(), tmp.Name()); err != nil { - 
t.Fatal(err) - } -} - -// Helper function for setting up an environment to test PID namespace targeting. -func addContainer(c *criService, containerID, sandboxID string, PID uint32, createdAt, startedAt, finishedAt int64) error { - meta := containerstore.Metadata{ - ID: containerID, - SandboxID: sandboxID, - } - status := containerstore.Status{ - Pid: PID, - CreatedAt: createdAt, - StartedAt: startedAt, - FinishedAt: finishedAt, - } - container, err := containerstore.NewContainer(meta, - containerstore.WithFakeStatus(status), - ) - if err != nil { - return err - } - return c.containerStore.Add(container) -} - -func TestValidateTargetContainer(t *testing.T) { - testSandboxID := "test-sandbox-uid" - - // The existing container that will be targeted. - testTargetContainerID := "test-target-container" - testTargetContainerPID := uint32(4567) - - // A container that has finished running and cannot be targeted. - testStoppedContainerID := "stopped-target-container" - testStoppedContainerPID := uint32(6789) - - // A container from another pod. - testOtherContainerSandboxID := "other-sandbox-uid" - testOtherContainerID := "other-target-container" - testOtherContainerPID := uint32(7890) - - // Container create/start/stop times. - createdAt := time.Now().Add(-15 * time.Second).UnixNano() - startedAt := time.Now().Add(-10 * time.Second).UnixNano() - finishedAt := time.Now().Add(-5 * time.Second).UnixNano() - - c := newTestCRIService() - - // Create a target container. - err := addContainer(c, testTargetContainerID, testSandboxID, testTargetContainerPID, createdAt, startedAt, 0) - require.NoError(t, err, "error creating test target container") - - // Create a stopped container. - err = addContainer(c, testStoppedContainerID, testSandboxID, testStoppedContainerPID, createdAt, startedAt, finishedAt) - require.NoError(t, err, "error creating test stopped container") - - // Create a container in another pod. 
- err = addContainer(c, testOtherContainerID, testOtherContainerSandboxID, testOtherContainerPID, createdAt, startedAt, 0) - require.NoError(t, err, "error creating test container in other pod") - - for _, test := range []struct { - desc string - targetContainerID string - expectError bool - }{ - { - desc: "target container in pod", - targetContainerID: testTargetContainerID, - expectError: false, - }, - { - desc: "target stopped container in pod", - targetContainerID: testStoppedContainerID, - expectError: true, - }, - { - desc: "target container does not exist", - targetContainerID: "no-container-with-this-id", - expectError: true, - }, - { - desc: "target container in other pod", - targetContainerID: testOtherContainerID, - expectError: true, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - targetContainer, err := c.validateTargetContainer(testSandboxID, test.targetContainerID) - if test.expectError { - require.Error(t, err, "target should have been invalid but no error") - return - } - require.NoErrorf(t, err, "target should have been valid but got error") - - assert.Equal(t, test.targetContainerID, targetContainer.ID, "returned target container does not have expected ID") - }) - } - -} - -func TestGetRuntimeOptions(t *testing.T) { - _, err := getRuntimeOptions(containers.Container{}) - require.NoError(t, err) - - var pbany *types.Any // This is nil. - var typeurlAny typeurl.Any = pbany // This is typed nil. 
- _, err = getRuntimeOptions(containers.Container{Runtime: containers.RuntimeInfo{Options: typeurlAny}}) - require.NoError(t, err) -} - -func TestHostNetwork(t *testing.T) { - tests := []struct { - name string - c *runtime.PodSandboxConfig - expected bool - }{ - { - name: "when pod namespace return false", - c: &runtime.PodSandboxConfig{ - Linux: &runtime.LinuxPodSandboxConfig{ - SecurityContext: &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - Network: runtime.NamespaceMode_POD, - }, - }, - }, - }, - expected: false, - }, - { - name: "when node namespace return true", - c: &runtime.PodSandboxConfig{ - Linux: &runtime.LinuxPodSandboxConfig{ - SecurityContext: &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - Network: runtime.NamespaceMode_NODE, - }, - }, - }, - }, - expected: true, - }, - } - - for _, tt := range tests { - if goruntime.GOOS != "linux" { - t.Skip() - } - - tt := tt - t.Run(tt.name, func(t *testing.T) { - if hostNetwork(tt.c) != tt.expected { - t.Errorf("failed hostNetwork got %t expected %t", hostNetwork(tt.c), tt.expected) - } - }) - } -} diff --git a/pkg/cri/sbserver/helpers_windows.go b/pkg/cri/sbserver/helpers_windows.go deleted file mode 100644 index 808a95e75..000000000 --- a/pkg/cri/sbserver/helpers_windows.go +++ /dev/null @@ -1,175 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "os" - "path/filepath" - "syscall" - - "github.com/opencontainers/runtime-spec/specs-go" -) - -// openLogFile opens/creates a container log file. -// It specifies `FILE_SHARE_DELETE` option to make sure -// log files can be rotated by kubelet. -// -// Unfortunately this needs to be maintained as Go doesn't -// have a way to set FILE_SHARE_DELETE for os.OpenFile. -// https://github.com/golang/go/issues/32088 -func openLogFile(path string) (*os.File, error) { - path = fixLongPath(path) - if len(path) == 0 { - return nil, syscall.ERROR_FILE_NOT_FOUND - } - pathp, err := syscall.UTF16PtrFromString(path) - if err != nil { - return nil, err - } - createmode := uint32(syscall.OPEN_ALWAYS) - access := uint32(syscall.FILE_APPEND_DATA) - sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE | syscall.FILE_SHARE_DELETE) - h, err := syscall.CreateFile(pathp, access, sharemode, nil, createmode, syscall.FILE_ATTRIBUTE_NORMAL, 0) - if err != nil { - return nil, err - } - return os.NewFile(uintptr(h), path), nil -} - -// Copyright (c) 2009 The Go Authors. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// fixLongPath returns the extended-length (\\?\-prefixed) form of -// path when needed, in order to avoid the default 260 character file -// path limit imposed by Windows. If path is not easily converted to -// the extended-length form (for example, if path is a relative path -// or contains .. elements), or is short enough, fixLongPath returns -// path unmodified. -// -// See https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath -// -// This is copied from https://golang.org/src/path/filepath/path_windows.go. -func fixLongPath(path string) string { - // Do nothing (and don't allocate) if the path is "short". - // Empirically (at least on the Windows Server 2013 builder), - // the kernel is arbitrarily okay with < 248 bytes. That - // matches what the docs above say: - // "When using an API to create a directory, the specified - // path cannot be so long that you cannot append an 8.3 file - // name (that is, the directory name cannot exceed MAX_PATH - // minus 12)." Since MAX_PATH is 260, 260 - 12 = 248. 
- // - // The MSDN docs appear to say that a normal path that is 248 bytes long - // will work; empirically the path must be less then 248 bytes long. - if len(path) < 248 { - // Don't fix. (This is how Go 1.7 and earlier worked, - // not automatically generating the \\?\ form) - return path - } - - // The extended form begins with \\?\, as in - // \\?\c:\windows\foo.txt or \\?\UNC\server\share\foo.txt. - // The extended form disables evaluation of . and .. path - // elements and disables the interpretation of / as equivalent - // to \. The conversion here rewrites / to \ and elides - // . elements as well as trailing or duplicate separators. For - // simplicity it avoids the conversion entirely for relative - // paths or paths containing .. elements. For now, - // \\server\share paths are not converted to - // \\?\UNC\server\share paths because the rules for doing so - // are less well-specified. - if len(path) >= 2 && path[:2] == `\\` { - // Don't canonicalize UNC paths. - return path - } - if !filepath.IsAbs(path) { - // Relative path - return path - } - - const prefix = `\\?` - - pathbuf := make([]byte, len(prefix)+len(path)+len(`\`)) - copy(pathbuf, prefix) - n := len(path) - r, w := 0, len(prefix) - for r < n { - switch { - case os.IsPathSeparator(path[r]): - // empty block - r++ - case path[r] == '.' && (r+1 == n || os.IsPathSeparator(path[r+1])): - // /./ - r++ - case r+1 < n && path[r] == '.' && path[r+1] == '.' && (r+2 == n || os.IsPathSeparator(path[r+2])): - // /../ is currently unhandled - return path - default: - pathbuf[w] = '\\' - w++ - for ; r < n && !os.IsPathSeparator(path[r]); r++ { - pathbuf[w] = path[r] - w++ - } - } - } - // A drive's root directory needs a trailing \ - if w == len(`\\?\c:`) { - pathbuf[w] = '\\' - w++ - } - return string(pathbuf[:w]) -} - -// ensureRemoveAll is a wrapper for os.RemoveAll on Windows. 
-func ensureRemoveAll(_ context.Context, dir string) error { - return os.RemoveAll(dir) -} - -func modifyProcessLabel(runtimeType string, spec *specs.Spec) error { - return nil -} - -func isUnifiedCgroupsMode() bool { - return false -} diff --git a/pkg/cri/sbserver/helpers_windows_test.go b/pkg/cri/sbserver/helpers_windows_test.go deleted file mode 100644 index 3cc844bc0..000000000 --- a/pkg/cri/sbserver/helpers_windows_test.go +++ /dev/null @@ -1,71 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "testing" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func TestWindowsHostNetwork(t *testing.T) { - tests := []struct { - name string - c *runtime.PodSandboxConfig - expected bool - }{ - { - name: "when host process is false returns false", - c: &runtime.PodSandboxConfig{ - Windows: &runtime.WindowsPodSandboxConfig{ - SecurityContext: &runtime.WindowsSandboxSecurityContext{ - HostProcess: false, - }, - }, - }, - expected: false, - }, - { - name: "when host process is true return true", - c: &runtime.PodSandboxConfig{ - Windows: &runtime.WindowsPodSandboxConfig{ - SecurityContext: &runtime.WindowsSandboxSecurityContext{ - HostProcess: true, - }, - }, - }, - expected: true, - }, - { - name: "when no host process return false", - c: &runtime.PodSandboxConfig{ - Windows: &runtime.WindowsPodSandboxConfig{ - SecurityContext: &runtime.WindowsSandboxSecurityContext{}, - }, - }, - expected: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if hostNetwork(tt.c) != tt.expected { - t.Errorf("failed hostNetwork got %t expected %t", hostNetwork(tt.c), tt.expected) - } - }) - } -} diff --git a/pkg/cri/sbserver/list_metric_descriptors.go b/pkg/cri/sbserver/list_metric_descriptors.go deleted file mode 100644 index 3fec5450d..000000000 --- a/pkg/cri/sbserver/list_metric_descriptors.go +++ /dev/null @@ -1,29 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func (c *criService) ListMetricDescriptors(context.Context, *runtime.ListMetricDescriptorsRequest) (*runtime.ListMetricDescriptorsResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method ListMetricDescriptors not implemented") -} diff --git a/pkg/cri/sbserver/list_pod_sandbox_metrics.go b/pkg/cri/sbserver/list_pod_sandbox_metrics.go deleted file mode 100644 index 33ce32009..000000000 --- a/pkg/cri/sbserver/list_pod_sandbox_metrics.go +++ /dev/null @@ -1,29 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func (c *criService) ListPodSandboxMetrics(context.Context, *runtime.ListPodSandboxMetricsRequest) (*runtime.ListPodSandboxMetricsResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method ListPodSandboxMetrics not implemented") -} diff --git a/pkg/cri/sbserver/metrics.go b/pkg/cri/sbserver/metrics.go deleted file mode 100644 index b5ed72b10..000000000 --- a/pkg/cri/sbserver/metrics.go +++ /dev/null @@ -1,76 +0,0 @@ -/* - Copyright The containerd Authors. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "github.com/docker/go-metrics" -) - -var ( - sandboxListTimer metrics.Timer - sandboxCreateNetworkTimer metrics.Timer - sandboxDeleteNetwork metrics.Timer - - sandboxRuntimeCreateTimer metrics.LabeledTimer - sandboxRuntimeStopTimer metrics.LabeledTimer - sandboxRemoveTimer metrics.LabeledTimer - - containerListTimer metrics.Timer - containerRemoveTimer metrics.LabeledTimer - containerCreateTimer metrics.LabeledTimer - containerStopTimer metrics.LabeledTimer - containerStartTimer metrics.LabeledTimer - containerEventsDroppedCount metrics.Counter - - networkPluginOperations metrics.LabeledCounter - networkPluginOperationsErrors metrics.LabeledCounter - networkPluginOperationsLatency metrics.LabeledTimer -) - -func init() { - // these CRI metrics record latencies for successful operations around a sandbox and container's lifecycle. 
- ns := metrics.NewNamespace("containerd", "cri_sandboxed", nil) - - sandboxListTimer = ns.NewTimer("sandbox_list", "time to list sandboxes") - sandboxCreateNetworkTimer = ns.NewTimer("sandbox_create_network", "time to create the network for a sandbox") - sandboxDeleteNetwork = ns.NewTimer("sandbox_delete_network", "time to delete a sandbox's network") - - sandboxRuntimeCreateTimer = ns.NewLabeledTimer("sandbox_runtime_create", "time to create a sandbox in the runtime", "runtime") - sandboxRuntimeStopTimer = ns.NewLabeledTimer("sandbox_runtime_stop", "time to stop a sandbox", "runtime") - sandboxRemoveTimer = ns.NewLabeledTimer("sandbox_remove", "time to remove a sandbox", "runtime") - - containerListTimer = ns.NewTimer("container_list", "time to list containers") - containerRemoveTimer = ns.NewLabeledTimer("container_remove", "time to remove a container", "runtime") - containerCreateTimer = ns.NewLabeledTimer("container_create", "time to create a container", "runtime") - containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") - containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") - containerEventsDroppedCount = ns.NewCounter("container_events_dropped", "count container discarding event total from server start") - - networkPluginOperations = ns.NewLabeledCounter("network_plugin_operations_total", "cumulative number of network plugin operations by operation type", "operation_type") - networkPluginOperationsErrors = ns.NewLabeledCounter("network_plugin_operations_errors_total", "cumulative number of network plugin operations by operation type", "operation_type") - networkPluginOperationsLatency = ns.NewLabeledTimer("network_plugin_operations_duration_seconds", "latency in seconds of network plugin operations. 
Broken down by operation type", "operation_type") - - metrics.Register(ns) -} - -// for backwards compatibility with kubelet/dockershim metrics -// https://github.com/containerd/containerd/issues/7801 -const ( - networkStatusOp = "get_pod_network_status" - networkSetUpOp = "set_up_pod" - networkTearDownOp = "tear_down_pod" -) diff --git a/pkg/cri/sbserver/nri.go b/pkg/cri/sbserver/nri.go deleted file mode 100644 index d76b74759..000000000 --- a/pkg/cri/sbserver/nri.go +++ /dev/null @@ -1,43 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - criconfig "github.com/containerd/containerd/pkg/cri/config" - cstore "github.com/containerd/containerd/pkg/cri/store/container" - sstore "github.com/containerd/containerd/pkg/cri/store/sandbox" -) - -type criImplementation struct { - c *criService -} - -func (i *criImplementation) Config() *criconfig.Config { - return &i.c.config -} - -func (i *criImplementation) SandboxStore() *sstore.Store { - return i.c.sandboxStore -} - -func (i *criImplementation) ContainerStore() *cstore.Store { - return i.c.containerStore -} - -func (i *criImplementation) ContainerMetadataExtensionKey() string { - return containerMetadataExtension -} diff --git a/pkg/cri/sbserver/nri_linux.go b/pkg/cri/sbserver/nri_linux.go deleted file mode 100644 index 38c2f5e85..000000000 --- a/pkg/cri/sbserver/nri_linux.go +++ /dev/null @@ -1,35 +0,0 @@ -//go:build linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "time" - - cstore "github.com/containerd/containerd/pkg/cri/store/container" - cri "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func (i *criImplementation) UpdateContainerResources(ctx context.Context, ctr cstore.Container, req *cri.UpdateContainerResourcesRequest, status cstore.Status) (cstore.Status, error) { - return i.c.updateContainerResources(ctx, ctr, req, status) -} - -func (i *criImplementation) StopContainer(ctx context.Context, ctr cstore.Container, timeout time.Duration) error { - return i.c.stopContainer(ctx, ctr, timeout) -} diff --git a/pkg/cri/sbserver/nri_other.go b/pkg/cri/sbserver/nri_other.go deleted file mode 100644 index f88c16a3a..000000000 --- a/pkg/cri/sbserver/nri_other.go +++ /dev/null @@ -1,35 +0,0 @@ -//go:build !linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "time" - - cstore "github.com/containerd/containerd/pkg/cri/store/container" - cri "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func (i *criImplementation) UpdateContainerResources(ctx context.Context, ctr cstore.Container, req *cri.UpdateContainerResourcesRequest, status cstore.Status) (cstore.Status, error) { - return cstore.Status{}, nil -} - -func (i *criImplementation) StopContainer(ctx context.Context, ctr cstore.Container, timeout time.Duration) error { - return nil -} diff --git a/pkg/cri/sbserver/rdt.go b/pkg/cri/sbserver/rdt.go deleted file mode 100644 index c03bce55e..000000000 --- a/pkg/cri/sbserver/rdt.go +++ /dev/null @@ -1,49 +0,0 @@ -//go:build !no_rdt - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "fmt" - - "github.com/containerd/containerd/pkg/rdt" - "github.com/containerd/log" -) - -// rdtClassFromAnnotations examines container and pod annotations of a -// container and returns its effective RDT class. 
-func (c *criService) rdtClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) { - cls, err := rdt.ContainerClassFromAnnotations(containerName, containerAnnotations, podAnnotations) - - if err == nil { - // Our internal check that RDT has been enabled - if cls != "" && !rdt.IsEnabled() { - err = fmt.Errorf("RDT disabled, refusing to set RDT class of container %q to %q", containerName, cls) - } - } - - if err != nil { - if !rdt.IsEnabled() && c.config.ContainerdConfig.IgnoreRdtNotEnabledErrors { - log.L.Debugf("continuing create container %s, ignoring rdt not enabled (%v)", containerName, err) - return "", nil - } - return "", err - } - - return cls, nil -} diff --git a/pkg/cri/sbserver/rdt_stub.go b/pkg/cri/sbserver/rdt_stub.go deleted file mode 100644 index d362ac2ac..000000000 --- a/pkg/cri/sbserver/rdt_stub.go +++ /dev/null @@ -1,23 +0,0 @@ -//go:build no_rdt - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -func (c *criService) rdtClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) { - return "", nil -} diff --git a/pkg/cri/sbserver/restart.go b/pkg/cri/sbserver/restart.go deleted file mode 100644 index 8ed261842..000000000 --- a/pkg/cri/sbserver/restart.go +++ /dev/null @@ -1,492 +0,0 @@ -/* - Copyright The containerd Authors. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "fmt" - "os" - "path/filepath" - "sync" - "time" - - "github.com/containerd/containerd" - containerdio "github.com/containerd/containerd/cio" - "github.com/containerd/containerd/errdefs" - containerdimages "github.com/containerd/containerd/images" - criconfig "github.com/containerd/containerd/pkg/cri/config" - "github.com/containerd/containerd/pkg/cri/sbserver/podsandbox" - "github.com/containerd/containerd/pkg/netns" - "github.com/containerd/containerd/platforms" - "github.com/containerd/log" - "github.com/containerd/typeurl/v2" - "golang.org/x/sync/errgroup" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - cio "github.com/containerd/containerd/pkg/cri/io" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - ctrdutil "github.com/containerd/containerd/pkg/cri/util" -) - -// NOTE: The recovery logic has following assumption: when the cri plugin is down: -// 1) Files (e.g. root directory, netns) and checkpoint maintained by the plugin MUST NOT be -// touched. Or else, recovery logic for those containers/sandboxes may return error. -// 2) Containerd containers may be deleted, but SHOULD NOT be added. Or else, recovery logic -// for the newly added container/sandbox will return error, because there is no corresponding root -// directory created. 
-// 3) Containerd container tasks may exit or be stopped, deleted. Even though current logic could -// tolerant tasks being created or started, we prefer that not to happen. - -// recover recovers system state from containerd and status checkpoint. -func (c *criService) recover(ctx context.Context) error { - // Recover all sandboxes. - sandboxes, err := c.client.Containers(ctx, filterLabel(containerKindLabel, containerKindSandbox)) - if err != nil { - return fmt.Errorf("failed to list sandbox containers: %w", err) - } - - podSandboxController, ok := c.sandboxControllers[criconfig.ModePodSandbox] - if !ok { - log.G(ctx).Fatal("unable to restore pod sandboxes, no controller found") - } - - podSandboxLoader, ok := podSandboxController.(podSandboxRecover) - if !ok { - log.G(ctx).Fatal("pod sandbox controller doesn't support recovery") - } - - eg, ctx2 := errgroup.WithContext(ctx) - for _, sandbox := range sandboxes { - sandbox := sandbox - eg.Go(func() error { - sb, err := podSandboxLoader.RecoverContainer(ctx2, sandbox) - if err != nil { - log.G(ctx2). - WithError(err). - WithField("sandbox", sandbox.ID()). 
- Error("Failed to load sandbox") - - return nil - } - log.G(ctx2).Debugf("Loaded sandbox %+v", sb) - if err := c.sandboxStore.Add(sb); err != nil { - return fmt.Errorf("failed to add sandbox %q to store: %w", sandbox.ID(), err) - } - if err := c.sandboxNameIndex.Reserve(sb.Name, sb.ID); err != nil { - return fmt.Errorf("failed to reserve sandbox name %q: %w", sb.Name, err) - } - return nil - }) - } - if err := eg.Wait(); err != nil { - return err - } - - // Recover sandboxes in the new SandboxStore - storedSandboxes, err := c.client.SandboxStore().List(ctx) - if err != nil { - return fmt.Errorf("failed to list sandboxes from API: %w", err) - } - for _, sbx := range storedSandboxes { - if _, err := c.sandboxStore.Get(sbx.ID); err == nil { - continue - } - - metadata := sandboxstore.Metadata{} - err := sbx.GetExtension(podsandbox.MetadataKey, &metadata) - if err != nil { - return fmt.Errorf("failed to get metadata for stored sandbox %q: %w", sbx.ID, err) - } - - var ( - state = sandboxstore.StateUnknown - controller = c.sandboxControllers[criconfig.ModeShim] - ) - - status, err := controller.Status(ctx, sbx.ID, false) - if err != nil { - log.G(ctx). - WithError(err). - WithField("sandbox", sbx.ID). - Error("failed to recover sandbox state") - - if errdefs.IsNotFound(err) { - state = sandboxstore.StateNotReady - } - } else { - if code, ok := runtime.PodSandboxState_value[status.State]; ok { - if code == int32(runtime.PodSandboxState_SANDBOX_READY) { - state = sandboxstore.StateReady - } else if code == int32(runtime.PodSandboxState_SANDBOX_NOTREADY) { - state = sandboxstore.StateNotReady - } - } - } - - sb := sandboxstore.NewSandbox(metadata, sandboxstore.Status{State: state}) - - // Load network namespace. - sb.NetNS = getNetNS(&metadata) - - if err := c.sandboxStore.Add(sb); err != nil { - return fmt.Errorf("failed to add stored sandbox %q to store: %w", sbx.ID, err) - } - } - - // Recover all containers. 
- containers, err := c.client.Containers(ctx, filterLabel(containerKindLabel, containerKindContainer)) - if err != nil { - return fmt.Errorf("failed to list containers: %w", err) - } - eg, ctx2 = errgroup.WithContext(ctx) - for _, container := range containers { - container := container - eg.Go(func() error { - cntr, err := c.loadContainer(ctx2, container) - if err != nil { - log.G(ctx2). - WithError(err). - WithField("container", container.ID()). - Error("Failed to load container") - - return nil - } - log.G(ctx2).Debugf("Loaded container %+v", cntr) - if err := c.containerStore.Add(cntr); err != nil { - return fmt.Errorf("failed to add container %q to store: %w", container.ID(), err) - } - if err := c.containerNameIndex.Reserve(cntr.Name, cntr.ID); err != nil { - return fmt.Errorf("failed to reserve container name %q: %w", cntr.Name, err) - } - return nil - }) - } - if err := eg.Wait(); err != nil { - return err - } - - // Recover all images. - cImages, err := c.client.ListImages(ctx) - if err != nil { - return fmt.Errorf("failed to list images: %w", err) - } - c.loadImages(ctx, cImages) - - // It's possible that containerd containers are deleted unexpectedly. In that case, - // we can't even get metadata, we should cleanup orphaned sandbox/container directories - // with best effort. - - // Cleanup orphaned sandbox and container directories without corresponding containerd container. 
- for _, cleanup := range []struct { - cntrs []containerd.Container - base string - errMsg string - }{ - { - cntrs: sandboxes, - base: filepath.Join(c.config.RootDir, sandboxesDir), - errMsg: "failed to cleanup orphaned sandbox directories", - }, - { - cntrs: sandboxes, - base: filepath.Join(c.config.StateDir, sandboxesDir), - errMsg: "failed to cleanup orphaned volatile sandbox directories", - }, - { - cntrs: containers, - base: filepath.Join(c.config.RootDir, containersDir), - errMsg: "failed to cleanup orphaned container directories", - }, - { - cntrs: containers, - base: filepath.Join(c.config.StateDir, containersDir), - errMsg: "failed to cleanup orphaned volatile container directories", - }, - } { - if err := cleanupOrphanedIDDirs(ctx, cleanup.cntrs, cleanup.base); err != nil { - return fmt.Errorf("%s: %w", cleanup.errMsg, err) - } - } - return nil -} - -// loadContainerTimeout is the default timeout for loading a container/sandbox. -// One container/sandbox hangs (e.g. containerd#2438) should not affect other -// containers/sandboxes. -// Most CRI container/sandbox related operations are per container, the ones -// which handle multiple containers at a time are: -// * ListPodSandboxes: Don't talk with containerd services. -// * ListContainers: Don't talk with containerd services. -// * ListContainerStats: Not in critical code path, a default timeout will -// be applied at CRI level. -// * Recovery logic: We should set a time for each container/sandbox recovery. -// * Event monitor: We should set a timeout for each container/sandbox event handling. -const loadContainerTimeout = 10 * time.Second - -// loadContainer loads container from containerd and status checkpoint. 
-func (c *criService) loadContainer(ctx context.Context, cntr containerd.Container) (containerstore.Container, error) { - ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout) - defer cancel() - id := cntr.ID() - containerDir := c.getContainerRootDir(id) - volatileContainerDir := c.getVolatileContainerRootDir(id) - var container containerstore.Container - // Load container metadata. - exts, err := cntr.Extensions(ctx) - if err != nil { - return container, fmt.Errorf("failed to get container extensions: %w", err) - } - ext, ok := exts[containerMetadataExtension] - if !ok { - return container, fmt.Errorf("metadata extension %q not found", containerMetadataExtension) - } - data, err := typeurl.UnmarshalAny(ext) - if err != nil { - return container, fmt.Errorf("failed to unmarshal metadata extension %q: %w", ext, err) - } - meta := data.(*containerstore.Metadata) - - // Load status from checkpoint. - status, err := containerstore.LoadStatus(containerDir, id) - if err != nil { - log.G(ctx).WithError(err).Warnf("Failed to load container status for %q", id) - status = unknownContainerStatus() - } - - var containerIO *cio.ContainerIO - err = func() error { - // Load up-to-date status from containerd. - t, err := cntr.Task(ctx, func(fifos *containerdio.FIFOSet) (_ containerdio.IO, err error) { - stdoutWC, stderrWC, err := c.createContainerLoggers(meta.LogPath, meta.Config.GetTty()) - if err != nil { - return nil, err - } - defer func() { - if err != nil { - if stdoutWC != nil { - stdoutWC.Close() - } - if stderrWC != nil { - stderrWC.Close() - } - } - }() - containerIO, err = cio.NewContainerIO(id, - cio.WithFIFOs(fifos), - ) - if err != nil { - return nil, err - } - containerIO.AddOutput("log", stdoutWC, stderrWC) - containerIO.Pipe() - return containerIO, nil - }) - if err != nil && !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to load task: %w", err) - } - var s containerd.Status - var notFound bool - if errdefs.IsNotFound(err) { - // Task is not found. 
- notFound = true - } else { - // Task is found. Get task status. - s, err = t.Status(ctx) - if err != nil { - // It's still possible that task is deleted during this window. - if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to get task status: %w", err) - } - notFound = true - } - } - if notFound { - // Task is not created or has been deleted, use the checkpointed status - // to generate container status. - switch status.State() { - case runtime.ContainerState_CONTAINER_CREATED: - // NOTE: Another possibility is that we've tried to start the container, but - // containerd got restarted during that. In that case, we still - // treat the container as `CREATED`. - containerIO, err = cio.NewContainerIO(id, - cio.WithNewFIFOs(volatileContainerDir, meta.Config.GetTty(), meta.Config.GetStdin()), - ) - if err != nil { - return fmt.Errorf("failed to create container io: %w", err) - } - case runtime.ContainerState_CONTAINER_RUNNING: - // Container was in running state, but its task has been deleted, - // set unknown exited state. Container io is not needed in this case. - status.FinishedAt = time.Now().UnixNano() - status.ExitCode = unknownExitCode - status.Reason = unknownExitReason - default: - // Container is in exited/unknown state, return the status as it is. - } - } else { - // Task status is found. Update container status based on the up-to-date task status. - switch s.Status { - case containerd.Created: - // Task has been created, but not started yet. This could only happen if containerd - // gets restarted during container start. - // Container must be in `CREATED` state. - if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to delete task: %w", err) - } - if status.State() != runtime.ContainerState_CONTAINER_CREATED { - return fmt.Errorf("unexpected container state for created task: %q", status.State()) - } - case containerd.Running: - // Task is running. 
Container must be in `RUNNING` state, based on our assumption that - // "task should not be started when containerd is down". - switch status.State() { - case runtime.ContainerState_CONTAINER_EXITED: - return fmt.Errorf("unexpected container state for running task: %q", status.State()) - case runtime.ContainerState_CONTAINER_RUNNING: - default: - // This may happen if containerd gets restarted after task is started, but - // before status is checkpointed. - status.StartedAt = time.Now().UnixNano() - status.Pid = t.Pid() - } - // Wait for the task for exit monitor. - // wait is a long running background request, no timeout needed. - exitCh, err := t.Wait(ctrdutil.NamespacedContext()) - if err != nil { - if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to wait for task: %w", err) - } - // Container was in running state, but its task has been deleted, - // set unknown exited state. - status.FinishedAt = time.Now().UnixNano() - status.ExitCode = unknownExitCode - status.Reason = unknownExitReason - } else { - // Start exit monitor. - c.eventMonitor.startContainerExitMonitor(context.Background(), id, status.Pid, exitCh) - } - case containerd.Stopped: - // Task is stopped. Update status and delete the task. - if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to delete task: %w", err) - } - status.FinishedAt = s.ExitTime.UnixNano() - status.ExitCode = int32(s.ExitStatus) - default: - return fmt.Errorf("unexpected task status %q", s.Status) - } - } - return nil - }() - if err != nil { - log.G(ctx).WithError(err).Errorf("Failed to load container status for %q", id) - // Only set the unknown field in this case, because other fields may - // contain useful information loaded from the checkpoint. 
- status.Unknown = true - } - opts := []containerstore.Opts{ - containerstore.WithStatus(status, containerDir), - containerstore.WithContainer(cntr), - } - // containerIO could be nil for container in unknown state. - if containerIO != nil { - opts = append(opts, containerstore.WithContainerIO(containerIO)) - } - return containerstore.NewContainer(*meta, opts...) -} - -// podSandboxRecover is an additional interface implemented by podsandbox/ controller to handle -// Pod sandbox containers recovery. -type podSandboxRecover interface { - RecoverContainer(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) -} - -func getNetNS(meta *sandboxstore.Metadata) *netns.NetNS { - // Don't need to load netns for host network sandbox. - if hostNetwork(meta.Config) { - return nil - } - return netns.LoadNetNS(meta.NetNSPath) -} - -// loadImages loads images from containerd. -func (c *criService) loadImages(ctx context.Context, cImages []containerd.Image) { - snapshotter := c.config.ContainerdConfig.Snapshotter - var wg sync.WaitGroup - for _, i := range cImages { - wg.Add(1) - i := i - go func() { - defer wg.Done() - ok, _, _, _, err := containerdimages.Check(ctx, i.ContentStore(), i.Target(), platforms.Default()) - if err != nil { - log.G(ctx).WithError(err).Errorf("Failed to check image content readiness for %q", i.Name()) - return - } - if !ok { - log.G(ctx).Warnf("The image content readiness for %q is not ok", i.Name()) - return - } - // Checking existence of top-level snapshot for each image being recovered. - unpacked, err := i.IsUnpacked(ctx, snapshotter) - if err != nil { - log.G(ctx).WithError(err).Warnf("Failed to check whether image is unpacked for image %s", i.Name()) - return - } - if !unpacked { - log.G(ctx).Warnf("The image %s is not unpacked.", i.Name()) - // TODO(random-liu): Consider whether we should try unpack here. 
- } - if err := c.UpdateImage(ctx, i.Name()); err != nil { - log.G(ctx).WithError(err).Warnf("Failed to update reference for image %q", i.Name()) - return - } - log.G(ctx).Debugf("Loaded image %q", i.Name()) - }() - } - wg.Wait() -} - -func cleanupOrphanedIDDirs(ctx context.Context, cntrs []containerd.Container, base string) error { - // Cleanup orphaned id directories. - dirs, err := os.ReadDir(base) - if err != nil && !os.IsNotExist(err) { - return fmt.Errorf("failed to read base directory: %w", err) - } - idsMap := make(map[string]containerd.Container) - for _, cntr := range cntrs { - idsMap[cntr.ID()] = cntr - } - for _, d := range dirs { - if !d.IsDir() { - log.G(ctx).Warnf("Invalid file %q found in base directory %q", d.Name(), base) - continue - } - if _, ok := idsMap[d.Name()]; ok { - // Do not remove id directory if corresponding container is found. - continue - } - dir := filepath.Join(base, d.Name()) - if err := ensureRemoveAll(ctx, dir); err != nil { - log.G(ctx).WithError(err).Warnf("Failed to remove id directory %q", dir) - } else { - log.G(ctx).Debugf("Cleanup orphaned id directory %q", dir) - } - } - return nil -} diff --git a/pkg/cri/sbserver/runtime_config.go b/pkg/cri/sbserver/runtime_config.go deleted file mode 100644 index 37769f10d..000000000 --- a/pkg/cri/sbserver/runtime_config.go +++ /dev/null @@ -1,31 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// RuntimeConfig returns configuration information of the runtime. -func (c *criService) RuntimeConfig(ctx context.Context, r *runtime.RuntimeConfigRequest) (*runtime.RuntimeConfigResponse, error) { - resp := &runtime.RuntimeConfigResponse{ - Linux: c.getLinuxRuntimeConfig(ctx), - } - return resp, nil -} diff --git a/pkg/cri/sbserver/runtime_config_linux.go b/pkg/cri/sbserver/runtime_config_linux.go deleted file mode 100644 index e5046d853..000000000 --- a/pkg/cri/sbserver/runtime_config_linux.go +++ /dev/null @@ -1,81 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "sort" - - "github.com/containerd/containerd/pkg/systemd" - runcoptions "github.com/containerd/containerd/runtime/v2/runc/options" - "github.com/containerd/log" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func (c *criService) getLinuxRuntimeConfig(ctx context.Context) *runtime.LinuxRuntimeConfiguration { - return &runtime.LinuxRuntimeConfiguration{CgroupDriver: c.getCgroupDriver(ctx)} -} - -func (c *criService) getCgroupDriver(ctx context.Context) runtime.CgroupDriver { - // Go through the runtime handlers in a predictable order, starting from the - // default handler, others sorted in alphabetical order - handlerNames := make([]string, 0, len(c.config.ContainerdConfig.Runtimes)) - for n := range c.config.ContainerdConfig.Runtimes { - handlerNames = append(handlerNames, n) - } - sort.Slice(handlerNames, func(i, j int) bool { - if handlerNames[i] == c.config.ContainerdConfig.DefaultRuntimeName { - return true - } - if handlerNames[j] == c.config.ContainerdConfig.DefaultRuntimeName { - return false - } - return handlerNames[i] < handlerNames[j] - }) - - for _, handler := range handlerNames { - opts, err := generateRuntimeOptions(c.config.ContainerdConfig.Runtimes[handler]) - if err != nil { - log.G(ctx).Debugf("failed to parse runtime handler options for %q", handler) - continue - } - if d, ok := getCgroupDriverFromRuntimeHandlerOpts(opts); ok { - return d - } - log.G(ctx).Debugf("runtime handler %q does not provide cgroup driver information", handler) - } - - // If no runtime handlers have a setting, detect if systemd is running - d := runtime.CgroupDriver_CGROUPFS - if systemd.IsRunningSystemd() { - d = runtime.CgroupDriver_SYSTEMD - } - log.G(ctx).Debugf("no runtime handler provided cgroup driver setting, using auto-detected %s", runtime.CgroupDriver_name[int32(d)]) - return d -} - -func getCgroupDriverFromRuntimeHandlerOpts(opts interface{}) (runtime.CgroupDriver, bool) { - switch v := opts.(type) { - 
case *runcoptions.Options: - systemdCgroup := v.SystemdCgroup - if systemdCgroup { - return runtime.CgroupDriver_SYSTEMD, true - } - return runtime.CgroupDriver_CGROUPFS, true - } - return runtime.CgroupDriver_SYSTEMD, false -} diff --git a/pkg/cri/sbserver/runtime_config_linux_test.go b/pkg/cri/sbserver/runtime_config_linux_test.go deleted file mode 100644 index de53523b9..000000000 --- a/pkg/cri/sbserver/runtime_config_linux_test.go +++ /dev/null @@ -1,105 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "testing" - - criconfig "github.com/containerd/containerd/pkg/cri/config" - "github.com/containerd/containerd/pkg/systemd" - "github.com/containerd/containerd/plugin" - "github.com/stretchr/testify/assert" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func newFakeRuntimeConfig(runcV2, systemdCgroup bool) criconfig.Runtime { - r := criconfig.Runtime{Type: "default", Options: map[string]interface{}{}} - if runcV2 { - r.Type = plugin.RuntimeRuncV2 - if systemdCgroup { - r.Options["SystemdCgroup"] = true - } - } - return r -} - -func TestRuntimeConfig(t *testing.T) { - autoDetected := runtime.CgroupDriver_CGROUPFS - if systemd.IsRunningSystemd() { - autoDetected = runtime.CgroupDriver_SYSTEMD - } - - for _, test := range []struct { - desc string - defaultRuntime string - runtimes map[string]criconfig.Runtime - expectedCgroupDriver runtime.CgroupDriver - }{ - { - desc: "no runtimes", - expectedCgroupDriver: autoDetected, - }, - { - desc: "non-runc runtime", - defaultRuntime: "non-runc", - runtimes: map[string]criconfig.Runtime{"non-runc": newFakeRuntimeConfig(false, false)}, - expectedCgroupDriver: autoDetected, - }, - { - desc: "no default, pick first in alphabetical order", - runtimes: map[string]criconfig.Runtime{ - "non-runc": newFakeRuntimeConfig(false, false), - "runc-2": newFakeRuntimeConfig(true, true), - "runc": newFakeRuntimeConfig(true, false), - "non-runc-2": newFakeRuntimeConfig(false, false), - }, - expectedCgroupDriver: runtime.CgroupDriver_CGROUPFS, - }, - { - desc: "pick default, cgroupfs", - defaultRuntime: "runc-2", - runtimes: map[string]criconfig.Runtime{ - "non-runc": newFakeRuntimeConfig(false, false), - "runc": newFakeRuntimeConfig(true, true), - "runc-2": newFakeRuntimeConfig(true, false), - }, - expectedCgroupDriver: runtime.CgroupDriver_CGROUPFS, - }, - { - desc: "pick default, systemd", - defaultRuntime: "runc-2", - runtimes: map[string]criconfig.Runtime{ - "non-runc": 
newFakeRuntimeConfig(false, false), - "runc": newFakeRuntimeConfig(true, false), - "runc-2": newFakeRuntimeConfig(true, true), - }, - expectedCgroupDriver: runtime.CgroupDriver_SYSTEMD, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c := newTestCRIService() - c.config.PluginConfig.ContainerdConfig.DefaultRuntimeName = test.defaultRuntime - c.config.PluginConfig.ContainerdConfig.Runtimes = test.runtimes - - resp, err := c.RuntimeConfig(context.TODO(), &runtime.RuntimeConfigRequest{}) - assert.NoError(t, err) - assert.Equal(t, test.expectedCgroupDriver, resp.Linux.CgroupDriver, "got unexpected cgroup driver") - }) - } -} diff --git a/pkg/cri/sbserver/runtime_config_other.go b/pkg/cri/sbserver/runtime_config_other.go deleted file mode 100644 index 7559472ff..000000000 --- a/pkg/cri/sbserver/runtime_config_other.go +++ /dev/null @@ -1,29 +0,0 @@ -//go:build !linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func (c *criService) getLinuxRuntimeConfig(ctx context.Context) *runtime.LinuxRuntimeConfiguration { - return nil -} diff --git a/pkg/cri/sbserver/sandbox_list.go b/pkg/cri/sbserver/sandbox_list.go deleted file mode 100644 index 4cae5a6f4..000000000 --- a/pkg/cri/sbserver/sandbox_list.go +++ /dev/null @@ -1,112 +0,0 @@ -/* - Copyright The containerd Authors. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "time" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" -) - -// ListPodSandbox returns a list of Sandbox. -func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (*runtime.ListPodSandboxResponse, error) { - start := time.Now() - // List all sandboxes from store. - sandboxesInStore := c.sandboxStore.List() - var sandboxes []*runtime.PodSandbox - for _, sandboxInStore := range sandboxesInStore { - sandboxes = append(sandboxes, toCRISandbox( - sandboxInStore.Metadata, - sandboxInStore.Status.Get(), - )) - } - - sandboxes = c.filterCRISandboxes(sandboxes, r.GetFilter()) - - sandboxListTimer.UpdateSince(start) - return &runtime.ListPodSandboxResponse{Items: sandboxes}, nil -} - -// toCRISandbox converts sandbox metadata into CRI pod sandbox. -func toCRISandbox(meta sandboxstore.Metadata, status sandboxstore.Status) *runtime.PodSandbox { - // Set sandbox state to NOTREADY by default. 
- state := runtime.PodSandboxState_SANDBOX_NOTREADY - if status.State == sandboxstore.StateReady { - state = runtime.PodSandboxState_SANDBOX_READY - } - return &runtime.PodSandbox{ - Id: meta.ID, - Metadata: meta.Config.GetMetadata(), - State: state, - CreatedAt: status.CreatedAt.UnixNano(), - Labels: meta.Config.GetLabels(), - Annotations: meta.Config.GetAnnotations(), - RuntimeHandler: meta.RuntimeHandler, - } -} - -func (c *criService) normalizePodSandboxFilter(filter *runtime.PodSandboxFilter) { - if sb, err := c.sandboxStore.Get(filter.GetId()); err == nil { - filter.Id = sb.ID - } -} - -func (c *criService) normalizePodSandboxStatsFilter(filter *runtime.PodSandboxStatsFilter) { - if sb, err := c.sandboxStore.Get(filter.GetId()); err == nil { - filter.Id = sb.ID - } -} - -// filterCRISandboxes filters CRISandboxes. -func (c *criService) filterCRISandboxes(sandboxes []*runtime.PodSandbox, filter *runtime.PodSandboxFilter) []*runtime.PodSandbox { - if filter == nil { - return sandboxes - } - - c.normalizePodSandboxFilter(filter) - filtered := []*runtime.PodSandbox{} - for _, s := range sandboxes { - // Filter by id - if filter.GetId() != "" && filter.GetId() != s.Id { - continue - } - // Filter by state - if filter.GetState() != nil && filter.GetState().GetState() != s.State { - continue - } - // Filter by label - if filter.GetLabelSelector() != nil { - match := true - for k, v := range filter.GetLabelSelector() { - got, ok := s.Labels[k] - if !ok || got != v { - match = false - break - } - } - if !match { - continue - } - } - filtered = append(filtered, s) - } - - return filtered -} diff --git a/pkg/cri/sbserver/sandbox_list_test.go b/pkg/cri/sbserver/sandbox_list_test.go deleted file mode 100644 index 0bed867fd..000000000 --- a/pkg/cri/sbserver/sandbox_list_test.go +++ /dev/null @@ -1,225 +0,0 @@ -/* - Copyright The containerd Authors. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "testing" - "time" - - "github.com/stretchr/testify/assert" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" -) - -func TestToCRISandbox(t *testing.T) { - config := &runtime.PodSandboxConfig{ - Metadata: &runtime.PodSandboxMetadata{ - Name: "test-name", - Uid: "test-uid", - Namespace: "test-ns", - Attempt: 1, - }, - Labels: map[string]string{"a": "b"}, - Annotations: map[string]string{"c": "d"}, - } - createdAt := time.Now() - meta := sandboxstore.Metadata{ - ID: "test-id", - Name: "test-name", - Config: config, - NetNSPath: "test-netns", - RuntimeHandler: "test-runtime-handler", - } - expect := &runtime.PodSandbox{ - Id: "test-id", - Metadata: config.GetMetadata(), - CreatedAt: createdAt.UnixNano(), - Labels: config.GetLabels(), - Annotations: config.GetAnnotations(), - RuntimeHandler: "test-runtime-handler", - } - for _, test := range []struct { - desc string - state sandboxstore.State - expectedState runtime.PodSandboxState - }{ - { - desc: "sandbox state ready", - state: sandboxstore.StateReady, - expectedState: runtime.PodSandboxState_SANDBOX_READY, - }, - { - desc: "sandbox state not ready", - state: sandboxstore.StateNotReady, - expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY, - }, - { - desc: "sandbox state unknown", - state: sandboxstore.StateUnknown, - expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY, 
- }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - status := sandboxstore.Status{ - CreatedAt: createdAt, - State: test.state, - } - expect.State = test.expectedState - s := toCRISandbox(meta, status) - assert.Equal(t, expect, s, test.desc) - }) - } -} - -func TestFilterSandboxes(t *testing.T) { - c := newTestCRIService() - sandboxes := []sandboxstore.Sandbox{ - sandboxstore.NewSandbox( - sandboxstore.Metadata{ - ID: "1abcdef", - Name: "sandboxname-1", - Config: &runtime.PodSandboxConfig{ - Metadata: &runtime.PodSandboxMetadata{ - Name: "podname-1", - Uid: "uid-1", - Namespace: "ns-1", - Attempt: 1, - }, - }, - RuntimeHandler: "test-runtime-handler", - }, - sandboxstore.Status{ - CreatedAt: time.Now(), - State: sandboxstore.StateReady, - }, - ), - sandboxstore.NewSandbox( - sandboxstore.Metadata{ - ID: "2abcdef", - Name: "sandboxname-2", - Config: &runtime.PodSandboxConfig{ - Metadata: &runtime.PodSandboxMetadata{ - Name: "podname-2", - Uid: "uid-2", - Namespace: "ns-2", - Attempt: 2, - }, - Labels: map[string]string{"a": "b"}, - }, - RuntimeHandler: "test-runtime-handler", - }, - sandboxstore.Status{ - CreatedAt: time.Now(), - State: sandboxstore.StateNotReady, - }, - ), - sandboxstore.NewSandbox( - sandboxstore.Metadata{ - ID: "3abcdef", - Name: "sandboxname-3", - Config: &runtime.PodSandboxConfig{ - Metadata: &runtime.PodSandboxMetadata{ - Name: "podname-2", - Uid: "uid-2", - Namespace: "ns-2", - Attempt: 2, - }, - Labels: map[string]string{"c": "d"}, - }, - RuntimeHandler: "test-runtime-handler", - }, - sandboxstore.Status{ - CreatedAt: time.Now(), - State: sandboxstore.StateReady, - }, - ), - } - - // Create PodSandbox - testSandboxes := []*runtime.PodSandbox{} - for _, sb := range sandboxes { - testSandboxes = append(testSandboxes, toCRISandbox(sb.Metadata, sb.Status.Get())) - } - - // Inject test sandbox metadata - for _, sb := range sandboxes { - assert.NoError(t, c.sandboxStore.Add(sb)) - } - - for _, test := range []struct { - desc string 
- filter *runtime.PodSandboxFilter - expect []*runtime.PodSandbox - }{ - { - desc: "no filter", - expect: testSandboxes, - }, - { - desc: "id filter", - filter: &runtime.PodSandboxFilter{Id: "2abcdef"}, - expect: []*runtime.PodSandbox{testSandboxes[1]}, - }, - { - desc: "truncid filter", - filter: &runtime.PodSandboxFilter{Id: "2"}, - expect: []*runtime.PodSandbox{testSandboxes[1]}, - }, - { - desc: "state filter", - filter: &runtime.PodSandboxFilter{ - State: &runtime.PodSandboxStateValue{ - State: runtime.PodSandboxState_SANDBOX_READY, - }, - }, - expect: []*runtime.PodSandbox{testSandboxes[0], testSandboxes[2]}, - }, - { - desc: "label filter", - filter: &runtime.PodSandboxFilter{ - LabelSelector: map[string]string{"a": "b"}, - }, - expect: []*runtime.PodSandbox{testSandboxes[1]}, - }, - { - desc: "mixed filter not matched", - filter: &runtime.PodSandboxFilter{ - Id: "1", - LabelSelector: map[string]string{"a": "b"}, - }, - expect: []*runtime.PodSandbox{}, - }, - { - desc: "mixed filter matched", - filter: &runtime.PodSandboxFilter{ - State: &runtime.PodSandboxStateValue{ - State: runtime.PodSandboxState_SANDBOX_READY, - }, - LabelSelector: map[string]string{"c": "d"}, - }, - expect: []*runtime.PodSandbox{testSandboxes[2]}, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - filtered := c.filterCRISandboxes(testSandboxes, test.filter) - assert.Equal(t, test.expect, filtered, test.desc) - }) - } -} diff --git a/pkg/cri/sbserver/sandbox_portforward.go b/pkg/cri/sbserver/sandbox_portforward.go deleted file mode 100644 index b87ab89c9..000000000 --- a/pkg/cri/sbserver/sandbox_portforward.go +++ /dev/null @@ -1,40 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "errors" - "fmt" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" -) - -// PortForward prepares a streaming endpoint to forward ports from a PodSandbox, and returns the address. -func (c *criService) PortForward(ctx context.Context, r *runtime.PortForwardRequest) (retRes *runtime.PortForwardResponse, retErr error) { - sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId()) - if err != nil { - return nil, fmt.Errorf("failed to find sandbox %q: %w", r.GetPodSandboxId(), err) - } - if sandbox.Status.Get().State != sandboxstore.StateReady { - return nil, errors.New("sandbox container is not running") - } - // TODO(random-liu): Verify that ports are exposed. - return c.streamServer.GetPortForward(r) -} diff --git a/pkg/cri/sbserver/sandbox_portforward_linux.go b/pkg/cri/sbserver/sandbox_portforward_linux.go deleted file mode 100644 index 1ff318a5f..000000000 --- a/pkg/cri/sbserver/sandbox_portforward_linux.go +++ /dev/null @@ -1,135 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "fmt" - "io" - "net" - "time" - - "github.com/containerd/log" - "github.com/containernetworking/plugins/pkg/ns" -) - -// portForward uses netns to enter the sandbox namespace, and forwards a stream inside the -// namespace to a specific port. It keeps forwarding until it exits or client disconnect. -func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriteCloser) error { - s, err := c.sandboxStore.Get(id) - if err != nil { - return fmt.Errorf("failed to find sandbox %q in store: %w", id, err) - } - - var ( - netNSDo func(func(ns.NetNS) error) error - // netNSPath is the network namespace path for logging. - netNSPath string - ) - if !hostNetwork(s.Config) { - if closed, err := s.NetNS.Closed(); err != nil { - return fmt.Errorf("failed to check netwok namespace closed for sandbox %q: %w", id, err) - } else if closed { - return fmt.Errorf("network namespace for sandbox %q is closed", id) - } - netNSDo = s.NetNS.Do - netNSPath = s.NetNS.GetPath() - } else { - // Run the function directly for host network. - netNSDo = func(do func(_ ns.NetNS) error) error { - return do(nil) - } - netNSPath = "host" - } - - log.G(ctx).Infof("Executing port forwarding in network namespace %q", netNSPath) - err = netNSDo(func(_ ns.NetNS) error { - defer stream.Close() - // localhost can resolve to both IPv4 and IPv6 addresses in dual-stack systems - // but the application can be listening in one of the IP families only. - // golang has enabled RFC 6555 Fast Fallback (aka HappyEyeballs) by default in 1.12 - // It means that if a host resolves to both IPv6 and IPv4, it will try to connect to any - // of those addresses and use the working connection. 
- // However, the implementation uses goroutines to start both connections in parallel, - // and this cases that the connection is done outside the namespace, so we try to connect - // serially. - // We try IPv4 first to keep current behavior and we fallback to IPv6 if the connection fails. - // xref https://github.com/golang/go/issues/44922 - var conn net.Conn - conn, err := net.Dial("tcp4", fmt.Sprintf("localhost:%d", port)) - if err != nil { - var errV6 error - conn, errV6 = net.Dial("tcp6", fmt.Sprintf("localhost:%d", port)) - if errV6 != nil { - return fmt.Errorf("failed to connect to localhost:%d inside namespace %q, IPv4: %v IPv6 %v ", port, id, err, errV6) - } - } - defer conn.Close() - - errCh := make(chan error, 2) - // Copy from the namespace port connection to the client stream - go func() { - log.G(ctx).Debugf("PortForward copying data from namespace %q port %d to the client stream", id, port) - _, err := io.Copy(stream, conn) - errCh <- err - }() - - // Copy from the client stream to the namespace port connection - go func() { - log.G(ctx).Debugf("PortForward copying data from client stream to namespace %q port %d", id, port) - _, err := io.Copy(conn, stream) - errCh <- err - }() - - // Wait until the first error is returned by one of the connections - // we use errFwd to store the result of the port forwarding operation - // if the context is cancelled close everything and return - var errFwd error - select { - case errFwd = <-errCh: - log.G(ctx).Debugf("PortForward stop forwarding in one direction in network namespace %q port %d: %v", id, port, errFwd) - case <-ctx.Done(): - log.G(ctx).Debugf("PortForward cancelled in network namespace %q port %d: %v", id, port, ctx.Err()) - return ctx.Err() - } - // give a chance to terminate gracefully or timeout - // after 1s - // https://linux.die.net/man/1/socat - const timeout = time.Second - select { - case e := <-errCh: - if errFwd == nil { - errFwd = e - } - log.G(ctx).Debugf("PortForward stopped forwarding 
in both directions in network namespace %q port %d: %v", id, port, e) - case <-time.After(timeout): - log.G(ctx).Debugf("PortForward timed out waiting to close the connection in network namespace %q port %d", id, port) - case <-ctx.Done(): - log.G(ctx).Debugf("PortForward cancelled in network namespace %q port %d: %v", id, port, ctx.Err()) - errFwd = ctx.Err() - } - - return errFwd - }) - - if err != nil { - return fmt.Errorf("failed to execute portforward in network namespace %q: %w", netNSPath, err) - } - log.G(ctx).Infof("Finish port forwarding for %q port %d", id, port) - - return nil -} diff --git a/pkg/cri/sbserver/sandbox_portforward_other.go b/pkg/cri/sbserver/sandbox_portforward_other.go deleted file mode 100644 index cadbae0c8..000000000 --- a/pkg/cri/sbserver/sandbox_portforward_other.go +++ /dev/null @@ -1,33 +0,0 @@ -//go:build !windows && !linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "fmt" - "io" - - "github.com/containerd/containerd/errdefs" -) - -// portForward uses netns to enter the sandbox namespace, and forwards a stream inside the -// namespace to a specific port. It keeps forwarding until it exits or client disconnect. 
-func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriteCloser) error { - return fmt.Errorf("port forward: %w", errdefs.ErrNotImplemented) -} diff --git a/pkg/cri/sbserver/sandbox_portforward_windows.go b/pkg/cri/sbserver/sandbox_portforward_windows.go deleted file mode 100644 index 876a36c51..000000000 --- a/pkg/cri/sbserver/sandbox_portforward_windows.go +++ /dev/null @@ -1,77 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "bytes" - "context" - "fmt" - "io" - - "k8s.io/utils/exec" - - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - cioutil "github.com/containerd/containerd/pkg/ioutil" -) - -func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriter) error { - stdout := cioutil.NewNopWriteCloser(stream) - stderrBuffer := new(bytes.Buffer) - stderr := cioutil.NewNopWriteCloser(stderrBuffer) - // localhost is resolved to 127.0.0.1 in ipv4, and ::1 in ipv6. - // Explicitly using ipv4 IP address in here to avoid flakiness. 
- cmd := []string{"wincat.exe", "127.0.0.1", fmt.Sprint(port)} - err := c.execInSandbox(ctx, id, cmd, stream, stdout, stderr) - if err != nil { - return fmt.Errorf("failed to execute port forward in sandbox: %s: %w", stderrBuffer.String(), err) - } - return nil -} - -func (c *criService) execInSandbox(ctx context.Context, sandboxID string, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser) error { - // Get sandbox from our sandbox store. - sb, err := c.sandboxStore.Get(sandboxID) - if err != nil { - return fmt.Errorf("failed to find sandbox %q in store: %w", sandboxID, err) - } - - // Check the sandbox state - state := sb.Status.Get().State - if state != sandboxstore.StateReady { - return fmt.Errorf("sandbox is in %s state", fmt.Sprint(state)) - } - - opts := execOptions{ - cmd: cmd, - stdin: stdin, - stdout: stdout, - stderr: stderr, - tty: false, - resize: nil, - } - exitCode, err := c.execInternal(ctx, sb.Container, sandboxID, opts) - if err != nil { - return fmt.Errorf("failed to exec in sandbox: %w", err) - } - if *exitCode == 0 { - return nil - } - return &exec.CodeExitError{ - Err: fmt.Errorf("error executing command %v, exit code %d", cmd, *exitCode), - Code: int(*exitCode), - } -} diff --git a/pkg/cri/sbserver/sandbox_remove.go b/pkg/cri/sbserver/sandbox_remove.go deleted file mode 100644 index 678ee6e01..000000000 --- a/pkg/cri/sbserver/sandbox_remove.go +++ /dev/null @@ -1,117 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "fmt" - "time" - - "github.com/containerd/containerd/errdefs" - "github.com/containerd/log" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// RemovePodSandbox removes the sandbox. If there are running containers in the -// sandbox, they should be forcibly removed. -func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodSandboxRequest) (*runtime.RemovePodSandboxResponse, error) { - start := time.Now() - sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId()) - if err != nil { - if !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("an error occurred when try to find sandbox %q: %w", - r.GetPodSandboxId(), err) - } - // Do not return error if the id doesn't exist. - log.G(ctx).Tracef("RemovePodSandbox called for sandbox %q that does not exist", - r.GetPodSandboxId()) - return &runtime.RemovePodSandboxResponse{}, nil - } - // Use the full sandbox id. - id := sandbox.ID - - // If the sandbox is still running, not ready, or in an unknown state, forcibly stop it. - // Even if it's in a NotReady state, this will close its network namespace, if open. - // This can happen if the task process associated with the Pod died or it was killed. - log.G(ctx).Infof("Forcibly stopping sandbox %q", id) - if err := c.stopPodSandbox(ctx, sandbox); err != nil { - return nil, fmt.Errorf("failed to forcibly stop sandbox %q: %w", id, err) - } - - // Return error if sandbox network namespace is not closed yet. - if sandbox.NetNS != nil { - nsPath := sandbox.NetNS.GetPath() - if closed, err := sandbox.NetNS.Closed(); err != nil { - return nil, fmt.Errorf("failed to check sandbox network namespace %q closed: %w", nsPath, err) - } else if !closed { - return nil, fmt.Errorf("sandbox network namespace %q is not fully closed", nsPath) - } - } - - // Remove all containers inside the sandbox. 
- // NOTE(random-liu): container could still be created after this point, Kubelet should - // not rely on this behavior. - // TODO(random-liu): Introduce an intermediate state to avoid container creation after - // this point. - cntrs := c.containerStore.List() - for _, cntr := range cntrs { - if cntr.SandboxID != id { - continue - } - _, err = c.RemoveContainer(ctx, &runtime.RemoveContainerRequest{ContainerId: cntr.ID}) - if err != nil { - return nil, fmt.Errorf("failed to remove container %q: %w", cntr.ID, err) - } - } - - // Use sandbox controller to delete sandbox - controller, err := c.getSandboxController(sandbox.Config, sandbox.RuntimeHandler) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox controller: %w", err) - } - - if err := controller.Shutdown(ctx, id); err != nil && !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("failed to delete sandbox %q: %w", id, err) - } - - // Send CONTAINER_DELETED event with ContainerId equal to SandboxId. - c.generateAndSendContainerEvent(ctx, id, id, runtime.ContainerEventType_CONTAINER_DELETED_EVENT) - - err = c.nri.RemovePodSandbox(ctx, &sandbox) - if err != nil { - log.G(ctx).WithError(err).Errorf("NRI pod removal notification failed") - } - - // Remove sandbox from sandbox store. Note that once the sandbox is successfully - // deleted: - // 1) ListPodSandbox will not include this sandbox. - // 2) PodSandboxStatus and StopPodSandbox will return error. - // 3) On-going operations which have held the reference will not be affected. - c.sandboxStore.Delete(id) - - if err := c.client.SandboxStore().Delete(ctx, id); err != nil { - return nil, fmt.Errorf("failed to remove sandbox metadata from store: %w", err) - } - - // Release the sandbox name reserved for the sandbox. 
- c.sandboxNameIndex.ReleaseByKey(id) - - sandboxRemoveTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(start) - - return &runtime.RemovePodSandboxResponse{}, nil -} diff --git a/pkg/cri/sbserver/sandbox_run.go b/pkg/cri/sbserver/sandbox_run.go deleted file mode 100644 index f44a25f10..000000000 --- a/pkg/cri/sbserver/sandbox_run.go +++ /dev/null @@ -1,715 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "math" - "path/filepath" - "strings" - "time" - - "github.com/containerd/go-cni" - "github.com/containerd/typeurl/v2" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/pkg/cri/annotations" - criconfig "github.com/containerd/containerd/pkg/cri/config" - "github.com/containerd/containerd/pkg/cri/sbserver/podsandbox" - "github.com/containerd/containerd/pkg/cri/server/bandwidth" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - "github.com/containerd/containerd/pkg/cri/util" - "github.com/containerd/containerd/pkg/netns" - sb "github.com/containerd/containerd/sandbox" - "github.com/containerd/log" -) - -func init() { - typeurl.Register(&sandboxstore.Metadata{}, - "github.com/containerd/cri/pkg/store/sandbox", "Metadata") -} - -// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure -// the sandbox is in ready state. 
-func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandboxRequest) (_ *runtime.RunPodSandboxResponse, retErr error) { - config := r.GetConfig() - log.G(ctx).Debugf("Sandbox config %+v", config) - - // Generate unique id and name for the sandbox and reserve the name. - id := util.GenerateID() - metadata := config.GetMetadata() - if metadata == nil { - return nil, errors.New("sandbox config must include metadata") - } - name := makeSandboxName(metadata) - log.G(ctx).WithField("podsandboxid", id).Debugf("generated id for sandbox name %q", name) - - // cleanupErr records the last error returned by the critical cleanup operations in deferred functions, - // like CNI teardown and stopping the running sandbox task. - // If cleanup is not completed for some reason, the CRI-plugin will leave the sandbox - // in a not-ready state, which can later be cleaned up by the next execution of the kubelet's syncPod workflow. - var cleanupErr error - - // Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the - // same sandbox. - if err := c.sandboxNameIndex.Reserve(name, id); err != nil { - return nil, fmt.Errorf("failed to reserve sandbox name %q: %w", name, err) - } - defer func() { - // Release the name if the function returns with an error. - // When cleanupErr != nil, the name will be cleaned in sandbox_remove. 
- if retErr != nil && cleanupErr == nil { - c.sandboxNameIndex.ReleaseByName(name) - } - }() - - var ( - err error - sandboxInfo = sb.Sandbox{ID: id} - ) - - ociRuntime, err := c.getSandboxRuntime(config, r.GetRuntimeHandler()) - if err != nil { - return nil, fmt.Errorf("unable to get OCI runtime for sandbox %q: %w", id, err) - } - - sandboxInfo.Runtime.Name = ociRuntime.Type - - runtimeStart := time.Now() - // Retrieve runtime options - runtimeOpts, err := generateRuntimeOptions(ociRuntime) - if err != nil { - return nil, fmt.Errorf("failed to generate sandbox runtime options: %w", err) - } - - if runtimeOpts != nil { - sandboxInfo.Runtime.Options, err = typeurl.MarshalAny(runtimeOpts) - if err != nil { - return nil, fmt.Errorf("failed to marshal runtime options: %w", err) - } - } - - // Save sandbox name - sandboxInfo.AddLabel("name", name) - - // Create initial internal sandbox object. - sandbox := sandboxstore.NewSandbox( - sandboxstore.Metadata{ - ID: id, - Name: name, - Config: config, - RuntimeHandler: r.GetRuntimeHandler(), - }, - sandboxstore.Status{ - State: sandboxstore.StateUnknown, - }, - ) - - if _, err := c.client.SandboxStore().Create(ctx, sandboxInfo); err != nil { - return nil, fmt.Errorf("failed to save sandbox metadata: %w", err) - } - defer func() { - if retErr != nil && cleanupErr == nil { - cleanupErr = c.client.SandboxStore().Delete(ctx, id) - } - }() - - defer func() { - // Put the sandbox into sandbox store when some resources fail to be cleaned. 
- if retErr != nil && cleanupErr != nil { - log.G(ctx).WithError(cleanupErr).Errorf("encountered an error cleaning up failed sandbox %q, marking sandbox state as SANDBOX_UNKNOWN", id) - if err := c.sandboxStore.Add(sandbox); err != nil { - log.G(ctx).WithError(err).Errorf("failed to add sandbox %+v into store", sandbox) - } - } - }() - - // XXX: What we really want here is to call controller.Platform() and then check - // platform.OS, but that is only populated after controller.Create() and that needs to be - // done later (uses sandbox.NSPath that we will set just _after_ this). - // So, lets check for the Linux section on the config, if that is populated, we assume the - // platform is linux. - // This is a hack, we should improve the controller interface to return the platform - // earlier. But should work fine for this specific use. - userNsEnabled := false - if linux := config.GetLinux(); linux != nil { - usernsOpts := linux.GetSecurityContext().GetNamespaceOptions().GetUsernsOptions() - if usernsOpts != nil && usernsOpts.GetMode() == runtime.NamespaceMode_POD { - userNsEnabled = true - } - } - - // Setup the network namespace if host networking wasn't requested. - if !hostNetwork(config) && !userNsEnabled { - // XXX: We do c&p of this code later for the podNetwork && userNsEnabled case too. - // We can't move this to a function, as the defer calls need to be executed if other - // errors are returned in this function. So, we would need more refactors to move - // this code to a function and the idea was to not change the current code for - // !userNsEnabled case, therefore doing it would defeat the purpose. - // - // The difference between the cases is the use of netns.NewNetNS() vs - // netns.NewNetNSFromPID(). - // - // To simplify this, in the future, we should just remove this case (podNetwork && - // !userNsEnabled) and just keep the other case (podNetwork && userNsEnabled). 
- netStart := time.Now() - // If it is not in host network namespace then create a namespace and set the sandbox - // handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network - // namespaces. If the pod is in host network namespace then both are empty and should not - // be used. - var netnsMountDir = "/var/run/netns" - if c.config.NetNSMountsUnderStateDir { - netnsMountDir = filepath.Join(c.config.StateDir, "netns") - } - sandbox.NetNS, err = netns.NewNetNS(netnsMountDir) - if err != nil { - return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err) - } - // Update network namespace in the store, which is used to generate the container's spec - sandbox.NetNSPath = sandbox.NetNS.GetPath() - defer func() { - // Remove the network namespace only if all the resource cleanup is done - if retErr != nil && cleanupErr == nil { - if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil { - log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id) - return - } - sandbox.NetNSPath = "" - } - }() - - if err := sandboxInfo.AddExtension(podsandbox.MetadataKey, &sandbox.Metadata); err != nil { - return nil, fmt.Errorf("unable to save sandbox %q to store: %w", id, err) - } - // Save sandbox metadata to store - if sandboxInfo, err = c.client.SandboxStore().Update(ctx, sandboxInfo, "extensions"); err != nil { - return nil, fmt.Errorf("unable to update extensions for sandbox %q: %w", id, err) - } - - // Define this defer to teardownPodNetwork prior to the setupPodNetwork function call. - // This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource - // creation functions. - defer func() { - // Remove the network namespace only if all the resource cleanup is done. 
- if retErr != nil && cleanupErr == nil { - deferCtx, deferCancel := util.DeferContext() - defer deferCancel() - // Teardown network if an error is returned. - if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil { - log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id) - } - - } - }() - - // Setup network for sandbox. - // Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524) - // rely on the assumption that CRI shim will not be querying the network namespace to check the - // network states such as IP. - // In future runtime implementation should avoid relying on CRI shim implementation details. - // In this case however caching the IP will add a subtle performance enhancement by avoiding - // calls to network namespace of the pod to query the IP of the veth interface on every - // SandboxStatus request. - if err := c.setupPodNetwork(ctx, &sandbox); err != nil { - return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err) - } - sandboxCreateNetworkTimer.UpdateSince(netStart) - } - - if err := sandboxInfo.AddExtension(podsandbox.MetadataKey, &sandbox.Metadata); err != nil { - return nil, fmt.Errorf("unable to save sandbox %q to store: %w", id, err) - } - - controller, err := c.getSandboxController(config, r.GetRuntimeHandler()) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox controller: %w", err) - } - - // Save sandbox metadata to store - if sandboxInfo, err = c.client.SandboxStore().Update(ctx, sandboxInfo, "extensions"); err != nil { - return nil, fmt.Errorf("unable to update extensions for sandbox %q: %w", id, err) - } - - if err := controller.Create(ctx, id, sb.WithOptions(config), sb.WithNetNSPath(sandbox.NetNSPath)); err != nil { - return nil, fmt.Errorf("failed to create sandbox %q: %w", id, err) - } - - ctrl, err := controller.Start(ctx, id) - if err != nil { - sandbox.Container, _ = c.client.LoadContainer(ctx, id) - var cerr 
podsandbox.CleanupErr - if errors.As(err, &cerr) { - cleanupErr = fmt.Errorf("failed to cleanup sandbox: %w", cerr) - - // Strip last error as cleanup error to handle separately - if merr, ok := err.(interface{ Unwrap() []error }); ok { - if errs := merr.Unwrap(); len(errs) > 0 { - err = errs[0] - } - } - } - return nil, fmt.Errorf("failed to start sandbox %q: %w", id, err) - } - - if !hostNetwork(config) && userNsEnabled { - // If userns is enabled, then the netns was created by the OCI runtime - // on controller.Start(). The OCI runtime needs to create the netns - // because, if userns is in use, the netns needs to be owned by the - // userns. So, let the OCI runtime just handle this for us. - // If the netns is not owned by the userns several problems will happen. - // For instance, the container will lack permission (even if - // capabilities are present) to modify the netns or, even worse, the OCI - // runtime will fail to mount sysfs: - // https://github.com/torvalds/linux/commit/7dc5dbc879bd0779924b5132a48b731a0bc04a1e#diff-4839664cd0c8eab716e064323c7cd71fR1164 - // - // Note we do this after controller.Start(), as before that we - // can't get the PID for the sandbox that we need for the netns. - // Doing a controller.Status() call before that fails (can't - // find the sandbox) so we can't get the PID. - netStart := time.Now() - - // If it is not in host network namespace then create a namespace and set the sandbox - // handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network - // namespaces. If the pod is in host network namespace then both are empty and should not - // be used. 
- var netnsMountDir = "/var/run/netns" - if c.config.NetNSMountsUnderStateDir { - netnsMountDir = filepath.Join(c.config.StateDir, "netns") - } - - sandbox.NetNS, err = netns.NewNetNSFromPID(netnsMountDir, ctrl.Pid) - if err != nil { - return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err) - } - - // Update network namespace in the store, which is used to generate the container's spec - sandbox.NetNSPath = sandbox.NetNS.GetPath() - defer func() { - // Remove the network namespace only if all the resource cleanup is done - if retErr != nil && cleanupErr == nil { - if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil { - log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id) - return - } - sandbox.NetNSPath = "" - } - }() - - if err := sandboxInfo.AddExtension(podsandbox.MetadataKey, &sandbox.Metadata); err != nil { - return nil, fmt.Errorf("unable to save sandbox %q to store: %w", id, err) - } - // Save sandbox metadata to store - if sandboxInfo, err = c.client.SandboxStore().Update(ctx, sandboxInfo, "extensions"); err != nil { - return nil, fmt.Errorf("unable to update extensions for sandbox %q: %w", id, err) - } - - // Define this defer to teardownPodNetwork prior to the setupPodNetwork function call. - // This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource - // creation functions. - defer func() { - // Remove the network namespace only if all the resource cleanup is done. - if retErr != nil && cleanupErr == nil { - deferCtx, deferCancel := util.DeferContext() - defer deferCancel() - // Teardown network if an error is returned. - if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil { - log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id) - } - - } - }() - - // Setup network for sandbox. 
- // Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524) - // rely on the assumption that CRI shim will not be querying the network namespace to check the - // network states such as IP. - // In future runtime implementation should avoid relying on CRI shim implementation details. - // In this case however caching the IP will add a subtle performance enhancement by avoiding - // calls to network namespace of the pod to query the IP of the veth interface on every - // SandboxStatus request. - if err := c.setupPodNetwork(ctx, &sandbox); err != nil { - return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err) - } - sandboxCreateNetworkTimer.UpdateSince(netStart) - } - - // TODO: get rid of this. sandbox object should no longer have Container field. - if ociRuntime.SandboxMode == string(criconfig.ModePodSandbox) { - container, err := c.client.LoadContainer(ctx, id) - if err != nil { - return nil, fmt.Errorf("failed to load container %q for sandbox: %w", id, err) - } - sandbox.Container = container - } - - labels := ctrl.Labels - if labels == nil { - labels = map[string]string{} - } - - sandbox.ProcessLabel = labels["selinux_label"] - - err = c.nri.RunPodSandbox(ctx, &sandbox) - if err != nil { - return nil, fmt.Errorf("NRI RunPodSandbox failed: %w", err) - } - - defer func() { - if retErr != nil { - deferCtx, deferCancel := util.DeferContext() - defer deferCancel() - c.nri.RemovePodSandbox(deferCtx, &sandbox) - } - }() - - if err := sandbox.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) { - // Set the pod sandbox as ready after successfully start sandbox container. - status.Pid = ctrl.Pid - status.State = sandboxstore.StateReady - status.CreatedAt = ctrl.CreatedAt - return status, nil - }); err != nil { - return nil, fmt.Errorf("failed to update sandbox status: %w", err) - } - - // Add sandbox into sandbox store in INIT state. 
- if err := c.sandboxStore.Add(sandbox); err != nil { - return nil, fmt.Errorf("failed to add sandbox %+v into store: %w", sandbox, err) - } - - // Send CONTAINER_CREATED event with both ContainerId and SandboxId equal to SandboxId. - // Note that this has to be done after sandboxStore.Add() because we need to get - // SandboxStatus from the store and include it in the event. - c.generateAndSendContainerEvent(ctx, id, id, runtime.ContainerEventType_CONTAINER_CREATED_EVENT) - - // TODO: Use sandbox client instead - exitCh := make(chan containerd.ExitStatus, 1) - go func() { - defer close(exitCh) - - ctx := util.NamespacedContext() - resp, err := controller.Wait(ctx, id) - if err != nil { - log.G(ctx).WithError(err).Error("failed to wait for sandbox exit") - exitCh <- *containerd.NewExitStatus(containerd.UnknownExitStatus, time.Time{}, err) - return - } - - exitCh <- *containerd.NewExitStatus(resp.ExitStatus, resp.ExitedAt, nil) - }() - - // start the monitor after adding sandbox into the store, this ensures - // that sandbox is in the store, when event monitor receives the TaskExit event. - // - // TaskOOM from containerd may come before sandbox is added to store, - // but we don't care about sandbox TaskOOM right now, so it is fine. - c.eventMonitor.startSandboxExitMonitor(context.Background(), id, ctrl.Pid, exitCh) - - // Send CONTAINER_STARTED event with ContainerId equal to SandboxId. 
- c.generateAndSendContainerEvent(ctx, id, id, runtime.ContainerEventType_CONTAINER_STARTED_EVENT) - - sandboxRuntimeCreateTimer.WithValues(labels["oci_runtime_type"]).UpdateSince(runtimeStart) - - return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil -} - -// getNetworkPlugin returns the network plugin to be used by the runtime class -// defaults to the global CNI options in the CRI config -func (c *criService) getNetworkPlugin(runtimeClass string) cni.CNI { - if c.netPlugin == nil { - return nil - } - i, ok := c.netPlugin[runtimeClass] - if !ok { - if i, ok = c.netPlugin[defaultNetworkPlugin]; !ok { - return nil - } - } - return i -} - -// setupPodNetwork setups up the network for a pod -func (c *criService) setupPodNetwork(ctx context.Context, sandbox *sandboxstore.Sandbox) error { - var ( - id = sandbox.ID - config = sandbox.Config - path = sandbox.NetNSPath - netPlugin = c.getNetworkPlugin(sandbox.RuntimeHandler) - err error - result *cni.Result - ) - if netPlugin == nil { - return errors.New("cni config not initialized") - } - - opts, err := cniNamespaceOpts(id, config) - if err != nil { - return fmt.Errorf("get cni namespace options: %w", err) - } - log.G(ctx).WithField("podsandboxid", id).Debugf("begin cni setup") - netStart := time.Now() - if c.config.CniConfig.NetworkPluginSetupSerially { - result, err = netPlugin.SetupSerially(ctx, id, path, opts...) - } else { - result, err = netPlugin.Setup(ctx, id, path, opts...) 
- } - networkPluginOperations.WithValues(networkSetUpOp).Inc() - networkPluginOperationsLatency.WithValues(networkSetUpOp).UpdateSince(netStart) - if err != nil { - networkPluginOperationsErrors.WithValues(networkSetUpOp).Inc() - return err - } - logDebugCNIResult(ctx, id, result) - // Check if the default interface has IP config - if configs, ok := result.Interfaces[defaultIfName]; ok && len(configs.IPConfigs) > 0 { - sandbox.IP, sandbox.AdditionalIPs = selectPodIPs(ctx, configs.IPConfigs, c.config.IPPreference) - sandbox.CNIResult = result - return nil - } - return fmt.Errorf("failed to find network info for sandbox %q", id) -} - -// cniNamespaceOpts get CNI namespace options from sandbox config. -func cniNamespaceOpts(id string, config *runtime.PodSandboxConfig) ([]cni.NamespaceOpts, error) { - opts := []cni.NamespaceOpts{ - cni.WithLabels(toCNILabels(id, config)), - cni.WithCapability(annotations.PodAnnotations, config.Annotations), - } - - portMappings := toCNIPortMappings(config.GetPortMappings()) - if len(portMappings) > 0 { - opts = append(opts, cni.WithCapabilityPortMap(portMappings)) - } - - // Will return an error if the bandwidth limitation has the wrong unit - // or an unreasonable value see validateBandwidthIsReasonable() - bandWidth, err := toCNIBandWidth(config.Annotations) - if err != nil { - return nil, err - } - if bandWidth != nil { - opts = append(opts, cni.WithCapabilityBandWidth(*bandWidth)) - } - - dns := toCNIDNS(config.GetDnsConfig()) - if dns != nil { - opts = append(opts, cni.WithCapabilityDNS(*dns)) - } - - if cgroup := config.GetLinux().GetCgroupParent(); cgroup != "" { - opts = append(opts, cni.WithCapabilityCgroupPath(cgroup)) - } - - return opts, nil -} - -// toCNILabels adds pod metadata into CNI labels. 
-func toCNILabels(id string, config *runtime.PodSandboxConfig) map[string]string { - return map[string]string{ - "K8S_POD_NAMESPACE": config.GetMetadata().GetNamespace(), - "K8S_POD_NAME": config.GetMetadata().GetName(), - "K8S_POD_INFRA_CONTAINER_ID": id, - "K8S_POD_UID": config.GetMetadata().GetUid(), - "IgnoreUnknown": "1", - } -} - -// toCNIBandWidth converts CRI annotations to CNI bandwidth. -func toCNIBandWidth(annotations map[string]string) (*cni.BandWidth, error) { - ingress, egress, err := bandwidth.ExtractPodBandwidthResources(annotations) - if err != nil { - return nil, fmt.Errorf("reading pod bandwidth annotations: %w", err) - } - - if ingress == nil && egress == nil { - return nil, nil - } - - bandWidth := &cni.BandWidth{} - - if ingress != nil { - bandWidth.IngressRate = uint64(ingress.Value()) - bandWidth.IngressBurst = math.MaxUint32 - } - - if egress != nil { - bandWidth.EgressRate = uint64(egress.Value()) - bandWidth.EgressBurst = math.MaxUint32 - } - - return bandWidth, nil -} - -// toCNIPortMappings converts CRI port mappings to CNI. -func toCNIPortMappings(criPortMappings []*runtime.PortMapping) []cni.PortMapping { - var portMappings []cni.PortMapping - for _, mapping := range criPortMappings { - if mapping.HostPort <= 0 { - continue - } - portMappings = append(portMappings, cni.PortMapping{ - HostPort: mapping.HostPort, - ContainerPort: mapping.ContainerPort, - Protocol: strings.ToLower(mapping.Protocol.String()), - HostIP: mapping.HostIp, - }) - } - return portMappings -} - -// toCNIDNS converts CRI DNSConfig to CNI. -func toCNIDNS(dns *runtime.DNSConfig) *cni.DNS { - if dns == nil { - return nil - } - return &cni.DNS{ - Servers: dns.GetServers(), - Searches: dns.GetSearches(), - Options: dns.GetOptions(), - } -} - -// selectPodIPs select an ip from the ip list. 
-func selectPodIPs(ctx context.Context, configs []*cni.IPConfig, preference string) (string, []string) { - if len(configs) == 1 { - return ipString(configs[0]), nil - } - toStrings := func(ips []*cni.IPConfig) (o []string) { - for _, i := range ips { - o = append(o, ipString(i)) - } - return o - } - var extra []string - switch preference { - default: - if preference != "ipv4" && preference != "" { - log.G(ctx).WithField("ip_pref", preference).Warn("invalid ip_pref, falling back to ipv4") - } - for i, ip := range configs { - if ip.IP.To4() != nil { - return ipString(ip), append(extra, toStrings(configs[i+1:])...) - } - extra = append(extra, ipString(ip)) - } - case "ipv6": - for i, ip := range configs { - if ip.IP.To4() == nil { - return ipString(ip), append(extra, toStrings(configs[i+1:])...) - } - extra = append(extra, ipString(ip)) - } - case "cni": - // use func default return - } - - all := toStrings(configs) - return all[0], all[1:] -} - -func ipString(ip *cni.IPConfig) string { - return ip.IP.String() -} - -// untrustedWorkload returns true if the sandbox contains untrusted workload. -func untrustedWorkload(config *runtime.PodSandboxConfig) bool { - return config.GetAnnotations()[annotations.UntrustedWorkload] == "true" -} - -// hostAccessingSandbox returns true if the sandbox configuration -// requires additional host access for the sandbox. -func hostAccessingSandbox(config *runtime.PodSandboxConfig) bool { - securityContext := config.GetLinux().GetSecurityContext() - - namespaceOptions := securityContext.GetNamespaceOptions() - if namespaceOptions.GetNetwork() == runtime.NamespaceMode_NODE || - namespaceOptions.GetPid() == runtime.NamespaceMode_NODE || - namespaceOptions.GetIpc() == runtime.NamespaceMode_NODE { - return true - } - - return false -} - -// getSandboxRuntime returns the runtime configuration for sandbox. 
-// If the sandbox contains untrusted workload, runtime for untrusted workload will be returned, -// or else default runtime will be returned. -func (c *criService) getSandboxRuntime(config *runtime.PodSandboxConfig, runtimeHandler string) (criconfig.Runtime, error) { - if untrustedWorkload(config) { - // If the untrusted annotation is provided, runtimeHandler MUST be empty. - if runtimeHandler != "" && runtimeHandler != criconfig.RuntimeUntrusted { - return criconfig.Runtime{}, errors.New("untrusted workload with explicit runtime handler is not allowed") - } - - // If the untrusted workload is requesting access to the host/node, this request will fail. - // - // Note: If the workload is marked untrusted but requests privileged, this can be granted, as the - // runtime may support this. For example, in a virtual-machine isolated runtime, privileged - // is a supported option, granting the workload to access the entire guest VM instead of host. - // TODO(windows): Deprecate this so that we don't need to handle it for windows. - if hostAccessingSandbox(config) { - return criconfig.Runtime{}, errors.New("untrusted workload with host access is not allowed") - } - - runtimeHandler = criconfig.RuntimeUntrusted - } - - if runtimeHandler == "" { - runtimeHandler = c.config.ContainerdConfig.DefaultRuntimeName - } - - handler, ok := c.config.ContainerdConfig.Runtimes[runtimeHandler] - if !ok { - return criconfig.Runtime{}, fmt.Errorf("no runtime for %q is configured", runtimeHandler) - } - return handler, nil -} - -// getSandboxController returns the sandbox controller configuration for sandbox. -// If absent in legacy case, it will return the default controller. 
-func (c *criService) getSandboxController(config *runtime.PodSandboxConfig, runtimeHandler string) (sb.Controller, error) { - ociRuntime, err := c.getSandboxRuntime(config, runtimeHandler) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox runtime: %w", err) - } - // Validate mode - if err = ValidateMode(ociRuntime.SandboxMode); err != nil { - return nil, err - } - // Use sandbox controller to delete sandbox - controller, exist := c.sandboxControllers[criconfig.SandboxControllerMode(ociRuntime.SandboxMode)] - if !exist { - return nil, fmt.Errorf("sandbox controller %s not exist", ociRuntime.SandboxMode) - } - return controller, nil -} - -func logDebugCNIResult(ctx context.Context, sandboxID string, result *cni.Result) { - if log.GetLevel() < log.DebugLevel { - return - } - cniResult, err := json.Marshal(result) - if err != nil { - log.G(ctx).WithField("podsandboxid", sandboxID).WithError(err).Errorf("Failed to marshal CNI result: %v", err) - return - } - log.G(ctx).WithField("podsandboxid", sandboxID).Debugf("cni result: %s", string(cniResult)) -} diff --git a/pkg/cri/sbserver/sandbox_run_test.go b/pkg/cri/sbserver/sandbox_run_test.go deleted file mode 100644 index 238ef031f..000000000 --- a/pkg/cri/sbserver/sandbox_run_test.go +++ /dev/null @@ -1,192 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "net" - "testing" - - "github.com/containerd/go-cni" - "github.com/stretchr/testify/assert" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func TestToCNIPortMappings(t *testing.T) { - for _, test := range []struct { - desc string - criPortMappings []*runtime.PortMapping - cniPortMappings []cni.PortMapping - }{ - { - desc: "empty CRI port mapping should map to empty CNI port mapping", - }, - { - desc: "CRI port mapping should be converted to CNI port mapping properly", - criPortMappings: []*runtime.PortMapping{ - { - Protocol: runtime.Protocol_UDP, - ContainerPort: 1234, - HostPort: 5678, - HostIp: "123.124.125.126", - }, - { - Protocol: runtime.Protocol_TCP, - ContainerPort: 4321, - HostPort: 8765, - HostIp: "126.125.124.123", - }, - { - Protocol: runtime.Protocol_SCTP, - ContainerPort: 1234, - HostPort: 5678, - HostIp: "123.124.125.126", - }, - }, - cniPortMappings: []cni.PortMapping{ - { - HostPort: 5678, - ContainerPort: 1234, - Protocol: "udp", - HostIP: "123.124.125.126", - }, - { - HostPort: 8765, - ContainerPort: 4321, - Protocol: "tcp", - HostIP: "126.125.124.123", - }, - { - HostPort: 5678, - ContainerPort: 1234, - Protocol: "sctp", - HostIP: "123.124.125.126", - }, - }, - }, - { - desc: "CRI port mapping without host port should be skipped", - criPortMappings: []*runtime.PortMapping{ - { - Protocol: runtime.Protocol_UDP, - ContainerPort: 1234, - HostIp: "123.124.125.126", - }, - { - Protocol: runtime.Protocol_TCP, - ContainerPort: 4321, - HostPort: 8765, - HostIp: "126.125.124.123", - }, - }, - cniPortMappings: []cni.PortMapping{ - { - HostPort: 8765, - ContainerPort: 4321, - Protocol: "tcp", - HostIP: "126.125.124.123", - }, - }, - }, - { - desc: "CRI port mapping with unsupported protocol should be skipped", - criPortMappings: []*runtime.PortMapping{ - { - Protocol: runtime.Protocol_TCP, - ContainerPort: 4321, - HostPort: 8765, - HostIp: "126.125.124.123", - }, - }, - cniPortMappings: 
[]cni.PortMapping{ - { - HostPort: 8765, - ContainerPort: 4321, - Protocol: "tcp", - HostIP: "126.125.124.123", - }, - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - assert.Equal(t, test.cniPortMappings, toCNIPortMappings(test.criPortMappings)) - }) - } -} - -func TestSelectPodIP(t *testing.T) { - for _, test := range []struct { - desc string - ips []string - expectedIP string - expectedAdditionalIPs []string - pref string - }{ - { - desc: "ipv4 should be picked even if ipv6 comes first", - ips: []string{"2001:db8:85a3::8a2e:370:7334", "192.168.17.43"}, - expectedIP: "192.168.17.43", - expectedAdditionalIPs: []string{"2001:db8:85a3::8a2e:370:7334"}, - }, - { - desc: "ipv6 should be picked even if ipv4 comes first", - ips: []string{"192.168.17.43", "2001:db8:85a3::8a2e:370:7334"}, - expectedIP: "2001:db8:85a3::8a2e:370:7334", - expectedAdditionalIPs: []string{"192.168.17.43"}, - pref: "ipv6", - }, - { - desc: "order should reflect ip selection", - ips: []string{"2001:db8:85a3::8a2e:370:7334", "192.168.17.43"}, - expectedIP: "2001:db8:85a3::8a2e:370:7334", - expectedAdditionalIPs: []string{"192.168.17.43"}, - pref: "cni", - }, - { - desc: "ipv4 should be picked when there is only ipv4", - ips: []string{"192.168.17.43"}, - expectedIP: "192.168.17.43", - expectedAdditionalIPs: nil, - }, - { - desc: "ipv6 should be picked when there is no ipv4", - ips: []string{"2001:db8:85a3::8a2e:370:7334"}, - expectedIP: "2001:db8:85a3::8a2e:370:7334", - expectedAdditionalIPs: nil, - }, - { - desc: "the first ipv4 should be picked when there are multiple ipv4", // unlikely to happen - ips: []string{"2001:db8:85a3::8a2e:370:7334", "192.168.17.43", "2001:db8:85a3::8a2e:370:7335", "192.168.17.45"}, - expectedIP: "192.168.17.43", - expectedAdditionalIPs: []string{"2001:db8:85a3::8a2e:370:7334", "2001:db8:85a3::8a2e:370:7335", "192.168.17.45"}, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - var ipConfigs []*cni.IPConfig - for _, ip := range 
test.ips { - ipConfigs = append(ipConfigs, &cni.IPConfig{ - IP: net.ParseIP(ip), - }) - } - ip, additionalIPs := selectPodIPs(context.Background(), ipConfigs, test.pref) - assert.Equal(t, test.expectedIP, ip) - assert.Equal(t, test.expectedAdditionalIPs, additionalIPs) - }) - } -} diff --git a/pkg/cri/sbserver/sandbox_stats.go b/pkg/cri/sbserver/sandbox_stats.go deleted file mode 100644 index 464435168..000000000 --- a/pkg/cri/sbserver/sandbox_stats.go +++ /dev/null @@ -1,42 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "fmt" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func (c *criService) PodSandboxStats( - ctx context.Context, - r *runtime.PodSandboxStatsRequest, -) (*runtime.PodSandboxStatsResponse, error) { - - sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId()) - if err != nil { - return nil, fmt.Errorf("an error occurred when trying to find sandbox %s: %w", r.GetPodSandboxId(), err) - } - - podSandboxStats, err := c.podSandboxStats(ctx, sandbox) - if err != nil { - return nil, fmt.Errorf("failed to decode pod sandbox metrics %s: %w", r.GetPodSandboxId(), err) - } - - return &runtime.PodSandboxStatsResponse{Stats: podSandboxStats}, nil -} diff --git a/pkg/cri/sbserver/sandbox_stats_linux.go b/pkg/cri/sbserver/sandbox_stats_linux.go deleted file mode 100644 index 93a4b94a8..000000000 --- a/pkg/cri/sbserver/sandbox_stats_linux.go +++ /dev/null @@ -1,179 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "fmt" - "time" - - "github.com/containerd/cgroups/v3" - "github.com/containerd/cgroups/v3/cgroup1" - cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2" - "github.com/containerd/containerd/errdefs" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - "github.com/containerd/log" - "github.com/containernetworking/plugins/pkg/ns" - "github.com/vishvananda/netlink" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func (c *criService) podSandboxStats( - ctx context.Context, - sandbox sandboxstore.Sandbox) (*runtime.PodSandboxStats, error) { - meta := sandbox.Metadata - - if sandbox.Status.Get().State != sandboxstore.StateReady { - return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state: %w", meta.ID, errdefs.ErrUnavailable) - } - - stats, err := metricsForSandbox(sandbox) - if err != nil { - return nil, fmt.Errorf("failed getting metrics for sandbox %s: %w", sandbox.ID, err) - } - - podSandboxStats := &runtime.PodSandboxStats{ - Linux: &runtime.LinuxPodSandboxStats{}, - Attributes: &runtime.PodSandboxAttributes{ - Id: meta.ID, - Metadata: meta.Config.GetMetadata(), - Labels: meta.Config.GetLabels(), - Annotations: meta.Config.GetAnnotations(), - }, - } - - if stats != nil { - timestamp := time.Now() - - cpuStats, err := c.cpuContainerStats(meta.ID, true /* isSandbox */, stats, timestamp) - if err != nil { - return nil, fmt.Errorf("failed to obtain cpu stats: %w", err) - } - podSandboxStats.Linux.Cpu = cpuStats - - memoryStats, err := c.memoryContainerStats(meta.ID, stats, timestamp) - if err != nil { - return nil, fmt.Errorf("failed to obtain memory stats: %w", err) - } - podSandboxStats.Linux.Memory = memoryStats - - if sandbox.NetNSPath != "" { - rxBytes, rxErrors, txBytes, txErrors := getContainerNetIO(ctx, sandbox.NetNSPath) - podSandboxStats.Linux.Network = &runtime.NetworkUsage{ - DefaultInterface: &runtime.NetworkInterfaceUsage{ - Name: 
defaultIfName, - RxBytes: &runtime.UInt64Value{Value: rxBytes}, - RxErrors: &runtime.UInt64Value{Value: rxErrors}, - TxBytes: &runtime.UInt64Value{Value: txBytes}, - TxErrors: &runtime.UInt64Value{Value: txErrors}, - }, - } - } - - var pidCount uint64 - for _, cntr := range c.containerStore.List() { - if cntr.SandboxID != sandbox.ID { - continue - } - - state := cntr.Status.Get().State() - if state != runtime.ContainerState_CONTAINER_RUNNING { - continue - } - - task, err := cntr.Container.Task(ctx, nil) - if err != nil { - return nil, err - } - - processes, err := task.Pids(ctx) - if err != nil { - return nil, err - } - pidCount += uint64(len(processes)) - - } - podSandboxStats.Linux.Process = &runtime.ProcessUsage{ - Timestamp: timestamp.UnixNano(), - ProcessCount: &runtime.UInt64Value{Value: pidCount}, - } - - listContainerStatsRequest := &runtime.ListContainerStatsRequest{Filter: &runtime.ContainerStatsFilter{PodSandboxId: meta.ID}} - resp, err := c.ListContainerStats(ctx, listContainerStatsRequest) - if err != nil { - return nil, fmt.Errorf("failed to obtain container stats during podSandboxStats call: %w", err) - } - podSandboxStats.Linux.Containers = resp.GetStats() - } - - return podSandboxStats, nil -} - -// https://github.com/cri-o/cri-o/blob/74a5cf8dffd305b311eb1c7f43a4781738c388c1/internal/oci/stats.go#L32 -func getContainerNetIO(ctx context.Context, netNsPath string) (rxBytes, rxErrors, txBytes, txErrors uint64) { - ns.WithNetNSPath(netNsPath, func(_ ns.NetNS) error { - link, err := netlink.LinkByName(defaultIfName) - if err != nil { - log.G(ctx).WithError(err).Errorf("unable to retrieve network namespace stats for netNsPath: %v, interface: %v", netNsPath, defaultIfName) - return err - } - attrs := link.Attrs() - if attrs != nil && attrs.Statistics != nil { - rxBytes = attrs.Statistics.RxBytes - rxErrors = attrs.Statistics.RxErrors - txBytes = attrs.Statistics.TxBytes - txErrors = attrs.Statistics.TxErrors - } - return nil - }) - - return rxBytes, 
rxErrors, txBytes, txErrors -} - -func metricsForSandbox(sandbox sandboxstore.Sandbox) (interface{}, error) { - cgroupPath := sandbox.Config.GetLinux().GetCgroupParent() - - if cgroupPath == "" { - return nil, fmt.Errorf("failed to get cgroup metrics for sandbox %v because cgroupPath is empty", sandbox.ID) - } - - var statsx interface{} - if cgroups.Mode() == cgroups.Unified { - cg, err := cgroupsv2.Load(cgroupPath) - if err != nil { - return nil, fmt.Errorf("failed to load sandbox cgroup: %v: %w", cgroupPath, err) - } - stats, err := cg.Stat() - if err != nil { - return nil, fmt.Errorf("failed to get stats for cgroup: %v: %w", cgroupPath, err) - } - statsx = stats - - } else { - control, err := cgroup1.Load(cgroup1.StaticPath(cgroupPath)) - if err != nil { - return nil, fmt.Errorf("failed to load sandbox cgroup %v: %w", cgroupPath, err) - } - stats, err := control.Stat(cgroup1.IgnoreNotExist) - if err != nil { - return nil, fmt.Errorf("failed to get stats for cgroup %v: %w", cgroupPath, err) - } - statsx = stats - } - - return statsx, nil -} diff --git a/pkg/cri/sbserver/sandbox_stats_list.go b/pkg/cri/sbserver/sandbox_stats_list.go deleted file mode 100644 index ea03c9bc7..000000000 --- a/pkg/cri/sbserver/sandbox_stats_list.go +++ /dev/null @@ -1,83 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "errors" - "fmt" - - "github.com/containerd/containerd/errdefs" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - "github.com/containerd/log" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// ListPodSandboxStats returns stats of all ready sandboxes. -func (c *criService) ListPodSandboxStats( - ctx context.Context, - r *runtime.ListPodSandboxStatsRequest, -) (*runtime.ListPodSandboxStatsResponse, error) { - sandboxes := c.sandboxesForListPodSandboxStatsRequest(r) - - var errs []error - podSandboxStats := new(runtime.ListPodSandboxStatsResponse) - for _, sandbox := range sandboxes { - sandboxStats, err := c.podSandboxStats(ctx, sandbox) - switch { - case errdefs.IsUnavailable(err): - log.G(ctx).WithField("podsandboxid", sandbox.ID).Debugf("failed to get pod sandbox stats, this is likely a transient error: %v", err) - case err != nil: - errs = append(errs, fmt.Errorf("failed to decode sandbox container metrics for sandbox %q: %w", sandbox.ID, err)) - default: - podSandboxStats.Stats = append(podSandboxStats.Stats, sandboxStats) - } - } - - return podSandboxStats, errors.Join(errs...) 
-} - -func (c *criService) sandboxesForListPodSandboxStatsRequest(r *runtime.ListPodSandboxStatsRequest) []sandboxstore.Sandbox { - sandboxesInStore := c.sandboxStore.List() - - if r.GetFilter() == nil { - return sandboxesInStore - } - - c.normalizePodSandboxStatsFilter(r.GetFilter()) - - var sandboxes []sandboxstore.Sandbox - for _, sandbox := range sandboxesInStore { - if r.GetFilter().GetId() != "" && sandbox.ID != r.GetFilter().GetId() { - continue - } - - if r.GetFilter().GetLabelSelector() != nil && - !matchLabelSelector(r.GetFilter().GetLabelSelector(), sandbox.Config.GetLabels()) { - continue - } - - // We can't obtain metrics for sandboxes that aren't in ready state - if sandbox.Status.Get().State != sandboxstore.StateReady { - continue - } - - sandboxes = append(sandboxes, sandbox) - } - - return sandboxes -} diff --git a/pkg/cri/sbserver/sandbox_stats_other.go b/pkg/cri/sbserver/sandbox_stats_other.go deleted file mode 100644 index 8a249a485..000000000 --- a/pkg/cri/sbserver/sandbox_stats_other.go +++ /dev/null @@ -1,34 +0,0 @@ -//go:build !windows && !linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "fmt" - - "github.com/containerd/containerd/errdefs" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func (c *criService) podSandboxStats( - ctx context.Context, - sandbox sandboxstore.Sandbox) (*runtime.PodSandboxStats, error) { - return nil, fmt.Errorf("pod sandbox stats not implemented: %w", errdefs.ErrNotImplemented) -} diff --git a/pkg/cri/sbserver/sandbox_stats_windows.go b/pkg/cri/sbserver/sandbox_stats_windows.go deleted file mode 100644 index 3c9d2dead..000000000 --- a/pkg/cri/sbserver/sandbox_stats_windows.go +++ /dev/null @@ -1,441 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "fmt" - "time" - - "github.com/Microsoft/hcsshim" - wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" - "github.com/Microsoft/hcsshim/hcn" - "github.com/containerd/containerd/api/services/tasks/v1" - "github.com/containerd/containerd/api/types" - "github.com/containerd/containerd/errdefs" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - "github.com/containerd/containerd/pkg/cri/store/stats" - "github.com/containerd/containerd/protobuf" - "github.com/containerd/log" - "github.com/containerd/typeurl/v2" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func (c *criService) podSandboxStats( - ctx context.Context, - sandbox sandboxstore.Sandbox) (*runtime.PodSandboxStats, error) { - meta := sandbox.Metadata - - if sandbox.Status.Get().State != sandboxstore.StateReady { - return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state: %w", meta.ID, errdefs.ErrUnavailable) - } - - timestamp := time.Now() - podSandboxStats := &runtime.PodSandboxStats{ - Windows: &runtime.WindowsPodSandboxStats{}, - Attributes: &runtime.PodSandboxAttributes{ - Id: meta.ID, - Metadata: meta.Config.GetMetadata(), - Labels: meta.Config.GetLabels(), - Annotations: meta.Config.GetAnnotations(), - }, - } - - metrics, containers, err := c.listWindowsMetricsForSandbox(ctx, sandbox) - if err != nil { - return nil, fmt.Errorf("failed to obtain container stats during podSandboxStats call: %w", err) - } - - statsMap, err := convertMetricsToWindowsStats(metrics, sandbox) - if err != nil { - return nil, fmt.Errorf("failed to convert stats: %w", err) - } - - podCPU, containerStats, err := c.toPodSandboxStats(sandbox, statsMap, containers, timestamp) - if err != nil { - return nil, fmt.Errorf("failed to convert container stats during podSandboxStats call: %w", err) - } - 
podSandboxStats.Windows.Cpu = podCPU.Cpu - podSandboxStats.Windows.Memory = podCPU.Memory - podSandboxStats.Windows.Containers = containerStats - podSandboxStats.Windows.Network = windowsNetworkUsage(ctx, sandbox, timestamp) - - pidCount, err := c.getSandboxPidCount(ctx, sandbox) - if err != nil { - return nil, fmt.Errorf("failed to get pid count: %w", err) - } - - podSandboxStats.Windows.Process = &runtime.WindowsProcessUsage{ - Timestamp: timestamp.UnixNano(), - ProcessCount: &runtime.UInt64Value{Value: pidCount}, - } - - c.saveSandBoxMetrics(podSandboxStats.Attributes.Id, podSandboxStats) - - return podSandboxStats, nil -} - -func convertMetricsToWindowsStats(metrics []*types.Metric, sandbox sandboxstore.Sandbox) (map[string]*wstats.Statistics, error) { - isHostProcess := sandbox.Config.GetWindows().GetSecurityContext().GetHostProcess() - - statsMap := make(map[string]*wstats.Statistics) - for _, stat := range metrics { - containerStatsData, err := typeurl.UnmarshalAny(stat.Data) - if err != nil { - return nil, fmt.Errorf("failed to extract metrics for container with id %s: %w", stat.ID, err) - } - - // extract the metrics if available for this container - // containerStatsData can be nil for pods that don't have an actual podsandbox container such as HPC - // In the case of HostProcess sandbox container we will use the nil value for the statsmap which is used later - // otherwise return an error since we should have gotten stats - containerStats, ok := containerStatsData.(*wstats.Statistics) - if !ok && !(isHostProcess && sandbox.ID == stat.ID) { - return nil, fmt.Errorf("failed to extract metrics for container with id %s: %w", stat.ID, err) - } - - statsMap[stat.ID] = containerStats - } - return statsMap, nil -} - -func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap map[string]*wstats.Statistics, containers []containerstore.Container, timestamp time.Time) (*runtime.WindowsContainerStats, []*runtime.WindowsContainerStats, error) { - 
podMetric, ok := statsMap[sandbox.ID] - if !ok { - return nil, nil, fmt.Errorf("failed to find container metric for pod with id %s", sandbox.ID) - } - - podRuntimeStats, err := c.convertToCRIStats(podMetric) - if err != nil { - return nil, nil, fmt.Errorf("failed to covert container metrics for sandbox with id %s: %w", sandbox.ID, err) - } - - windowsContainerStats := make([]*runtime.WindowsContainerStats, 0, len(statsMap)) - for _, cntr := range containers { - containerMetric := statsMap[cntr.ID] - - if cntr.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING { - // containers that are just created, in a failed state or exited (init containers) will not have stats - log.L.Warnf("failed to get container stats since container %q is not in running state", cntr.ID) - continue - } - - if containerMetric == nil { - log.L.Warnf("no metrics found for container %q", cntr.ID) - continue - } - - containerStats, err := c.convertToCRIStats(containerMetric) - if err != nil { - return nil, nil, fmt.Errorf("failed to convert metrics for container with id %s: %w", cntr.ID, err) - } - - // Calculate NanoCores for container - if containerStats.Cpu != nil && containerStats.Cpu.UsageCoreNanoSeconds != nil { - nanoCoreUsage := getUsageNanoCores(containerStats.Cpu.UsageCoreNanoSeconds.Value, cntr.Stats, containerStats.Cpu.Timestamp) - containerStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage} - } - - // On Windows we need to add up all the podStatsData to get the Total for the Pod as there isn't something - // like a parent cgroup that queried for all the pod podStatsData - appendCPUPodStats(podRuntimeStats, containerStats, timestamp) - appendMemoryPodStats(podRuntimeStats, containerStats, timestamp) - - // If snapshotstore doesn't have cached snapshot information - // set WritableLayer usage to zero - var usedBytes uint64 - sn, err := c.GetSnapshot(cntr.ID) - if err == nil { - usedBytes = sn.Size - } - containerStats.WritableLayer = 
&runtime.WindowsFilesystemUsage{ - Timestamp: sn.Timestamp, - FsId: &runtime.FilesystemIdentifier{ - Mountpoint: c.imageFSPath, - }, - UsedBytes: &runtime.UInt64Value{Value: usedBytes}, - } - - containerStats.Attributes = &runtime.ContainerAttributes{ - Id: cntr.ID, - Metadata: cntr.Config.GetMetadata(), - Labels: cntr.Config.GetLabels(), - Annotations: cntr.Config.GetAnnotations(), - } - - windowsContainerStats = append(windowsContainerStats, containerStats) - } - - // Calculate NanoCores for pod after adding containers cpu including the pods cpu - if podRuntimeStats.Cpu != nil && podRuntimeStats.Cpu.UsageCoreNanoSeconds != nil { - nanoCoreUsage := getUsageNanoCores(podRuntimeStats.Cpu.UsageCoreNanoSeconds.Value, sandbox.Stats, podRuntimeStats.Cpu.Timestamp) - podRuntimeStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage} - } - - return podRuntimeStats, windowsContainerStats, nil -} - -func appendCPUPodStats(podRuntimeStats *runtime.WindowsContainerStats, containerRunTimeStats *runtime.WindowsContainerStats, timestamp time.Time) { - // protect against missing stats in case container hasn't started yet - if containerRunTimeStats.Cpu == nil || containerRunTimeStats.Cpu.UsageCoreNanoSeconds == nil { - return - } - - // It is possible the pod sandbox might not be populated with values if it doesn't exist - // HostProcess pods are an example where there is no actual pod sandbox running and therefor no stats - if podRuntimeStats.Cpu == nil { - podRuntimeStats.Cpu = &runtime.WindowsCpuUsage{ - Timestamp: timestamp.UnixNano(), - UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 0}, - } - } - - if podRuntimeStats.Cpu.UsageCoreNanoSeconds == nil { - podRuntimeStats.Cpu.UsageCoreNanoSeconds = &runtime.UInt64Value{Value: 0} - } - - podRuntimeStats.Cpu.UsageCoreNanoSeconds.Value += containerRunTimeStats.Cpu.UsageCoreNanoSeconds.Value -} - -func appendMemoryPodStats(podRuntimeStats *runtime.WindowsContainerStats, containerRunTimeStats 
*runtime.WindowsContainerStats, timestamp time.Time) { - // protect against missing stats in case container hasn't started yet - if containerRunTimeStats.Memory == nil { - return - } - - // It is possible the pod sandbox might not be populated with values if it doesn't exist - // HostProcess pods are an example where there is no actual pod sandbox running and therefor no stats - if podRuntimeStats.Memory == nil { - podRuntimeStats.Memory = &runtime.WindowsMemoryUsage{ - Timestamp: timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{Value: 0}, - AvailableBytes: &runtime.UInt64Value{Value: 0}, - PageFaults: &runtime.UInt64Value{Value: 0}, - CommitMemoryBytes: &runtime.UInt64Value{Value: 0}, - } - } - - if containerRunTimeStats.Memory.WorkingSetBytes != nil { - if podRuntimeStats.Memory.WorkingSetBytes == nil { - podRuntimeStats.Memory.WorkingSetBytes = &runtime.UInt64Value{Value: 0} - } - podRuntimeStats.Memory.WorkingSetBytes.Value += containerRunTimeStats.Memory.WorkingSetBytes.Value - } - - if containerRunTimeStats.Memory.AvailableBytes != nil { - if podRuntimeStats.Memory.AvailableBytes == nil { - podRuntimeStats.Memory.AvailableBytes = &runtime.UInt64Value{Value: 0} - } - podRuntimeStats.Memory.AvailableBytes.Value += containerRunTimeStats.Memory.AvailableBytes.Value - } - - if containerRunTimeStats.Memory.PageFaults != nil { - if podRuntimeStats.Memory.PageFaults == nil { - podRuntimeStats.Memory.PageFaults = &runtime.UInt64Value{Value: 0} - } - podRuntimeStats.Memory.PageFaults.Value += containerRunTimeStats.Memory.PageFaults.Value - } -} - -func (c *criService) listWindowsMetricsForSandbox(ctx context.Context, sandbox sandboxstore.Sandbox) ([]*types.Metric, []containerstore.Container, error) { - req := &tasks.MetricsRequest{} - var containers []containerstore.Container - for _, cntr := range c.containerStore.List() { - if cntr.SandboxID != sandbox.ID { - continue - } - containers = append(containers, cntr) - req.Filters = append(req.Filters, 
"id=="+cntr.ID) - } - - //add sandbox container as well - req.Filters = append(req.Filters, "id=="+sandbox.ID) - - resp, err := c.client.TaskService().Metrics(ctx, req) - if err != nil { - return nil, nil, fmt.Errorf("failed to fetch metrics for tasks: %w", err) - } - return resp.Metrics, containers, nil -} - -func (c *criService) convertToCRIStats(stats *wstats.Statistics) (*runtime.WindowsContainerStats, error) { - var cs runtime.WindowsContainerStats - // the metric should exist but stats or stats.container will be nil for HostProcess sandbox containers - // this can also be the case when the container has not started yet - if stats != nil && stats.Container != nil { - wstats := stats.GetWindows() - if wstats == nil { - return nil, fmt.Errorf("windows stats is empty") - } - if wstats.Processor != nil { - cs.Cpu = &runtime.WindowsCpuUsage{ - Timestamp: (protobuf.FromTimestamp(wstats.Timestamp)).UnixNano(), - UsageCoreNanoSeconds: &runtime.UInt64Value{Value: wstats.Processor.TotalRuntimeNS}, - } - } - - if wstats.Memory != nil { - cs.Memory = &runtime.WindowsMemoryUsage{ - Timestamp: (protobuf.FromTimestamp(wstats.Timestamp)).UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{ - Value: wstats.Memory.MemoryUsagePrivateWorkingSetBytes, - }, - CommitMemoryBytes: &runtime.UInt64Value{ - Value: wstats.Memory.MemoryUsageCommitBytes, - }, - } - } - - } - return &cs, nil -} - -func getUsageNanoCores(usageCoreNanoSeconds uint64, oldStats *stats.ContainerStats, newtimestamp int64) uint64 { - if oldStats == nil { - return 0 - } - - nanoSeconds := newtimestamp - oldStats.Timestamp.UnixNano() - - // zero or negative interval - if nanoSeconds <= 0 { - return 0 - } - - return uint64(float64(usageCoreNanoSeconds-oldStats.UsageCoreNanoSeconds) / - float64(nanoSeconds) * float64(time.Second/time.Nanosecond)) -} - -func windowsNetworkUsage(ctx context.Context, sandbox sandboxstore.Sandbox, timestamp time.Time) *runtime.WindowsNetworkUsage { - eps, err := 
hcn.GetNamespaceEndpointIds(sandbox.NetNSPath) - if err != nil { - log.G(ctx).WithField("podsandboxid", sandbox.ID).WithError(err).Errorf("unable to retrieve windows endpoint metrics for netNsPath: %v", sandbox.NetNSPath) - return nil - } - networkUsage := &runtime.WindowsNetworkUsage{ - Timestamp: timestamp.UnixNano(), - } - for _, ep := range eps { - endpointStats, err := hcsshim.GetHNSEndpointStats(ep) - if err != nil { - log.G(ctx).WithError(err).Errorf("unable to gather stats for endpoint: %s", ep) - continue - } - rtStats := runtime.WindowsNetworkInterfaceUsage{ - Name: endpointStats.EndpointID, - RxBytes: &runtime.UInt64Value{Value: endpointStats.BytesReceived}, - RxPacketsDropped: &runtime.UInt64Value{Value: endpointStats.DroppedPacketsIncoming}, - TxBytes: &runtime.UInt64Value{Value: endpointStats.BytesSent}, - TxPacketsDropped: &runtime.UInt64Value{Value: endpointStats.DroppedPacketsOutgoing}, - } - networkUsage.Interfaces = append(networkUsage.Interfaces, &rtStats) - - // if the default interface isn't set add it. - // We don't have a way to determine the default interface in windows - if networkUsage.DefaultInterface == nil { - networkUsage.DefaultInterface = &rtStats - } - } - - return networkUsage -} - -func (c *criService) saveSandBoxMetrics(sandboxID string, sandboxStats *runtime.PodSandboxStats) error { - // we may not have stats since container hasn't started yet so skip saving to cache - if sandboxStats == nil || sandboxStats.Windows == nil || sandboxStats.Windows.Cpu == nil || - sandboxStats.Windows.Cpu.UsageCoreNanoSeconds == nil { - return nil - } - - newStats := &stats.ContainerStats{ - UsageCoreNanoSeconds: sandboxStats.Windows.Cpu.UsageCoreNanoSeconds.Value, - Timestamp: time.Unix(0, sandboxStats.Windows.Cpu.Timestamp), - } - err := c.sandboxStore.UpdateContainerStats(sandboxID, newStats) - if err != nil { - return err - } - - // We queried the stats when getting sandbox stats. 
We need to save the query to cache - for _, cntr := range sandboxStats.Windows.Containers { - // we may not have stats since container hasn't started yet so skip saving to cache - if cntr == nil || cntr.Cpu == nil || cntr.Cpu.UsageCoreNanoSeconds == nil { - return nil - } - - newStats := &stats.ContainerStats{ - UsageCoreNanoSeconds: cntr.Cpu.UsageCoreNanoSeconds.Value, - Timestamp: time.Unix(0, cntr.Cpu.Timestamp), - } - err = c.containerStore.UpdateContainerStats(cntr.Attributes.Id, newStats) - if err != nil { - return err - } - } - - return nil -} - -func (c *criService) getSandboxPidCount(ctx context.Context, sandbox sandboxstore.Sandbox) (uint64, error) { - var pidCount uint64 - - // get process count inside PodSandbox for Windows - task, err := sandbox.Container.Task(ctx, nil) - if err != nil { - if errdefs.IsNotFound(err) { - return 0, nil - } - return 0, err - } - processes, err := task.Pids(ctx) - if err != nil { - if errdefs.IsNotFound(err) { - return 0, nil - } - return 0, err - } - pidCount += uint64(len(processes)) - - for _, cntr := range c.containerStore.List() { - if cntr.SandboxID != sandbox.ID { - continue - } - - state := cntr.Status.Get().State() - if state != runtime.ContainerState_CONTAINER_RUNNING { - continue - } - - task, err := cntr.Container.Task(ctx, nil) - if err != nil { - return 0, err - } - - processes, err := task.Pids(ctx) - if err != nil { - if errdefs.IsNotFound(err) { - continue - } - return 0, err - } - pidCount += uint64(len(processes)) - - } - - return pidCount, nil -} diff --git a/pkg/cri/sbserver/sandbox_stats_windows_test.go b/pkg/cri/sbserver/sandbox_stats_windows_test.go deleted file mode 100644 index 75432256a..000000000 --- a/pkg/cri/sbserver/sandbox_stats_windows_test.go +++ /dev/null @@ -1,608 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "testing" - "time" - - wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - "github.com/containerd/containerd/pkg/cri/store/stats" - "github.com/containerd/containerd/protobuf" - "github.com/stretchr/testify/assert" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func TestGetUsageNanoCores(t *testing.T) { - timestamp := time.Now() - secondAfterTimeStamp := timestamp.Add(time.Second) - ID := "ID" - - for _, test := range []struct { - desc string - firstCPUValue uint64 - secondCPUValue uint64 - expectedNanoCoreUsageFirst uint64 - expectedNanoCoreUsageSecond uint64 - }{ - { - desc: "metrics", - firstCPUValue: 50, - secondCPUValue: 500, - expectedNanoCoreUsageFirst: 0, - expectedNanoCoreUsageSecond: 450, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - container, err := containerstore.NewContainer( - containerstore.Metadata{ID: ID}, - ) - assert.NoError(t, err) - - // calculate for first iteration - // first run so container stats will be nil - assert.Nil(t, container.Stats) - cpuUsage := getUsageNanoCores(test.firstCPUValue, container.Stats, timestamp.UnixNano()) - assert.NoError(t, err) - assert.Equal(t, test.expectedNanoCoreUsageFirst, cpuUsage) - - // fill in the stats as if they now exist - container.Stats = &stats.ContainerStats{} - container.Stats.UsageCoreNanoSeconds = test.firstCPUValue - container.Stats.Timestamp = timestamp - 
assert.NotNil(t, container.Stats) - - // calculate for second iteration - cpuUsage = getUsageNanoCores(test.secondCPUValue, container.Stats, secondAfterTimeStamp.UnixNano()) - assert.NoError(t, err) - assert.Equal(t, test.expectedNanoCoreUsageSecond, cpuUsage) - }) - } - -} - -func Test_criService_podSandboxStats(t *testing.T) { - initialStatsTimestamp := time.Now() - currentStatsTimestamp := initialStatsTimestamp.Add(time.Second) - - c := newTestCRIService() - - type expectedStats struct { - UsageCoreNanoSeconds uint64 - UsageNanoCores uint64 - WorkingSetBytes uint64 - CommitMemoryBytes uint64 - } - for _, test := range []struct { - desc string - metrics map[string]*wstats.Statistics - sandbox sandboxstore.Sandbox - containers []containerstore.Container - expectedPodStats *expectedStats - expectedContainerStats []expectedStats - expectError bool - }{ - { - desc: "no metrics found should return error", - metrics: map[string]*wstats.Statistics{}, - sandbox: sandboxstore.Sandbox{}, - containers: []containerstore.Container{}, - expectedPodStats: &expectedStats{}, - expectedContainerStats: []expectedStats{}, - expectError: true, - }, - { - desc: "pod stats will include the container stats", - metrics: map[string]*wstats.Statistics{ - "c1": { - Container: windowsStat(currentStatsTimestamp, 200, 20, 20), - }, - "s1": { - Container: windowsStat(currentStatsTimestamp, 200, 20, 20), - }, - }, - sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}}, - containers: []containerstore.Container{ - newContainer("c1", running, nil), - }, - expectedPodStats: &expectedStats{ - UsageCoreNanoSeconds: 400, - UsageNanoCores: 0, - WorkingSetBytes: 40, - CommitMemoryBytes: 40, - }, - expectedContainerStats: []expectedStats{ - { - UsageCoreNanoSeconds: 200, - UsageNanoCores: 0, - WorkingSetBytes: 20, - CommitMemoryBytes: 20, - }, - }, - expectError: false, - }, - { - desc: "pod stats will include the init container stats", - metrics: map[string]*wstats.Statistics{ - "c1": 
{ - Container: windowsStat(currentStatsTimestamp, 200, 20, 20), - }, - "s1": { - Container: windowsStat(currentStatsTimestamp, 200, 20, 20), - }, - "i1": { - Container: windowsStat(currentStatsTimestamp, 200, 20, 20), - }, - }, - sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}}, - containers: []containerstore.Container{ - newContainer("c1", running, nil), - newContainer("i1", running, nil), - }, - expectedPodStats: &expectedStats{ - UsageCoreNanoSeconds: 600, - UsageNanoCores: 0, - WorkingSetBytes: 60, - CommitMemoryBytes: 60, - }, - expectedContainerStats: []expectedStats{ - { - UsageCoreNanoSeconds: 200, - UsageNanoCores: 0, - WorkingSetBytes: 20, - CommitMemoryBytes: 20, - }, - { - UsageCoreNanoSeconds: 200, - UsageNanoCores: 0, - WorkingSetBytes: 20, - CommitMemoryBytes: 20, - }, - }, - expectError: false, - }, - { - desc: "pod stats will not include the init container stats if it is stopped", - metrics: map[string]*wstats.Statistics{ - "c1": { - Container: windowsStat(currentStatsTimestamp, 200, 20, 20), - }, - "s1": { - Container: windowsStat(currentStatsTimestamp, 200, 20, 20), - }, - }, - sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}}, - containers: []containerstore.Container{ - newContainer("c1", running, nil), - newContainer("i1", exitedValid, nil), - }, - expectedPodStats: &expectedStats{ - UsageCoreNanoSeconds: 400, - UsageNanoCores: 0, - WorkingSetBytes: 40, - CommitMemoryBytes: 40, - }, - expectedContainerStats: []expectedStats{ - { - UsageCoreNanoSeconds: 200, - UsageNanoCores: 0, - WorkingSetBytes: 20, - CommitMemoryBytes: 20, - }, - }, - expectError: false, - }, - { - desc: "pod stats will not include the init container stats if it is stopped in failed state", - metrics: map[string]*wstats.Statistics{ - "c1": { - Container: windowsStat(currentStatsTimestamp, 200, 20, 20), - }, - "s1": { - Container: windowsStat(currentStatsTimestamp, 200, 20, 20), - }, - }, - sandbox: sandboxstore.Sandbox{Metadata: 
sandboxstore.Metadata{ID: "s1"}}, - containers: []containerstore.Container{ - newContainer("c1", running, nil), - newContainer("i1", exitedInvalid, nil), - }, - expectedPodStats: &expectedStats{ - UsageCoreNanoSeconds: 400, - UsageNanoCores: 0, - WorkingSetBytes: 40, - CommitMemoryBytes: 40, - }, - expectedContainerStats: []expectedStats{ - { - UsageCoreNanoSeconds: 200, - UsageNanoCores: 0, - WorkingSetBytes: 20, - CommitMemoryBytes: 20, - }, - }, - expectError: false, - }, - { - desc: "pod with existing stats will have usagenanocores totalled across pods and containers", - metrics: map[string]*wstats.Statistics{ - "c1": { - Container: windowsStat(currentStatsTimestamp, 400, 20, 20), - }, - "s1": { - Container: windowsStat(currentStatsTimestamp, 400, 20, 20), - }, - }, - sandbox: sandboxPod("s1", initialStatsTimestamp, 400), - containers: []containerstore.Container{ - newContainer("c1", running, &stats.ContainerStats{ - Timestamp: initialStatsTimestamp, - UsageCoreNanoSeconds: 200, - }), - }, - expectedPodStats: &expectedStats{ - UsageCoreNanoSeconds: 800, - UsageNanoCores: 400, - WorkingSetBytes: 40, - CommitMemoryBytes: 40, - }, - expectedContainerStats: []expectedStats{ - { - UsageCoreNanoSeconds: 400, - UsageNanoCores: 200, - WorkingSetBytes: 20, - CommitMemoryBytes: 20, - }, - }, - expectError: false, - }, - { - desc: "pod sandbox with nil stats still works (hostprocess container scenario)", - metrics: map[string]*wstats.Statistics{ - "c1": { - Container: windowsStat(currentStatsTimestamp, 400, 20, 20), - }, - "s1": nil, - }, - sandbox: sandboxPod("s1", initialStatsTimestamp, 200), - containers: []containerstore.Container{ - newContainer("c1", running, &stats.ContainerStats{ - Timestamp: initialStatsTimestamp, - UsageCoreNanoSeconds: 200, - }), - }, - expectedPodStats: &expectedStats{ - UsageCoreNanoSeconds: 400, - UsageNanoCores: 200, - WorkingSetBytes: 20, - CommitMemoryBytes: 20, - }, - expectedContainerStats: []expectedStats{ - { - UsageCoreNanoSeconds: 
400, - UsageNanoCores: 200, - WorkingSetBytes: 20, - CommitMemoryBytes: 20, - }, - }, - expectError: false, - }, - { - desc: "pod sandbox with empty stats still works (hostprocess container scenario)", - metrics: map[string]*wstats.Statistics{ - "c1": { - Container: windowsStat(currentStatsTimestamp, 400, 20, 20), - }, - "s1": {}, - }, - sandbox: sandboxPod("s1", initialStatsTimestamp, 200), - containers: []containerstore.Container{ - newContainer("c1", running, &stats.ContainerStats{ - Timestamp: initialStatsTimestamp, - UsageCoreNanoSeconds: 200, - }), - }, - expectedPodStats: &expectedStats{ - UsageCoreNanoSeconds: 400, - UsageNanoCores: 200, - WorkingSetBytes: 20, - CommitMemoryBytes: 20, - }, - expectedContainerStats: []expectedStats{ - { - UsageCoreNanoSeconds: 400, - UsageNanoCores: 200, - WorkingSetBytes: 20, - CommitMemoryBytes: 20, - }, - }, - expectError: false, - }, - { - desc: "pod sandbox with a container that has no cpu shouldn't error", - metrics: map[string]*wstats.Statistics{ - "c1": {}, - "s1": {}, - }, - sandbox: sandboxPod("s1", initialStatsTimestamp, 200), - containers: []containerstore.Container{ - newContainer("c1", running, &stats.ContainerStats{ - Timestamp: initialStatsTimestamp, - UsageCoreNanoSeconds: 200, - }), - }, - expectedPodStats: nil, - expectedContainerStats: []expectedStats{}, - expectError: false, - }, - { - desc: "pod sandbox with no stats in metric mapp will fail", - metrics: map[string]*wstats.Statistics{}, - sandbox: sandboxPod("s1", initialStatsTimestamp, 200), - containers: []containerstore.Container{}, - expectedPodStats: nil, - expectedContainerStats: []expectedStats{}, - expectError: true, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - actualPodStats, actualContainerStats, err := c.toPodSandboxStats(test.sandbox, test.metrics, test.containers, currentStatsTimestamp) - if test.expectError { - assert.NotNil(t, err) - return - } - assert.Nil(t, err) - - if test.expectedPodStats == nil { - 
assert.Nil(t, actualPodStats.Cpu) - assert.Nil(t, actualPodStats.Memory) - return - } - - assert.Equal(t, test.expectedPodStats.UsageCoreNanoSeconds, actualPodStats.Cpu.UsageCoreNanoSeconds.Value) - assert.Equal(t, test.expectedPodStats.UsageNanoCores, actualPodStats.Cpu.UsageNanoCores.Value) - - for i, expectedStat := range test.expectedContainerStats { - actutalStat := actualContainerStats[i] - - assert.Equal(t, expectedStat.UsageCoreNanoSeconds, actutalStat.Cpu.UsageCoreNanoSeconds.Value) - assert.Equal(t, expectedStat.UsageNanoCores, actutalStat.Cpu.UsageNanoCores.Value) - } - }) - } -} - -func sandboxPod(id string, timestamp time.Time, cachedCPU uint64) sandboxstore.Sandbox { - return sandboxstore.Sandbox{ - Metadata: sandboxstore.Metadata{ID: id}, Stats: &stats.ContainerStats{ - Timestamp: timestamp, - UsageCoreNanoSeconds: cachedCPU, - }} -} - -func windowsStat(timestamp time.Time, cpu uint64, memory uint64, commitMemory uint64) *wstats.Statistics_Windows { - return &wstats.Statistics_Windows{ - Windows: &wstats.WindowsContainerStatistics{ - Timestamp: protobuf.ToTimestamp(timestamp), - Processor: &wstats.WindowsContainerProcessorStatistics{ - TotalRuntimeNS: cpu, - }, - Memory: &wstats.WindowsContainerMemoryStatistics{ - MemoryUsagePrivateWorkingSetBytes: memory, - MemoryUsageCommitBytes: commitMemory, - }, - }, - } -} - -func newContainer(id string, status containerstore.Status, stats *stats.ContainerStats) containerstore.Container { - cntr, err := containerstore.NewContainer(containerstore.Metadata{ID: id}, containerstore.WithFakeStatus(status)) - if err != nil { - panic(err) - } - if stats != nil { - cntr.Stats = stats - } - return cntr -} - -var exitedValid = containerstore.Status{ - StartedAt: time.Now().UnixNano(), - FinishedAt: time.Now().UnixNano(), - ExitCode: 0, -} - -var exitedInvalid = containerstore.Status{ - StartedAt: time.Now().UnixNano(), - FinishedAt: time.Now().UnixNano(), - ExitCode: 1, -} - -var running = containerstore.Status{ - 
StartedAt: time.Now().UnixNano(), -} - -func Test_criService_saveSandBoxMetrics(t *testing.T) { - - timestamp := time.Now() - containerID := "c1" - sandboxID := "s1" - for _, test := range []struct { - desc string - sandboxStats *runtime.PodSandboxStats - expectError bool - expectedSandboxvalue *stats.ContainerStats - expectedContainervalue *stats.ContainerStats - }{ - { - desc: "if sandboxstats is nil then skip ", - sandboxStats: nil, - expectError: false, - expectedSandboxvalue: nil, - }, - { - desc: "if sandboxstats.windows is nil then skip", - sandboxStats: &runtime.PodSandboxStats{ - Windows: nil, - }, - expectError: false, - expectedSandboxvalue: nil, - }, - { - desc: "if sandboxstats.windows.cpu is nil then skip", - sandboxStats: &runtime.PodSandboxStats{ - Windows: &runtime.WindowsPodSandboxStats{ - Cpu: nil, - }, - }, - expectError: false, - expectedSandboxvalue: nil, - }, - { - desc: "if sandboxstats.windows.cpu.UsageCoreNanoSeconds is nil then skip", - sandboxStats: &runtime.PodSandboxStats{ - Windows: &runtime.WindowsPodSandboxStats{ - Cpu: &runtime.WindowsCpuUsage{ - UsageCoreNanoSeconds: nil, - }, - }, - }, - expectError: false, - expectedSandboxvalue: nil, - }, - { - desc: "Stats for containers that have cpu nil are skipped", - sandboxStats: &runtime.PodSandboxStats{ - Windows: &runtime.WindowsPodSandboxStats{ - Cpu: &runtime.WindowsCpuUsage{ - Timestamp: timestamp.UnixNano(), - UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 100}, - }, - Containers: []*runtime.WindowsContainerStats{ - { - Attributes: &runtime.ContainerAttributes{Id: containerID}, - Cpu: nil, - }, - }, - }, - }, - expectError: false, - expectedSandboxvalue: &stats.ContainerStats{ - Timestamp: timestamp, - UsageCoreNanoSeconds: 100, - }, - expectedContainervalue: nil, - }, - { - desc: "Stats for containers that have UsageCoreNanoSeconds nil are skipped", - sandboxStats: &runtime.PodSandboxStats{ - Windows: &runtime.WindowsPodSandboxStats{ - Cpu: &runtime.WindowsCpuUsage{ - 
Timestamp: timestamp.UnixNano(), - UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 100}, - }, - Containers: []*runtime.WindowsContainerStats{ - { - Attributes: &runtime.ContainerAttributes{Id: containerID}, - Cpu: &runtime.WindowsCpuUsage{ - Timestamp: timestamp.UnixNano(), - UsageCoreNanoSeconds: nil}, - }, - }, - }, - }, - expectError: false, - expectedSandboxvalue: &stats.ContainerStats{ - Timestamp: timestamp, - UsageCoreNanoSeconds: 100, - }, - expectedContainervalue: nil, - }, - { - desc: "Stats are updated for sandbox and containers", - sandboxStats: &runtime.PodSandboxStats{ - Windows: &runtime.WindowsPodSandboxStats{ - Cpu: &runtime.WindowsCpuUsage{ - Timestamp: timestamp.UnixNano(), - UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 100}, - }, - Containers: []*runtime.WindowsContainerStats{ - { - Attributes: &runtime.ContainerAttributes{Id: containerID}, - Cpu: &runtime.WindowsCpuUsage{ - Timestamp: timestamp.UnixNano(), - UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 50}, - }, - }, - }, - }, - }, - expectError: false, - expectedSandboxvalue: &stats.ContainerStats{ - Timestamp: timestamp, - UsageCoreNanoSeconds: 100, - }, - expectedContainervalue: &stats.ContainerStats{ - Timestamp: timestamp, - UsageCoreNanoSeconds: 50, - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c := newTestCRIService() - c.sandboxStore.Add(sandboxstore.Sandbox{ - Metadata: sandboxstore.Metadata{ID: sandboxID}, - }) - - c.containerStore.Add(containerstore.Container{ - Metadata: containerstore.Metadata{ID: containerID}, - }) - - err := c.saveSandBoxMetrics(sandboxID, test.sandboxStats) - - if test.expectError { - assert.NotNil(t, err) - } else { - assert.Nil(t, err) - } - - sandbox, err := c.sandboxStore.Get(sandboxID) - assert.Nil(t, err) - - if test.expectedSandboxvalue != nil { - assert.Equal(t, test.expectedSandboxvalue.Timestamp.UnixNano(), sandbox.Stats.Timestamp.UnixNano()) - assert.Equal(t, test.expectedSandboxvalue.UsageCoreNanoSeconds, 
sandbox.Stats.UsageCoreNanoSeconds) - } else { - assert.Nil(t, sandbox.Stats) - } - - container, err := c.containerStore.Get(containerID) - assert.Nil(t, err) - if test.expectedContainervalue != nil { - assert.Equal(t, test.expectedContainervalue.Timestamp.UnixNano(), container.Stats.Timestamp.UnixNano()) - assert.Equal(t, test.expectedContainervalue.UsageCoreNanoSeconds, container.Stats.UsageCoreNanoSeconds) - } else { - assert.Nil(t, container.Stats) - } - }) - } -} diff --git a/pkg/cri/sbserver/sandbox_status.go b/pkg/cri/sbserver/sandbox_status.go deleted file mode 100644 index 0d547b4b6..000000000 --- a/pkg/cri/sbserver/sandbox_status.go +++ /dev/null @@ -1,139 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "fmt" - "time" - - "github.com/containerd/containerd/errdefs" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// PodSandboxStatus returns the status of the PodSandbox. 
-func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandboxStatusRequest) (*runtime.PodSandboxStatusResponse, error) { - sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId()) - if err != nil { - return nil, fmt.Errorf("an error occurred when try to find sandbox: %w", err) - } - - ip, additionalIPs, err := c.getIPs(sandbox) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox ip: %w", err) - } - - controller, err := c.getSandboxController(sandbox.Config, sandbox.RuntimeHandler) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox controller: %w", err) - } - - var ( - createdAt time.Time - state string - info map[string]string - ) - cstatus, err := controller.Status(ctx, sandbox.ID, r.GetVerbose()) - if err != nil { - // If the shim died unexpectedly (segfault etc.) let's set the state as - // NOTREADY and not just error out to make k8s and clients like crictl - // happy. If we get back ErrNotFound from controller.Status above while - // we're using the shim-mode controller, this is a decent indicator it - // exited unexpectedly. We can use the fact that we successfully retrieved - // the sandbox object from the store above to tell that this is true, otherwise - // if we followed the normal k8s convention of StopPodSandbox -> RemovePodSandbox, - // we wouldn't have that object in the store anymore. - if !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("failed to query controller status: %w", err) - } - state = runtime.PodSandboxState_SANDBOX_NOTREADY.String() - } else { - state = cstatus.State - createdAt = cstatus.CreatedAt - info = cstatus.Info - } - - status := toCRISandboxStatus(sandbox.Metadata, state, createdAt, ip, additionalIPs) - if status.GetCreatedAt() == 0 { - // CRI doesn't allow CreatedAt == 0. 
- sandboxInfo, err := c.client.SandboxStore().Get(ctx, sandbox.ID) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox %q from metadata store: %w", sandbox.ID, err) - } - status.CreatedAt = sandboxInfo.CreatedAt.UnixNano() - } - - return &runtime.PodSandboxStatusResponse{ - Status: status, - Info: info, - }, nil -} - -func (c *criService) getIPs(sandbox sandboxstore.Sandbox) (string, []string, error) { - config := sandbox.Config - - // For sandboxes using the node network we are not - // responsible for reporting the IP. - if hostNetwork(config) { - return "", nil, nil - } - - if closed, err := sandbox.NetNS.Closed(); err != nil { - return "", nil, fmt.Errorf("check network namespace closed: %w", err) - } else if closed { - return "", nil, nil - } - - return sandbox.IP, sandbox.AdditionalIPs, nil -} - -// toCRISandboxStatus converts sandbox metadata into CRI pod sandbox status. -func toCRISandboxStatus(meta sandboxstore.Metadata, status string, createdAt time.Time, ip string, additionalIPs []string) *runtime.PodSandboxStatus { - // Set sandbox state to NOTREADY by default. 
- state := runtime.PodSandboxState_SANDBOX_NOTREADY - if value, ok := runtime.PodSandboxState_value[status]; ok { - state = runtime.PodSandboxState(value) - } - nsOpts := meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions() - var ips []*runtime.PodIP - for _, additionalIP := range additionalIPs { - ips = append(ips, &runtime.PodIP{Ip: additionalIP}) - } - return &runtime.PodSandboxStatus{ - Id: meta.ID, - Metadata: meta.Config.GetMetadata(), - State: state, - CreatedAt: createdAt.UnixNano(), - Network: &runtime.PodSandboxNetworkStatus{ - Ip: ip, - AdditionalIps: ips, - }, - Linux: &runtime.LinuxPodSandboxStatus{ - Namespaces: &runtime.Namespace{ - Options: &runtime.NamespaceOption{ - Network: nsOpts.GetNetwork(), - Pid: nsOpts.GetPid(), - Ipc: nsOpts.GetIpc(), - }, - }, - }, - Labels: meta.Config.GetLabels(), - Annotations: meta.Config.GetAnnotations(), - RuntimeHandler: meta.RuntimeHandler, - } -} diff --git a/pkg/cri/sbserver/sandbox_status_test.go b/pkg/cri/sbserver/sandbox_status_test.go deleted file mode 100644 index 5f942b79b..000000000 --- a/pkg/cri/sbserver/sandbox_status_test.go +++ /dev/null @@ -1,118 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "testing" - "time" - - "github.com/stretchr/testify/assert" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" -) - -func TestPodSandboxStatus(t *testing.T) { - const ( - id = "test-id" - ip = "10.10.10.10" - ) - additionalIPs := []string{"8.8.8.8", "2001:db8:85a3::8a2e:370:7334"} - createdAt := time.Now() - config := &runtime.PodSandboxConfig{ - Metadata: &runtime.PodSandboxMetadata{ - Name: "test-name", - Uid: "test-uid", - Namespace: "test-ns", - Attempt: 1, - }, - Linux: &runtime.LinuxPodSandboxConfig{ - SecurityContext: &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - Network: runtime.NamespaceMode_NODE, - Pid: runtime.NamespaceMode_CONTAINER, - Ipc: runtime.NamespaceMode_POD, - }, - }, - }, - Labels: map[string]string{"a": "b"}, - Annotations: map[string]string{"c": "d"}, - } - metadata := sandboxstore.Metadata{ - ID: id, - Name: "test-name", - Config: config, - RuntimeHandler: "test-runtime-handler", - } - - expected := &runtime.PodSandboxStatus{ - Id: id, - Metadata: config.GetMetadata(), - CreatedAt: createdAt.UnixNano(), - Network: &runtime.PodSandboxNetworkStatus{ - Ip: ip, - AdditionalIps: []*runtime.PodIP{ - { - Ip: additionalIPs[0], - }, - { - Ip: additionalIPs[1], - }, - }, - }, - Linux: &runtime.LinuxPodSandboxStatus{ - Namespaces: &runtime.Namespace{ - Options: &runtime.NamespaceOption{ - Network: runtime.NamespaceMode_NODE, - Pid: runtime.NamespaceMode_CONTAINER, - Ipc: runtime.NamespaceMode_POD, - }, - }, - }, - Labels: config.GetLabels(), - Annotations: config.GetAnnotations(), - RuntimeHandler: "test-runtime-handler", - } - for _, test := range []struct { - desc string - state string - expectedState runtime.PodSandboxState - }{ - { - desc: "sandbox state ready", - state: sandboxstore.StateReady.String(), - expectedState: runtime.PodSandboxState_SANDBOX_READY, - }, - { - desc: "sandbox state not ready", 
- state: sandboxstore.StateNotReady.String(), - expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY, - }, - { - desc: "sandbox state unknown", - state: sandboxstore.StateUnknown.String(), - expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - expected.State = test.expectedState - got := toCRISandboxStatus(metadata, test.state, createdAt, ip, additionalIPs) - assert.Equal(t, expected, got) - }) - } -} diff --git a/pkg/cri/sbserver/sandbox_stop.go b/pkg/cri/sbserver/sandbox_stop.go deleted file mode 100644 index 3346364cf..000000000 --- a/pkg/cri/sbserver/sandbox_stop.go +++ /dev/null @@ -1,149 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "errors" - "fmt" - "time" - - "github.com/containerd/log" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" -) - -// StopPodSandbox stops the sandbox. If there are any running containers in the -// sandbox, they should be forcibly terminated. 
-func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandboxRequest) (*runtime.StopPodSandboxResponse, error) { - sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId()) - if err != nil { - return nil, fmt.Errorf("an error occurred when try to find sandbox %q: %w", - r.GetPodSandboxId(), err) - } - - if err := c.stopPodSandbox(ctx, sandbox); err != nil { - return nil, err - } - - return &runtime.StopPodSandboxResponse{}, nil -} - -func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sandbox) error { - // Use the full sandbox id. - id := sandbox.ID - - // Stop all containers inside the sandbox. This terminates the container forcibly, - // and container may still be created, so production should not rely on this behavior. - // TODO(random-liu): Introduce a state in sandbox to avoid future container creation. - stop := time.Now() - containers := c.containerStore.List() - for _, container := range containers { - if container.SandboxID != id { - continue - } - // Forcibly stop the container. Do not use `StopContainer`, because it introduces a race - // if a container is removed after list. - if err := c.stopContainer(ctx, container, 0); err != nil { - return fmt.Errorf("failed to stop container %q: %w", container.ID, err) - } - } - - // Only stop sandbox container when it's running or unknown. 
- state := sandbox.Status.Get().State - if state == sandboxstore.StateReady || state == sandboxstore.StateUnknown { - // Use sandbox controller to stop sandbox - controller, err := c.getSandboxController(sandbox.Config, sandbox.RuntimeHandler) - if err != nil { - return fmt.Errorf("failed to get sandbox controller: %w", err) - } - - if err := controller.Stop(ctx, id); err != nil { - return fmt.Errorf("failed to stop sandbox %q: %w", id, err) - } - } - - sandboxRuntimeStopTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(stop) - - err := c.nri.StopPodSandbox(ctx, &sandbox) - if err != nil { - log.G(ctx).WithError(err).Errorf("NRI sandbox stop notification failed") - } - - // Teardown network for sandbox. - if sandbox.NetNS != nil { - netStop := time.Now() - // Use empty netns path if netns is not available. This is defined in: - // https://github.com/containernetworking/cni/blob/v0.7.0-alpha1/SPEC.md - if closed, err := sandbox.NetNS.Closed(); err != nil { - return fmt.Errorf("failed to check network namespace closed: %w", err) - } else if closed { - sandbox.NetNSPath = "" - } - if err := c.teardownPodNetwork(ctx, sandbox); err != nil { - return fmt.Errorf("failed to destroy network for sandbox %q: %w", id, err) - } - if err := sandbox.NetNS.Remove(); err != nil { - return fmt.Errorf("failed to remove network namespace for sandbox %q: %w", id, err) - } - sandboxDeleteNetwork.UpdateSince(netStop) - } - - log.G(ctx).Infof("TearDown network for sandbox %q successfully", id) - - return nil -} - -// waitSandboxStop waits for sandbox to be stopped until context is cancelled or -// the context deadline is exceeded. 
-func (c *criService) waitSandboxStop(ctx context.Context, sandbox sandboxstore.Sandbox) error { - select { - case <-ctx.Done(): - return fmt.Errorf("wait sandbox container %q: %w", sandbox.ID, ctx.Err()) - case <-sandbox.Stopped(): - return nil - } -} - -// teardownPodNetwork removes the network from the pod -func (c *criService) teardownPodNetwork(ctx context.Context, sandbox sandboxstore.Sandbox) error { - netPlugin := c.getNetworkPlugin(sandbox.RuntimeHandler) - if netPlugin == nil { - return errors.New("cni config not initialized") - } - - var ( - id = sandbox.ID - path = sandbox.NetNSPath - config = sandbox.Config - ) - opts, err := cniNamespaceOpts(id, config) - if err != nil { - return fmt.Errorf("get cni namespace options: %w", err) - } - - netStart := time.Now() - err = netPlugin.Remove(ctx, id, path, opts...) - networkPluginOperations.WithValues(networkTearDownOp).Inc() - networkPluginOperationsLatency.WithValues(networkTearDownOp).UpdateSince(netStart) - if err != nil { - networkPluginOperationsErrors.WithValues(networkTearDownOp).Inc() - return err - } - return nil -} diff --git a/pkg/cri/sbserver/sandbox_stop_test.go b/pkg/cri/sbserver/sandbox_stop_test.go deleted file mode 100644 index 09235768e..000000000 --- a/pkg/cri/sbserver/sandbox_stop_test.go +++ /dev/null @@ -1,80 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "testing" - "time" - - "github.com/stretchr/testify/assert" - - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" -) - -func TestWaitSandboxStop(t *testing.T) { - id := "test-id" - for _, test := range []struct { - desc string - state sandboxstore.State - cancel bool - timeout time.Duration - expectErr bool - }{ - { - desc: "should return error if timeout exceeds", - state: sandboxstore.StateReady, - timeout: 200 * time.Millisecond, - expectErr: true, - }, - { - desc: "should return error if context is cancelled", - state: sandboxstore.StateReady, - timeout: time.Hour, - cancel: true, - expectErr: true, - }, - { - desc: "should not return error if sandbox is stopped before timeout", - state: sandboxstore.StateNotReady, - timeout: time.Hour, - expectErr: false, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c := newTestCRIService() - sandbox := sandboxstore.NewSandbox( - sandboxstore.Metadata{ID: id}, - sandboxstore.Status{State: test.state}, - ) - ctx := context.Background() - if test.cancel { - cancelledCtx, cancel := context.WithCancel(ctx) - cancel() - ctx = cancelledCtx - } - if test.timeout > 0 { - timeoutCtx, cancel := context.WithTimeout(ctx, test.timeout) - defer cancel() - ctx = timeoutCtx - } - err := c.waitSandboxStop(ctx, sandbox) - assert.Equal(t, test.expectErr, err != nil, test.desc) - }) - } -} diff --git a/pkg/cri/sbserver/service.go b/pkg/cri/sbserver/service.go deleted file mode 100644 index 29dbe4e36..000000000 --- a/pkg/cri/sbserver/service.go +++ /dev/null @@ -1,405 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "os" - "path/filepath" - "sync" - "sync/atomic" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/oci" - "github.com/containerd/containerd/pkg/cri/instrument" - "github.com/containerd/containerd/pkg/cri/nri" - "github.com/containerd/containerd/pkg/cri/sbserver/images" - "github.com/containerd/containerd/pkg/cri/sbserver/podsandbox" - imagestore "github.com/containerd/containerd/pkg/cri/store/image" - snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot" - "github.com/containerd/containerd/pkg/cri/streaming" - "github.com/containerd/containerd/plugin" - "github.com/containerd/containerd/sandbox" - "github.com/containerd/go-cni" - "github.com/containerd/log" - "google.golang.org/grpc" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/pkg/cri/store/label" - - criconfig "github.com/containerd/containerd/pkg/cri/config" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - ctrdutil "github.com/containerd/containerd/pkg/cri/util" - osinterface "github.com/containerd/containerd/pkg/os" - "github.com/containerd/containerd/pkg/registrar" -) - -// defaultNetworkPlugin is used for the default CNI configuration -const defaultNetworkPlugin = "default" - -// CRIService is the interface implement CRI remote service server. 
-type CRIService interface { - runtime.RuntimeServiceServer - runtime.ImageServiceServer - // Closer is used by containerd to gracefully stop cri service. - io.Closer - - Run(ready func()) error - - Register(*grpc.Server) error -} - -// imageService specifies dependencies to image service. -type imageService interface { - runtime.ImageServiceServer - - RuntimeSnapshotter(ctx context.Context, ociRuntime criconfig.Runtime) string - - UpdateImage(ctx context.Context, r string) error - - GetImage(id string) (imagestore.Image, error) - GetSnapshot(key string) (snapshotstore.Snapshot, error) - - LocalResolve(refOrID string) (imagestore.Image, error) -} - -// criService implements CRIService. -type criService struct { - imageService - // config contains all configurations. - config criconfig.Config - // imageFSPath is the path to image filesystem. - imageFSPath string - // os is an interface for all required os operations. - os osinterface.OS - // sandboxStore stores all resources associated with sandboxes. - sandboxStore *sandboxstore.Store - // sandboxNameIndex stores all sandbox names and make sure each name - // is unique. - sandboxNameIndex *registrar.Registrar - // containerStore stores all resources associated with containers. - containerStore *containerstore.Store - // sandboxControllers contains different sandbox controller type, - // every controller controls sandbox lifecycle (and hides implementation details behind). - sandboxControllers map[criconfig.SandboxControllerMode]sandbox.Controller - // containerNameIndex stores all container names and make sure each - // name is unique. - containerNameIndex *registrar.Registrar - // netPlugin is used to setup and teardown network when run/stop pod sandbox. - netPlugin map[string]cni.CNI - // client is an instance of the containerd client - client *containerd.Client - // streamServer is the streaming server serves container streaming request. 
- streamServer streaming.Server - // eventMonitor is the monitor monitors containerd events. - eventMonitor *eventMonitor - // initialized indicates whether the server is initialized. All GRPC services - // should return error before the server is initialized. - initialized atomic.Bool - // cniNetConfMonitor is used to reload cni network conf if there is - // any valid fs change events from cni network conf dir. - cniNetConfMonitor map[string]*cniNetConfSyncer - // baseOCISpecs contains cached OCI specs loaded via `Runtime.BaseRuntimeSpec` - baseOCISpecs map[string]*oci.Spec - // allCaps is the list of the capabilities. - // When nil, parsed from CapEff of /proc/self/status. - allCaps []string //nolint:nolintlint,unused // Ignore on non-Linux - // containerEventsChan is used to capture container events and send them - // to the caller of GetContainerEvents. - containerEventsChan chan runtime.ContainerEventResponse - // nri is used to hook NRI into CRI request processing. - nri *nri.API -} - -// NewCRIService returns a new instance of CRIService -func NewCRIService(config criconfig.Config, client *containerd.Client, nri *nri.API) (CRIService, error) { - var err error - labels := label.NewStore() - - if client.SnapshotService(config.ContainerdConfig.Snapshotter) == nil { - return nil, fmt.Errorf("failed to find snapshotter %q", config.ContainerdConfig.Snapshotter) - } - - imageFSPath := imageFSPath(config.ContainerdRootDir, config.ContainerdConfig.Snapshotter) - log.L.Infof("Get image filesystem path %q", imageFSPath) - - // TODO: expose this as a separate containerd plugin. 
- imageService, err := images.NewService(config, imageFSPath, client) - if err != nil { - return nil, fmt.Errorf("unable to create CRI image service: %w", err) - } - - c := &criService{ - imageService: imageService, - config: config, - client: client, - imageFSPath: imageFSPath, - os: osinterface.RealOS{}, - sandboxStore: sandboxstore.NewStore(labels), - containerStore: containerstore.NewStore(labels), - sandboxNameIndex: registrar.NewRegistrar(), - containerNameIndex: registrar.NewRegistrar(), - netPlugin: make(map[string]cni.CNI), - sandboxControllers: make(map[criconfig.SandboxControllerMode]sandbox.Controller), - } - - // TODO: figure out a proper channel size. - c.containerEventsChan = make(chan runtime.ContainerEventResponse, 1000) - - if err := c.initPlatform(); err != nil { - return nil, fmt.Errorf("initialize platform: %w", err) - } - - // prepare streaming server - c.streamServer, err = newStreamServer(c, config.StreamServerAddress, config.StreamServerPort, config.StreamIdleTimeout) - if err != nil { - return nil, fmt.Errorf("failed to create stream server: %w", err) - } - - c.eventMonitor = newEventMonitor(c) - - c.cniNetConfMonitor = make(map[string]*cniNetConfSyncer) - for name, i := range c.netPlugin { - path := c.config.NetworkPluginConfDir - if name != defaultNetworkPlugin { - if rc, ok := c.config.Runtimes[name]; ok { - path = rc.NetworkPluginConfDir - } - } - if path != "" { - m, err := newCNINetConfSyncer(path, i, c.cniLoadOptions()) - if err != nil { - return nil, fmt.Errorf("failed to create cni conf monitor for %s: %w", name, err) - } - c.cniNetConfMonitor[name] = m - } - } - - // Preload base OCI specs - c.baseOCISpecs, err = loadBaseOCISpecs(&config) - if err != nil { - return nil, err - } - - // Load all sandbox controllers(pod sandbox controller and remote shim controller) - c.sandboxControllers[criconfig.ModePodSandbox] = podsandbox.New(config, client, c.sandboxStore, c.os, c, imageService, c.baseOCISpecs) - 
c.sandboxControllers[criconfig.ModeShim] = client.SandboxController() - - c.nri = nri - - return c, nil -} - -// BackOffEvent is a temporary workaround to call eventMonitor from controller.Stop. -// TODO: get rid of this. -func (c *criService) BackOffEvent(id string, event interface{}) { - c.eventMonitor.backOff.enBackOff(id, event) -} - -// Register registers all required services onto a specific grpc server. -// This is used by containerd cri plugin. -func (c *criService) Register(s *grpc.Server) error { - return c.register(s) -} - -// RegisterTCP register all required services onto a GRPC server on TCP. -// This is used by containerd CRI plugin. -func (c *criService) RegisterTCP(s *grpc.Server) error { - if !c.config.DisableTCPService { - return c.register(s) - } - return nil -} - -// Run starts the CRI service. -func (c *criService) Run(ready func()) error { - log.L.Info("Start subscribing containerd event") - c.eventMonitor.subscribe(c.client) - - log.L.Infof("Start recovering state") - if err := c.recover(ctrdutil.NamespacedContext()); err != nil { - return fmt.Errorf("failed to recover state: %w", err) - } - - // Start event handler. - log.L.Info("Start event monitor") - eventMonitorErrCh := c.eventMonitor.start() - - // Start CNI network conf syncers - cniNetConfMonitorErrCh := make(chan error, len(c.cniNetConfMonitor)) - var netSyncGroup sync.WaitGroup - for name, h := range c.cniNetConfMonitor { - netSyncGroup.Add(1) - log.L.Infof("Start cni network conf syncer for %s", name) - go func(h *cniNetConfSyncer) { - cniNetConfMonitorErrCh <- h.syncLoop() - netSyncGroup.Done() - }(h) - } - // For platforms that may not support CNI (darwin etc.) there's no - // use in launching this as `Wait` will return immediately. Further - // down we select on this channel along with some others to determine - // if we should Close() the CRI service, so closing this preemptively - // isn't good. 
- if len(c.cniNetConfMonitor) > 0 { - go func() { - netSyncGroup.Wait() - close(cniNetConfMonitorErrCh) - }() - } - - // Start streaming server. - log.L.Info("Start streaming server") - streamServerErrCh := make(chan error) - go func() { - defer close(streamServerErrCh) - if err := c.streamServer.Start(true); err != nil && err != http.ErrServerClosed { - log.L.WithError(err).Error("Failed to start streaming server") - streamServerErrCh <- err - } - }() - - // register CRI domain with NRI - if err := c.nri.Register(&criImplementation{c}); err != nil { - return fmt.Errorf("failed to set up NRI for CRI service: %w", err) - } - - // Set the server as initialized. GRPC services could start serving traffic. - c.initialized.Store(true) - ready() - - var eventMonitorErr, streamServerErr, cniNetConfMonitorErr error - // Stop the whole CRI service if any of the critical service exits. - select { - case eventMonitorErr = <-eventMonitorErrCh: - case streamServerErr = <-streamServerErrCh: - case cniNetConfMonitorErr = <-cniNetConfMonitorErrCh: - } - if err := c.Close(); err != nil { - return fmt.Errorf("failed to stop cri service: %w", err) - } - // If the error is set above, err from channel must be nil here, because - // the channel is supposed to be closed. Or else, we wait and set it. - if err := <-eventMonitorErrCh; err != nil { - eventMonitorErr = err - } - log.L.Info("Event monitor stopped") - if err := <-streamServerErrCh; err != nil { - streamServerErr = err - } - log.L.Info("Stream server stopped") - if eventMonitorErr != nil { - return fmt.Errorf("event monitor error: %w", eventMonitorErr) - } - if streamServerErr != nil { - return fmt.Errorf("stream server error: %w", streamServerErr) - } - if cniNetConfMonitorErr != nil { - return fmt.Errorf("cni network conf monitor error: %w", cniNetConfMonitorErr) - } - return nil -} - -// Close stops the CRI service. -// TODO(random-liu): Make close synchronous. 
-func (c *criService) Close() error { - log.L.Info("Stop CRI service") - for name, h := range c.cniNetConfMonitor { - if err := h.stop(); err != nil { - log.L.WithError(err).Errorf("failed to stop cni network conf monitor for %s", name) - } - } - c.eventMonitor.stop() - if err := c.streamServer.Stop(); err != nil { - return fmt.Errorf("failed to stop stream server: %w", err) - } - return nil -} - -// IsInitialized indicates whether CRI service has finished initialization. -func (c *criService) IsInitialized() bool { - return c.initialized.Load() -} - -func (c *criService) register(s *grpc.Server) error { - instrumented := instrument.NewService(c) - runtime.RegisterRuntimeServiceServer(s, instrumented) - runtime.RegisterImageServiceServer(s, instrumented) - return nil -} - -// imageFSPath returns containerd image filesystem path. -// Note that if containerd changes directory layout, we also needs to change this. -func imageFSPath(rootDir, snapshotter string) string { - return filepath.Join(rootDir, plugin.SnapshotPlugin.String()+"."+snapshotter) -} - -func loadOCISpec(filename string) (*oci.Spec, error) { - file, err := os.Open(filename) - if err != nil { - return nil, fmt.Errorf("failed to open base OCI spec: %s: %w", filename, err) - } - defer file.Close() - - spec := oci.Spec{} - if err := json.NewDecoder(file).Decode(&spec); err != nil { - return nil, fmt.Errorf("failed to parse base OCI spec file: %w", err) - } - - return &spec, nil -} - -func loadBaseOCISpecs(config *criconfig.Config) (map[string]*oci.Spec, error) { - specs := map[string]*oci.Spec{} - for _, cfg := range config.Runtimes { - if cfg.BaseRuntimeSpec == "" { - continue - } - - // Don't load same file twice - if _, ok := specs[cfg.BaseRuntimeSpec]; ok { - continue - } - - spec, err := loadOCISpec(cfg.BaseRuntimeSpec) - if err != nil { - return nil, fmt.Errorf("failed to load base OCI spec from file: %s: %w", cfg.BaseRuntimeSpec, err) - } - - specs[cfg.BaseRuntimeSpec] = spec - } - - return specs, 
nil -} - -// ValidateMode validate the given mod value, -// returns err if mod is empty or unknown -func ValidateMode(modeStr string) error { - switch modeStr { - case string(criconfig.ModePodSandbox), string(criconfig.ModeShim): - return nil - case "": - return fmt.Errorf("empty sandbox controller mode") - default: - return fmt.Errorf("unknown sandbox controller mode: %s", modeStr) - } -} diff --git a/pkg/cri/sbserver/service_linux.go b/pkg/cri/sbserver/service_linux.go deleted file mode 100644 index 79ed66ccc..000000000 --- a/pkg/cri/sbserver/service_linux.go +++ /dev/null @@ -1,106 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "fmt" - - "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" - "github.com/opencontainers/selinux/go-selinux" - - "github.com/containerd/containerd/pkg/cap" - "github.com/containerd/containerd/pkg/userns" - "github.com/containerd/go-cni" - "github.com/containerd/log" -) - -// networkAttachCount is the minimum number of networks the PodSandbox -// attaches to -const networkAttachCount = 2 - -// initPlatform handles linux specific initialization for the CRI service. 
-func (c *criService) initPlatform() (err error) { - if userns.RunningInUserNS() { - if c.apparmorEnabled() || !c.config.RestrictOOMScoreAdj { - log.L.Warn("Running CRI plugin in a user namespace typically requires disable_apparmor and restrict_oom_score_adj to be true") - } - } - - if c.config.EnableSelinux { - if !selinux.GetEnabled() { - log.L.Warn("Selinux is not supported") - } - if r := c.config.SelinuxCategoryRange; r > 0 { - selinux.CategoryRange = uint32(r) - } - } else { - selinux.SetDisabled() - } - - pluginDirs := map[string]string{ - defaultNetworkPlugin: c.config.NetworkPluginConfDir, - } - for name, conf := range c.config.Runtimes { - if conf.NetworkPluginConfDir != "" { - pluginDirs[name] = conf.NetworkPluginConfDir - } - } - - c.netPlugin = make(map[string]cni.CNI) - for name, dir := range pluginDirs { - max := c.config.NetworkPluginMaxConfNum - if name != defaultNetworkPlugin { - if m := c.config.Runtimes[name].NetworkPluginMaxConfNum; m != 0 { - max = m - } - } - // Pod needs to attach to at least loopback network and a non host network, - // hence networkAttachCount is 2. If there are more network configs the - // pod will be attached to all the networks but we will only use the ip - // of the default network interface as the pod IP. - i, err := cni.New(cni.WithMinNetworkCount(networkAttachCount), - cni.WithPluginConfDir(dir), - cni.WithPluginMaxConfNum(max), - cni.WithPluginDir([]string{c.config.NetworkPluginBinDir})) - if err != nil { - return fmt.Errorf("failed to initialize cni: %w", err) - } - c.netPlugin[name] = i - } - - if c.allCaps == nil { - c.allCaps, err = cap.Current() - if err != nil { - return fmt.Errorf("failed to get caps: %w", err) - } - } - - if c.config.EnableCDI { - reg := cdi.GetRegistry() - err = reg.Configure(cdi.WithSpecDirs(c.config.CDISpecDirs...)) - if err != nil { - return fmt.Errorf("failed to configure CDI registry") - } - } - - return nil -} - -// cniLoadOptions returns cni load options for the linux. 
-func (c *criService) cniLoadOptions() []cni.Opt { - return []cni.Opt{cni.WithLoNetwork, cni.WithDefaultConf} -} diff --git a/pkg/cri/sbserver/service_other.go b/pkg/cri/sbserver/service_other.go deleted file mode 100644 index 053178600..000000000 --- a/pkg/cri/sbserver/service_other.go +++ /dev/null @@ -1,35 +0,0 @@ -//go:build !windows && !linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "github.com/containerd/go-cni" -) - -// initPlatform handles initialization of the CRI service for non-windows -// and non-linux platforms. -func (c *criService) initPlatform() error { - return nil -} - -// cniLoadOptions returns cni load options for non-windows and non-linux -// platforms. -func (c *criService) cniLoadOptions() []cni.Opt { - return []cni.Opt{} -} diff --git a/pkg/cri/sbserver/service_test.go b/pkg/cri/sbserver/service_test.go deleted file mode 100644 index b99ec00d9..000000000 --- a/pkg/cri/sbserver/service_test.go +++ /dev/null @@ -1,98 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "encoding/json" - "os" - "testing" - - "github.com/containerd/containerd/oci" - "github.com/containerd/go-cni" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - criconfig "github.com/containerd/containerd/pkg/cri/config" - servertesting "github.com/containerd/containerd/pkg/cri/server/testing" - containerstore "github.com/containerd/containerd/pkg/cri/store/container" - "github.com/containerd/containerd/pkg/cri/store/label" - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - ostesting "github.com/containerd/containerd/pkg/os/testing" - "github.com/containerd/containerd/pkg/registrar" -) - -// newTestCRIService creates a fake criService for test. 
-func newTestCRIService() *criService { - labels := label.NewStore() - return &criService{ - imageService: &fakeImageService{}, - config: testConfig, - os: ostesting.NewFakeOS(), - sandboxStore: sandboxstore.NewStore(labels), - sandboxNameIndex: registrar.NewRegistrar(), - containerStore: containerstore.NewStore(labels), - containerNameIndex: registrar.NewRegistrar(), - netPlugin: map[string]cni.CNI{ - defaultNetworkPlugin: servertesting.NewFakeCNIPlugin(), - }, - } -} - -func TestLoadBaseOCISpec(t *testing.T) { - spec := oci.Spec{Version: "1.0.2", Hostname: "default"} - - file, err := os.CreateTemp("", "spec-test-") - require.NoError(t, err) - - defer func() { - assert.NoError(t, file.Close()) - assert.NoError(t, os.RemoveAll(file.Name())) - }() - - err = json.NewEncoder(file).Encode(&spec) - assert.NoError(t, err) - - config := criconfig.Config{} - config.Runtimes = map[string]criconfig.Runtime{ - "runc": {BaseRuntimeSpec: file.Name()}, - } - - specs, err := loadBaseOCISpecs(&config) - assert.NoError(t, err) - - assert.Len(t, specs, 1) - - out, ok := specs[file.Name()] - assert.True(t, ok, "expected spec with file name %q", file.Name()) - - assert.Equal(t, "1.0.2", out.Version) - assert.Equal(t, "default", out.Hostname) -} - -func TestValidateMode(t *testing.T) { - mode := "" - assert.Error(t, ValidateMode(mode)) - - mode = "podsandbox" - assert.NoError(t, ValidateMode(mode)) - - mode = "shim" - assert.NoError(t, ValidateMode(mode)) - - mode = "nonexistent" - assert.Error(t, ValidateMode(mode)) -} diff --git a/pkg/cri/sbserver/service_windows.go b/pkg/cri/sbserver/service_windows.go deleted file mode 100644 index 2f73d248a..000000000 --- a/pkg/cri/sbserver/service_windows.go +++ /dev/null @@ -1,69 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "fmt" - - "github.com/containerd/go-cni" -) - -// windowsNetworkAttachCount is the minimum number of networks the PodSandbox -// attaches to -const windowsNetworkAttachCount = 1 - -// initPlatform handles windows specific initialization for the CRI service. -func (c *criService) initPlatform() error { - pluginDirs := map[string]string{ - defaultNetworkPlugin: c.config.NetworkPluginConfDir, - } - for name, conf := range c.config.Runtimes { - if conf.NetworkPluginConfDir != "" { - pluginDirs[name] = conf.NetworkPluginConfDir - } - } - - c.netPlugin = make(map[string]cni.CNI) - for name, dir := range pluginDirs { - max := c.config.NetworkPluginMaxConfNum - if name != defaultNetworkPlugin { - if m := c.config.Runtimes[name].NetworkPluginMaxConfNum; m != 0 { - max = m - } - } - // For windows, the loopback network is added as default. - // There is no need to explicitly add one hence networkAttachCount is 1. - // If there are more network configs the pod will be attached to all the - // networks but we will only use the ip of the default network interface - // as the pod IP. - i, err := cni.New(cni.WithMinNetworkCount(windowsNetworkAttachCount), - cni.WithPluginConfDir(dir), - cni.WithPluginMaxConfNum(max), - cni.WithPluginDir([]string{c.config.NetworkPluginBinDir})) - if err != nil { - return fmt.Errorf("failed to initialize cni: %w", err) - } - c.netPlugin[name] = i - } - - return nil -} - -// cniLoadOptions returns cni load options for the windows. 
-func (c *criService) cniLoadOptions() []cni.Opt { - return []cni.Opt{cni.WithDefaultConf} -} diff --git a/pkg/cri/sbserver/status.go b/pkg/cri/sbserver/status.go deleted file mode 100644 index e2547fdba..000000000 --- a/pkg/cri/sbserver/status.go +++ /dev/null @@ -1,98 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "encoding/json" - "fmt" - goruntime "runtime" - - "github.com/containerd/log" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// networkNotReadyReason is the reason reported when network is not ready. -const networkNotReadyReason = "NetworkPluginNotReady" - -// Status returns the status of the runtime. -func (c *criService) Status(ctx context.Context, r *runtime.StatusRequest) (*runtime.StatusResponse, error) { - // As a containerd plugin, if CRI plugin is serving request, - // containerd must be ready. 
- runtimeCondition := &runtime.RuntimeCondition{ - Type: runtime.RuntimeReady, - Status: true, - } - networkCondition := &runtime.RuntimeCondition{ - Type: runtime.NetworkReady, - Status: true, - } - netPlugin := c.netPlugin[defaultNetworkPlugin] - // Check the status of the cni initialization - if netPlugin != nil { - if err := netPlugin.Status(); err != nil { - networkCondition.Status = false - networkCondition.Reason = networkNotReadyReason - networkCondition.Message = fmt.Sprintf("Network plugin returns error: %v", err) - } - } - - resp := &runtime.StatusResponse{ - Status: &runtime.RuntimeStatus{Conditions: []*runtime.RuntimeCondition{ - runtimeCondition, - networkCondition, - }}, - } - if r.Verbose { - configByt, err := json.Marshal(c.config) - if err != nil { - return nil, err - } - resp.Info = make(map[string]string) - resp.Info["config"] = string(configByt) - versionByt, err := json.Marshal(goruntime.Version()) - if err != nil { - return nil, err - } - resp.Info["golang"] = string(versionByt) - - if netPlugin != nil { - cniConfig, err := json.Marshal(netPlugin.GetConfig()) - if err != nil { - log.G(ctx).WithError(err).Errorf("Failed to marshal CNI config %v", err) - } - resp.Info["cniconfig"] = string(cniConfig) - } - - defaultStatus := "OK" - for name, h := range c.cniNetConfMonitor { - s := "OK" - if h == nil { - continue - } - if lerr := h.lastStatus(); lerr != nil { - s = lerr.Error() - } - resp.Info[fmt.Sprintf("lastCNILoadStatus.%s", name)] = s - if name == defaultNetworkPlugin { - defaultStatus = s - } - } - resp.Info["lastCNILoadStatus"] = defaultStatus - } - return resp, nil -} diff --git a/pkg/cri/sbserver/streaming.go b/pkg/cri/sbserver/streaming.go deleted file mode 100644 index e2ba8fa14..000000000 --- a/pkg/cri/sbserver/streaming.go +++ /dev/null @@ -1,240 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "context" - "crypto/tls" - "errors" - "fmt" - "io" - "math" - "net" - "os" - "time" - - k8snet "k8s.io/apimachinery/pkg/util/net" - "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/client-go/tools/remotecommand" - k8scert "k8s.io/client-go/util/cert" - "k8s.io/utils/exec" - - "github.com/containerd/containerd/pkg/cri/streaming" - ctrdutil "github.com/containerd/containerd/pkg/cri/util" -) - -type streamListenerMode int - -const ( - x509KeyPairTLS streamListenerMode = iota - selfSignTLS - withoutTLS -) - -func getStreamListenerMode(c *criService) (streamListenerMode, error) { - if c.config.EnableTLSStreaming { - if c.config.X509KeyPairStreaming.TLSCertFile != "" && c.config.X509KeyPairStreaming.TLSKeyFile != "" { - return x509KeyPairTLS, nil - } - if c.config.X509KeyPairStreaming.TLSCertFile != "" && c.config.X509KeyPairStreaming.TLSKeyFile == "" { - return -1, errors.New("must set X509KeyPairStreaming.TLSKeyFile") - } - if c.config.X509KeyPairStreaming.TLSCertFile == "" && c.config.X509KeyPairStreaming.TLSKeyFile != "" { - return -1, errors.New("must set X509KeyPairStreaming.TLSCertFile") - } - return selfSignTLS, nil - } - if c.config.X509KeyPairStreaming.TLSCertFile != "" { - return -1, errors.New("X509KeyPairStreaming.TLSCertFile is set but EnableTLSStreaming is not set") - } - if c.config.X509KeyPairStreaming.TLSKeyFile != "" { - return -1, errors.New("X509KeyPairStreaming.TLSKeyFile is set but EnableTLSStreaming is not set") - } - return withoutTLS, nil -} - -func newStreamServer(c *criService, addr, port, streamIdleTimeout 
string) (streaming.Server, error) { - if addr == "" { - a, err := k8snet.ResolveBindAddress(nil) - if err != nil { - return nil, fmt.Errorf("failed to get stream server address: %w", err) - } - addr = a.String() - } - config := streaming.DefaultConfig - if streamIdleTimeout != "" { - var err error - config.StreamIdleTimeout, err = time.ParseDuration(streamIdleTimeout) - if err != nil { - return nil, fmt.Errorf("invalid stream idle timeout: %w", err) - } - } - config.Addr = net.JoinHostPort(addr, port) - run := newStreamRuntime(c) - tlsMode, err := getStreamListenerMode(c) - if err != nil { - return nil, fmt.Errorf("invalid stream server configuration: %w", err) - } - switch tlsMode { - case x509KeyPairTLS: - tlsCert, err := tls.LoadX509KeyPair(c.config.X509KeyPairStreaming.TLSCertFile, c.config.X509KeyPairStreaming.TLSKeyFile) - if err != nil { - return nil, fmt.Errorf("failed to load x509 key pair for stream server: %w", err) - } - config.TLSConfig = &tls.Config{ - Certificates: []tls.Certificate{tlsCert}, - } - return streaming.NewServer(config, run) - case selfSignTLS: - tlsCert, err := newTLSCert() - if err != nil { - return nil, fmt.Errorf("failed to generate tls certificate for stream server: %w", err) - } - config.TLSConfig = &tls.Config{ - Certificates: []tls.Certificate{tlsCert}, - InsecureSkipVerify: true, - } - return streaming.NewServer(config, run) - case withoutTLS: - return streaming.NewServer(config, run) - default: - return nil, errors.New("invalid configuration for the stream listener") - } -} - -type streamRuntime struct { - c *criService -} - -func newStreamRuntime(c *criService) streaming.Runtime { - return &streamRuntime{c: c} -} - -// Exec executes a command inside the container. exec.ExitError is returned if the command -// returns non-zero exit code. 
-func (s *streamRuntime) Exec(containerID string, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, - tty bool, resize <-chan remotecommand.TerminalSize) error { - exitCode, err := s.c.execInContainer(ctrdutil.NamespacedContext(), containerID, execOptions{ - cmd: cmd, - stdin: stdin, - stdout: stdout, - stderr: stderr, - tty: tty, - resize: resize, - }) - if err != nil { - return fmt.Errorf("failed to exec in container: %w", err) - } - if *exitCode == 0 { - return nil - } - return &exec.CodeExitError{ - Err: fmt.Errorf("error executing command %v, exit code %d", cmd, *exitCode), - Code: int(*exitCode), - } -} - -func (s *streamRuntime) Attach(containerID string, in io.Reader, out, err io.WriteCloser, tty bool, - resize <-chan remotecommand.TerminalSize) error { - return s.c.attachContainer(ctrdutil.NamespacedContext(), containerID, in, out, err, tty, resize) -} - -func (s *streamRuntime) PortForward(podSandboxID string, port int32, stream io.ReadWriteCloser) error { - if port <= 0 || port > math.MaxUint16 { - return fmt.Errorf("invalid port %d", port) - } - ctx := ctrdutil.NamespacedContext() - return s.c.portForward(ctx, podSandboxID, port, stream) -} - -// handleResizing spawns a goroutine that processes the resize channel, calling resizeFunc for each -// remotecommand.TerminalSize received from the channel. -func handleResizing(ctx context.Context, resize <-chan remotecommand.TerminalSize, resizeFunc func(size remotecommand.TerminalSize)) { - if resize == nil { - return - } - - go func() { - defer runtime.HandleCrash() - - for { - select { - case <-ctx.Done(): - return - case size, ok := <-resize: - if !ok { - return - } - if size.Height < 1 || size.Width < 1 { - continue - } - resizeFunc(size) - } - } - }() -} - -// newTLSCert returns a self CA signed tls.certificate. -// TODO (mikebrow): replace / rewrite this function to support using CA -// signing of the certificate. 
Requires a security plan for kubernetes regarding -// CRI connections / streaming, etc. For example, kubernetes could configure or -// require a CA service and pass a configuration down through CRI. -func newTLSCert() (tls.Certificate, error) { - fail := func(err error) (tls.Certificate, error) { return tls.Certificate{}, err } - - hostName, err := os.Hostname() - if err != nil { - return fail(fmt.Errorf("failed to get hostname: %w", err)) - } - - addrs, err := net.InterfaceAddrs() - if err != nil { - return fail(fmt.Errorf("failed to get host IP addresses: %w", err)) - } - - var alternateIPs []net.IP - var alternateDNS []string - for _, addr := range addrs { - var ip net.IP - - switch v := addr.(type) { - case *net.IPNet: - ip = v.IP - case *net.IPAddr: - ip = v.IP - default: - continue - } - - alternateIPs = append(alternateIPs, ip) - alternateDNS = append(alternateDNS, ip.String()) - } - - // Generate a self signed certificate key (CA is self) - certPem, keyPem, err := k8scert.GenerateSelfSignedCertKey(hostName, alternateIPs, alternateDNS) - if err != nil { - return fail(fmt.Errorf("certificate key could not be created: %w", err)) - } - - // Load the tls certificate - tlsCert, err := tls.X509KeyPair(certPem, keyPem) - if err != nil { - return fail(fmt.Errorf("certificate could not be loaded: %w", err)) - } - - return tlsCert, nil -} diff --git a/pkg/cri/sbserver/streaming_test.go b/pkg/cri/sbserver/streaming_test.go deleted file mode 100644 index 9796b3876..000000000 --- a/pkg/cri/sbserver/streaming_test.go +++ /dev/null @@ -1,163 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import ( - "testing" - - "github.com/containerd/containerd/pkg/cri/config" - "github.com/stretchr/testify/assert" -) - -func TestValidateStreamServer(t *testing.T) { - for _, test := range []struct { - desc string - *criService - tlsMode streamListenerMode - expectErr bool - }{ - { - desc: "should pass with default withoutTLS", - criService: &criService{ - config: config.Config{ - PluginConfig: config.DefaultConfig(), - }, - }, - tlsMode: withoutTLS, - expectErr: false, - }, - { - desc: "should pass with x509KeyPairTLS", - criService: &criService{ - config: config.Config{ - PluginConfig: config.PluginConfig{ - EnableTLSStreaming: true, - X509KeyPairStreaming: config.X509KeyPairStreaming{ - TLSKeyFile: "non-empty", - TLSCertFile: "non-empty", - }, - }, - }, - }, - tlsMode: x509KeyPairTLS, - expectErr: false, - }, - { - desc: "should pass with selfSign", - criService: &criService{ - config: config.Config{ - PluginConfig: config.PluginConfig{ - EnableTLSStreaming: true, - }, - }, - }, - tlsMode: selfSignTLS, - expectErr: false, - }, - { - desc: "should return error with X509 keypair but not EnableTLSStreaming", - criService: &criService{ - config: config.Config{ - PluginConfig: config.PluginConfig{ - EnableTLSStreaming: false, - X509KeyPairStreaming: config.X509KeyPairStreaming{ - TLSKeyFile: "non-empty", - TLSCertFile: "non-empty", - }, - }, - }, - }, - tlsMode: -1, - expectErr: true, - }, - { - desc: "should return error with X509 TLSCertFile empty", - criService: &criService{ - config: config.Config{ - PluginConfig: config.PluginConfig{ - 
EnableTLSStreaming: true, - X509KeyPairStreaming: config.X509KeyPairStreaming{ - TLSKeyFile: "non-empty", - TLSCertFile: "", - }, - }, - }, - }, - tlsMode: -1, - expectErr: true, - }, - { - desc: "should return error with X509 TLSKeyFile empty", - criService: &criService{ - config: config.Config{ - PluginConfig: config.PluginConfig{ - EnableTLSStreaming: true, - X509KeyPairStreaming: config.X509KeyPairStreaming{ - TLSKeyFile: "", - TLSCertFile: "non-empty", - }, - }, - }, - }, - tlsMode: -1, - expectErr: true, - }, - { - desc: "should return error without EnableTLSStreaming and only TLSCertFile set", - criService: &criService{ - config: config.Config{ - PluginConfig: config.PluginConfig{ - EnableTLSStreaming: false, - X509KeyPairStreaming: config.X509KeyPairStreaming{ - TLSKeyFile: "", - TLSCertFile: "non-empty", - }, - }, - }, - }, - tlsMode: -1, - expectErr: true, - }, - { - desc: "should return error without EnableTLSStreaming and only TLSKeyFile set", - criService: &criService{ - config: config.Config{ - PluginConfig: config.PluginConfig{ - EnableTLSStreaming: false, - X509KeyPairStreaming: config.X509KeyPairStreaming{ - TLSKeyFile: "non-empty", - TLSCertFile: "", - }, - }, - }, - }, - tlsMode: -1, - expectErr: true, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - tlsMode, err := getStreamListenerMode(test.criService) - if test.expectErr { - assert.Error(t, err) - return - } - assert.NoError(t, err) - assert.Equal(t, test.tlsMode, tlsMode) - }) - } -} diff --git a/pkg/cri/sbserver/test_config.go b/pkg/cri/sbserver/test_config.go deleted file mode 100644 index 44908435b..000000000 --- a/pkg/cri/sbserver/test_config.go +++ /dev/null @@ -1,37 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package sbserver - -import criconfig "github.com/containerd/containerd/pkg/cri/config" - -const ( - testRootDir = "/test/root" - testStateDir = "/test/state" - // Use an image id as test sandbox image to avoid image name resolve. - // TODO(random-liu): Change this to image name after we have complete image - // management unit test framework. - testSandboxImage = "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113798" // #nosec G101 -) - -var testConfig = criconfig.Config{ - RootDir: testRootDir, - StateDir: testStateDir, - PluginConfig: criconfig.PluginConfig{ - SandboxImage: testSandboxImage, - TolerateMissingHugetlbController: true, - }, -} diff --git a/pkg/cri/sbserver/update_runtime_config.go b/pkg/cri/sbserver/update_runtime_config.go deleted file mode 100644 index b609ec493..000000000 --- a/pkg/cri/sbserver/update_runtime_config.go +++ /dev/null @@ -1,148 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "fmt" - "net" - "os" - "path/filepath" - "strings" - "text/template" - "time" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/pkg/atomicfile" - "github.com/containerd/log" -) - -// cniConfigTemplate contains the values containerd will overwrite -// in the cni config template. -type cniConfigTemplate struct { - // PodCIDR is the cidr for pods on the node. - PodCIDR string - // PodCIDRRanges is the cidr ranges for pods on the node. - PodCIDRRanges []string - // Routes is a list of routes configured. - Routes []string -} - -const ( - // cniConfigFileName is the name of cni config file generated by containerd. - cniConfigFileName = "10-containerd-net.conflist" - // zeroCIDRv6 is the null route for IPv6. - zeroCIDRv6 = "::/0" - // zeroCIDRv4 is the null route for IPv4. - zeroCIDRv4 = "0.0.0.0/0" -) - -// UpdateRuntimeConfig updates the runtime config. Currently only handles podCIDR updates. -func (c *criService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateRuntimeConfigRequest) (*runtime.UpdateRuntimeConfigResponse, error) { - podCIDRs := r.GetRuntimeConfig().GetNetworkConfig().GetPodCidr() - if podCIDRs == "" { - return &runtime.UpdateRuntimeConfigResponse{}, nil - } - cidrs := strings.Split(podCIDRs, ",") - for i := range cidrs { - cidrs[i] = strings.TrimSpace(cidrs[i]) - } - routes, err := getRoutes(cidrs) - if err != nil { - return nil, fmt.Errorf("get routes: %w", err) - } - - confTemplate := c.config.NetworkPluginConfTemplate - if confTemplate == "" { - log.G(ctx).Info("No cni config template is specified, wait for other system components to drop the config.") - return &runtime.UpdateRuntimeConfigResponse{}, nil - } - netPlugin := c.netPlugin[defaultNetworkPlugin] - if netPlugin == nil { - log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) - return &runtime.UpdateRuntimeConfigResponse{}, nil - } - netStart := 
time.Now() - err = netPlugin.Status() - networkPluginOperations.WithValues(networkStatusOp).Inc() - networkPluginOperationsLatency.WithValues(networkStatusOp).UpdateSince(netStart) - if err == nil { - log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) - return &runtime.UpdateRuntimeConfigResponse{}, nil - } - networkPluginOperationsErrors.WithValues(networkStatusOp).Inc() - if err := netPlugin.Load(c.cniLoadOptions()...); err == nil { - log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate) - return &runtime.UpdateRuntimeConfigResponse{}, nil - } - if err := writeCNIConfigFile(ctx, c.config.NetworkPluginConfDir, confTemplate, cidrs[0], cidrs, routes); err != nil { - return nil, err - } - return &runtime.UpdateRuntimeConfigResponse{}, nil -} - -// getRoutes generates required routes for the passed in cidrs. -func getRoutes(cidrs []string) ([]string, error) { - var ( - routes []string - hasV4, hasV6 bool - ) - for _, c := range cidrs { - _, cidr, err := net.ParseCIDR(c) - if err != nil { - return nil, err - } - if cidr.IP.To4() != nil { - hasV4 = true - } else { - hasV6 = true - } - } - if hasV4 { - routes = append(routes, zeroCIDRv4) - } - if hasV6 { - routes = append(routes, zeroCIDRv6) - } - return routes, nil -} - -func writeCNIConfigFile(ctx context.Context, confDir string, confTemplate string, podCIDR string, podCIDRRanges []string, routes []string) error { - log.G(ctx).Infof("Generating cni config from template %q", confTemplate) - // generate cni config file from the template with updated pod cidr. 
- t, err := template.ParseFiles(confTemplate) - if err != nil { - return fmt.Errorf("failed to parse cni config template %q: %w", confTemplate, err) - } - if err := os.MkdirAll(confDir, 0755); err != nil { - return fmt.Errorf("failed to create cni config directory: %q: %w", confDir, err) - } - confFile := filepath.Join(confDir, cniConfigFileName) - f, err := atomicfile.New(confFile, 0o644) - defer func() { - err = f.Close() - }() - if err := t.Execute(f, cniConfigTemplate{ - PodCIDR: podCIDR, - PodCIDRRanges: podCIDRRanges, - Routes: routes, - }); err != nil { - return fmt.Errorf("failed to generate cni config file %q: %w", confFile, err) - } - return err -} diff --git a/pkg/cri/sbserver/update_runtime_config_test.go b/pkg/cri/sbserver/update_runtime_config_test.go deleted file mode 100644 index e416a7112..000000000 --- a/pkg/cri/sbserver/update_runtime_config_test.go +++ /dev/null @@ -1,143 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - "errors" - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - criconfig "github.com/containerd/containerd/pkg/cri/config" - servertesting "github.com/containerd/containerd/pkg/cri/server/testing" -) - -func TestUpdateRuntimeConfig(t *testing.T) { - const ( - testTemplate = ` -{ - "name": "test-pod-network", - "cniVersion": "1.0.0", - "plugins": [ - { - "type": "ptp", - "mtu": 1460, - "ipam": { - "type": "host-local", - "subnet": "{{.PodCIDR}}", - "ranges": [{{range $i, $range := .PodCIDRRanges}}{{if $i}}, {{end}}[{"subnet": "{{$range}}"}]{{end}}], - "routes": [{{range $i, $route := .Routes}}{{if $i}}, {{end}}{"dst": "{{$route}}"}{{end}}] - } - }, - ] -}` - testCIDR = "10.0.0.0/24, 2001:4860:4860::/64" - expected = ` -{ - "name": "test-pod-network", - "cniVersion": "1.0.0", - "plugins": [ - { - "type": "ptp", - "mtu": 1460, - "ipam": { - "type": "host-local", - "subnet": "10.0.0.0/24", - "ranges": [[{"subnet": "10.0.0.0/24"}], [{"subnet": "2001:4860:4860::/64"}]], - "routes": [{"dst": "0.0.0.0/0"}, {"dst": "::/0"}] - } - }, - ] -}` - ) - - for _, test := range []struct { - name string - noTemplate bool - emptyCIDR bool - networkReady bool - expectCNIConfig bool - }{ - { - name: "should not generate cni config if cidr is empty", - emptyCIDR: true, - expectCNIConfig: false, - }, - { - name: "should not generate cni config if template file is not specified", - noTemplate: true, - expectCNIConfig: false, - }, - { - name: "should not generate cni config if network is ready", - networkReady: true, - expectCNIConfig: false, - }, - { - name: "should generate cni config if template is specified and cidr is provided", - expectCNIConfig: true, - }, - } { - test := test - t.Run(test.name, func(t *testing.T) { - testDir := t.TempDir() - templateName := filepath.Join(testDir, "template") - err := 
os.WriteFile(templateName, []byte(testTemplate), 0666) - require.NoError(t, err) - confDir := filepath.Join(testDir, "net.d") - confName := filepath.Join(confDir, cniConfigFileName) - - c := newTestCRIService() - c.config.CniConfig = criconfig.CniConfig{ - NetworkPluginConfDir: confDir, - NetworkPluginConfTemplate: templateName, - } - req := &runtime.UpdateRuntimeConfigRequest{ - RuntimeConfig: &runtime.RuntimeConfig{ - NetworkConfig: &runtime.NetworkConfig{ - PodCidr: testCIDR, - }, - }, - } - if test.noTemplate { - c.config.CniConfig.NetworkPluginConfTemplate = "" - } - if test.emptyCIDR { - req.RuntimeConfig.NetworkConfig.PodCidr = "" - } - if !test.networkReady { - c.netPlugin[defaultNetworkPlugin].(*servertesting.FakeCNIPlugin).StatusErr = errors.New("random error") - c.netPlugin[defaultNetworkPlugin].(*servertesting.FakeCNIPlugin).LoadErr = errors.New("random error") - } - _, err = c.UpdateRuntimeConfig(context.Background(), req) - assert.NoError(t, err) - if !test.expectCNIConfig { - _, err := os.Stat(confName) - assert.Error(t, err) - } else { - got, err := os.ReadFile(confName) - assert.NoError(t, err) - assert.Equal(t, expected, string(got)) - } - }) - } -} diff --git a/pkg/cri/sbserver/version.go b/pkg/cri/sbserver/version.go deleted file mode 100644 index 7ea9778f3..000000000 --- a/pkg/cri/sbserver/version.go +++ /dev/null @@ -1,43 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package sbserver - -import ( - "context" - - "github.com/containerd/containerd/version" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/pkg/cri/constants" -) - -const ( - containerName = "containerd" - // kubeAPIVersion is the api version of kubernetes. - // TODO(random-liu): Change this to actual CRI version. - kubeAPIVersion = "0.1.0" -) - -// Version returns the runtime name, runtime version and runtime API version. -func (c *criService) Version(ctx context.Context, r *runtime.VersionRequest) (*runtime.VersionResponse, error) { - return &runtime.VersionResponse{ - Version: kubeAPIVersion, - RuntimeName: containerName, - RuntimeVersion: version.Version, - RuntimeApiVersion: constants.CRIVersion, - }, nil -} diff --git a/pkg/cri/server/blockio_stub_linux.go b/pkg/cri/server/blockio_stub.go similarity index 100% rename from pkg/cri/server/blockio_stub_linux.go rename to pkg/cri/server/blockio_stub.go diff --git a/pkg/cri/server/cni_conf_syncer.go b/pkg/cri/server/cni_conf_syncer.go index 1efb8e2ea..f56e22203 100644 --- a/pkg/cri/server/cni_conf_syncer.go +++ b/pkg/cri/server/cni_conf_syncer.go @@ -22,7 +22,7 @@ import ( "path/filepath" "sync" - cni "github.com/containerd/go-cni" + "github.com/containerd/go-cni" "github.com/containerd/log" "github.com/fsnotify/fsnotify" ) diff --git a/pkg/cri/server/container_create.go b/pkg/cri/server/container_create.go index c54d79f22..b1bf08960 100644 --- a/pkg/cri/server/container_create.go +++ b/pkg/cri/server/container_create.go @@ -21,24 +21,29 @@ import ( "errors" "fmt" "path/filepath" - goruntime "runtime" + "strconv" + "strings" "time" "github.com/containerd/typeurl/v2" "github.com/davecgh/go-spew/spew" imagespec "github.com/opencontainers/image-spec/specs-go/v1" runtimespec "github.com/opencontainers/runtime-spec/specs-go" - selinux "github.com/opencontainers/selinux/go-selinux" + "github.com/opencontainers/selinux/go-selinux" + 
"github.com/opencontainers/selinux/go-selinux/label" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" "github.com/containerd/containerd" "github.com/containerd/containerd/containers" "github.com/containerd/containerd/oci" + "github.com/containerd/containerd/pkg/blockio" + "github.com/containerd/containerd/pkg/cri/annotations" criconfig "github.com/containerd/containerd/pkg/cri/config" cio "github.com/containerd/containerd/pkg/cri/io" customopts "github.com/containerd/containerd/pkg/cri/opts" containerstore "github.com/containerd/containerd/pkg/cri/store/container" "github.com/containerd/containerd/pkg/cri/util" + "github.com/containerd/containerd/platforms" "github.com/containerd/log" ) @@ -56,12 +61,21 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta if err != nil { return nil, fmt.Errorf("failed to find sandbox id %q: %w", r.GetPodSandboxId(), err) } - sandboxID := sandbox.ID - s, err := sandbox.Container.Task(ctx, nil) + + controller, err := c.getSandboxController(sandbox.Config, sandbox.RuntimeHandler) if err != nil { - return nil, fmt.Errorf("failed to get sandbox container task: %w", err) + return nil, fmt.Errorf("failed to get sandbox controller: %w", err) } - sandboxPid := s.Pid() + + cstatus, err := controller.Status(ctx, sandbox.ID, false) + if err != nil { + return nil, fmt.Errorf("failed to get controller status: %w", err) + } + + var ( + sandboxID = cstatus.SandboxID + sandboxPid = cstatus.Pid + ) // Generate unique id and name for the container and reserve the name. // Reserve the container name to avoid concurrent `CreateContainer` request creating @@ -94,7 +108,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta // Prepare container image snapshot. For container, the image should have // been pulled before creating the container, so do not ensure the image. 
- image, err := c.localResolve(config.GetImage().GetImage()) + image, err := c.LocalResolve(config.GetImage().GetImage()) if err != nil { return nil, fmt.Errorf("failed to resolve image %q: %w", config.GetImage().GetImage(), err) } @@ -104,11 +118,6 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta } start := time.Now() - // Run container using the same runtime with sandbox. - sandboxInfo, err := sandbox.Container.Info(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox %q info: %w", sandboxID, err) - } // Create container root directory. containerRootDir := c.getContainerRootDir(id) @@ -140,25 +149,39 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta } }() + platform, err := controller.Platform(ctx, sandboxID) + if err != nil { + return nil, fmt.Errorf("failed to query sandbox platform: %w", err) + } + var volumeMounts []*runtime.Mount if !c.config.IgnoreImageDefinedVolumes { // Create container image volumes mounts. - volumeMounts = c.volumeMounts(containerRootDir, config, &image.ImageSpec.Config) + volumeMounts = c.volumeMounts(platform, containerRootDir, config, &image.ImageSpec.Config) } else if len(image.ImageSpec.Config.Volumes) != 0 { log.G(ctx).Debugf("Ignoring volumes defined in image %v because IgnoreImageDefinedVolumes is set", image.ID) } - // Generate container mounts. 
- mounts := c.containerMounts(sandboxID, config) - ociRuntime, err := c.getSandboxRuntime(sandboxConfig, sandbox.Metadata.RuntimeHandler) if err != nil { return nil, fmt.Errorf("failed to get sandbox runtime: %w", err) } log.G(ctx).Debugf("Use OCI runtime %+v for sandbox %q and container %q", ociRuntime, sandboxID, id) - spec, err := c.containerSpec(id, sandboxID, sandboxPid, sandbox.NetNSPath, containerName, containerdImage.Name(), config, sandboxConfig, - &image.ImageSpec.Config, append(mounts, volumeMounts...), ociRuntime) + spec, err := c.buildContainerSpec( + platform, + id, + sandboxID, + sandboxPid, + sandbox.NetNSPath, + containerName, + containerdImage.Name(), + config, + sandboxConfig, + &image.ImageSpec.Config, + volumeMounts, + ociRuntime, + ) if err != nil { return nil, fmt.Errorf("failed to generate container %q spec: %w", id, err) } @@ -191,7 +214,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta // Set snapshotter before any other options. opts := []containerd.NewContainerOpts{ - containerd.WithSnapshotter(c.runtimeSnapshotter(ctx, ociRuntime)), + containerd.WithSnapshotter(c.RuntimeSnapshotter(ctx, ociRuntime)), // Prepare container rootfs. This is always writeable even if // the container wants a readonly rootfs since we want to give // the runtime (runc) a chance to modify (e.g. 
to create mount @@ -204,13 +227,6 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta for _, v := range volumeMounts { mountMap[filepath.Clean(v.HostPath)] = v.ContainerPath } - platform := imagespec.Platform{ - OS: image.ImageSpec.OS, - Architecture: image.ImageSpec.Architecture, - OSVersion: image.ImageSpec.OSVersion, - OSFeatures: image.ImageSpec.OSFeatures, - Variant: image.ImageSpec.Variant, - } opts = append(opts, customopts.WithVolumes(mountMap, platform)) } meta.ImageRef = image.ID @@ -239,23 +255,29 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta } }() - specOpts, err := c.containerSpecOpts(config, &image.ImageSpec.Config) + specOpts, err := c.platformSpecOpts(platform, config, &image.ImageSpec.Config) if err != nil { return nil, fmt.Errorf("failed to get container spec opts: %w", err) } containerLabels := buildLabels(config.Labels, image.ImageSpec.Config.Labels, containerKindContainer) - runtimeOptions, err := getRuntimeOptions(sandboxInfo) + sandboxInfo, err := c.client.SandboxStore().Get(ctx, sandboxID) if err != nil { - return nil, fmt.Errorf("failed to get runtime options: %w", err) + return nil, fmt.Errorf("unable to get sandbox %q metdata: %w", sandboxID, err) } opts = append(opts, containerd.WithSpec(spec, specOpts...), - containerd.WithRuntime(sandboxInfo.Runtime.Name, runtimeOptions), + containerd.WithRuntime(sandboxInfo.Runtime.Name, sandboxInfo.Runtime.Options), containerd.WithContainerLabels(containerLabels), - containerd.WithContainerExtension(containerMetadataExtension, &meta)) + containerd.WithContainerExtension(containerMetadataExtension, &meta), + ) + + // When using sandboxed shims, containerd's runtime needs to know which sandbox shim instance to use. 
+ if ociRuntime.SandboxMode == string(criconfig.ModeShim) { + opts = append(opts, containerd.WithSandbox(sandboxID)) + } opts = append(opts, c.nri.WithContainerAdjustment()) defer func() { @@ -303,6 +325,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta if err := c.containerStore.Add(container); err != nil { return nil, fmt.Errorf("failed to add container %q into store: %w", id, err) } + c.generateAndSendContainerEvent(ctx, id, sandboxID, runtime.ContainerEventType_CONTAINER_CREATED_EVENT) err = c.nri.PostCreateContainer(ctx, &sandbox, &container) @@ -318,12 +341,9 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta // volumeMounts sets up image volumes for container. Rely on the removal of container // root directory to do cleanup. Note that image volume will be skipped, if there is criMounts // specified with the same destination. -func (c *criService) volumeMounts(containerRootDir string, containerConfig *runtime.ContainerConfig, config *imagespec.ImageConfig) []*runtime.Mount { - if len(config.Volumes) == 0 { - return nil - } +func (c *criService) volumeMounts(platform platforms.Platform, containerRootDir string, containerConfig *runtime.ContainerConfig, config *imagespec.ImageConfig) []*runtime.Mount { var uidMappings, gidMappings []*runtime.IDMapping - if goruntime.GOOS != "windows" { + if platform.OS == "linux" { if usernsOpts := containerConfig.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions(); usernsOpts != nil { uidMappings = usernsOpts.GetUids() gidMappings = usernsOpts.GetGids() @@ -331,6 +351,10 @@ func (c *criService) volumeMounts(containerRootDir string, containerConfig *runt } criMounts := containerConfig.GetMounts() + + if len(config.Volumes) == 0 { + return nil + } var mounts []*runtime.Mount for dst := range config.Volumes { if isInCRIMounts(dst, criMounts) { @@ -342,9 +366,14 @@ func (c *criService) volumeMounts(containerRootDir string, containerConfig 
*runt } volumeID := util.GenerateID() src := filepath.Join(containerRootDir, "volumes", volumeID) - if !filepath.IsAbs(dst) && goruntime.GOOS != "windows" { + // When the platform OS is Linux, ensure dst is a _Linux_ abs path. + // We can't use filepath.IsAbs() because, when executing on Windows, it checks for + // Windows abs paths. + if platform.OS == "linux" && !strings.HasPrefix(dst, "/") { + // On Windows, ToSlash() is needed to ensure the path is a valid Linux path. + // On Linux, ToSlash() is a no-op. oldDst := dst - dst = filepath.Join("/", dst) + dst = filepath.ToSlash(filepath.Join("/", dst)) log.L.Debugf("Volume destination %q is not absolute, converted to %q", oldDst, dst) } // addOCIBindMounts will create these volumes. @@ -360,7 +389,7 @@ func (c *criService) volumeMounts(containerRootDir string, containerConfig *runt } // runtimeSpec returns a default runtime spec used in cri-containerd. -func (c *criService) runtimeSpec(id string, baseSpecFile string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) { +func (c *criService) runtimeSpec(id string, platform platforms.Platform, baseSpecFile string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) { // GenerateSpec needs namespace. ctx := util.NamespacedContext() container := &containers.Container{ID: id} @@ -386,7 +415,7 @@ func (c *criService) runtimeSpec(id string, baseSpecFile string, opts ...oci.Spe return &spec, nil } - spec, err := oci.GenerateSpec(ctx, nil, container, opts...) + spec, err := oci.GenerateSpecWithPlatform(ctx, nil, platforms.Format(platform), container, opts...) if err != nil { return nil, fmt.Errorf("failed to generate spec: %w", err) } @@ -394,13 +423,637 @@ func (c *criService) runtimeSpec(id string, baseSpecFile string, opts ...oci.Spe return spec, nil } -// Overrides the default snapshotter if Snapshotter is set for this runtime. 
-// See https://github.com/containerd/containerd/issues/6657 -func (c *criService) runtimeSnapshotter(ctx context.Context, ociRuntime criconfig.Runtime) string { - if ociRuntime.Snapshotter == "" { - return c.config.ContainerdConfig.Snapshotter +const ( + // relativeRootfsPath is the rootfs path relative to bundle path. + relativeRootfsPath = "rootfs" + // hostnameEnv is the key for HOSTNAME env. + hostnameEnv = "HOSTNAME" +) + +// generateUserString generates valid user string based on OCI Image Spec +// v1.0.0. +// +// CRI defines that the following combinations are valid: +// +// (none) -> "" +// username -> username +// username, uid -> username +// username, uid, gid -> username:gid +// username, gid -> username:gid +// uid -> uid +// uid, gid -> uid:gid +// gid -> error +// +// TODO(random-liu): Add group name support in CRI. +func generateUserString(username string, uid, gid *runtime.Int64Value) (string, error) { + var userstr, groupstr string + if uid != nil { + userstr = strconv.FormatInt(uid.GetValue(), 10) + } + if username != "" { + userstr = username + } + if gid != nil { + groupstr = strconv.FormatInt(gid.GetValue(), 10) + } + if userstr == "" { + if groupstr != "" { + return "", fmt.Errorf("user group %q is specified without user", groupstr) + } + return "", nil + } + if groupstr != "" { + userstr = userstr + ":" + groupstr + } + return userstr, nil +} + +// platformSpecOpts adds additional runtime spec options that may rely on +// runtime information (rootfs mounted), or platform specific checks with +// no defined workaround (yet) to specify for other platforms. +func (c *criService) platformSpecOpts( + platform platforms.Platform, + config *runtime.ContainerConfig, + imageConfig *imagespec.ImageConfig, +) ([]oci.SpecOpts, error) { + var specOpts []oci.SpecOpts + + // First deal with the set of options we can use across platforms currently. 
+ // Linux user strings have workarounds on other platforms to avoid needing to + // mount the rootfs, but on Linux hosts it must be mounted + // + // TODO(dcantah): I think the seccomp package can be made to compile on + // !linux and used here as well. + if platform.OS == "linux" { + // Set container username. This could only be done by containerd, because it needs + // access to the container rootfs. Pass user name to containerd, and let it overwrite + // the spec for us. + securityContext := config.GetLinux().GetSecurityContext() + userstr, err := generateUserString( + securityContext.GetRunAsUsername(), + securityContext.GetRunAsUser(), + securityContext.GetRunAsGroup()) + if err != nil { + return nil, fmt.Errorf("failed to generate user string: %w", err) + } + if userstr == "" { + // Lastly, since no user override was passed via CRI try to set via OCI + // Image + userstr = imageConfig.User + } + if userstr != "" { + specOpts = append(specOpts, oci.WithUser(userstr)) + } } - log.G(ctx).Debugf("Set snapshotter for runtime %s to %s", ociRuntime.Type, ociRuntime.Snapshotter) - return ociRuntime.Snapshotter + // Now grab the truly platform specific options (seccomp, apparmor etc. for linux + // for example). + ctrSpecOpts, err := c.containerSpecOpts(config, imageConfig) + if err != nil { + return nil, err + } + specOpts = append(specOpts, ctrSpecOpts...) + + return specOpts, nil +} + +// buildContainerSpec build container's OCI spec depending on controller's target platform OS. 
+func (c *criService) buildContainerSpec( + platform platforms.Platform, + id string, + sandboxID string, + sandboxPid uint32, + netNSPath string, + containerName string, + imageName string, + config *runtime.ContainerConfig, + sandboxConfig *runtime.PodSandboxConfig, + imageConfig *imagespec.ImageConfig, + extraMounts []*runtime.Mount, + ociRuntime criconfig.Runtime, +) (_ *runtimespec.Spec, retErr error) { + var ( + specOpts []oci.SpecOpts + err error + + // Platform helpers + isLinux = platform.OS == "linux" + isWindows = platform.OS == "windows" + isDarwin = platform.OS == "darwin" + ) + + switch { + case isLinux: + // Generate container mounts. + // No mounts are passed for other platforms. + linuxMounts := c.linuxContainerMounts(sandboxID, config) + + specOpts, err = c.buildLinuxSpec( + id, + sandboxID, + sandboxPid, + netNSPath, + containerName, + imageName, + config, + sandboxConfig, + imageConfig, + append(linuxMounts, extraMounts...), + ociRuntime, + ) + case isWindows: + specOpts, err = c.buildWindowsSpec( + id, + sandboxID, + sandboxPid, + netNSPath, + containerName, + imageName, + config, + sandboxConfig, + imageConfig, + extraMounts, + ociRuntime, + ) + case isDarwin: + specOpts, err = c.buildDarwinSpec( + id, + sandboxID, + containerName, + imageName, + config, + sandboxConfig, + imageConfig, + extraMounts, + ociRuntime, + ) + default: + return nil, fmt.Errorf("unsupported spec platform: %s", platform.OS) + } + + if err != nil { + return nil, fmt.Errorf("failed to generate spec opts: %w", err) + } + + return c.runtimeSpec(id, platform, ociRuntime.BaseRuntimeSpec, specOpts...) 
+} + +func (c *criService) buildLinuxSpec( + id string, + sandboxID string, + sandboxPid uint32, + netNSPath string, + containerName string, + imageName string, + config *runtime.ContainerConfig, + sandboxConfig *runtime.PodSandboxConfig, + imageConfig *imagespec.ImageConfig, + extraMounts []*runtime.Mount, + ociRuntime criconfig.Runtime, +) (_ []oci.SpecOpts, retErr error) { + specOpts := []oci.SpecOpts{ + oci.WithoutRunMount, + } + // only clear the default security settings if the runtime does not have a custom + // base runtime spec spec. Admins can use this functionality to define + // default ulimits, seccomp, or other default settings. + if ociRuntime.BaseRuntimeSpec == "" { + specOpts = append(specOpts, customopts.WithoutDefaultSecuritySettings) + } + + specOpts = append(specOpts, + customopts.WithRelativeRoot(relativeRootfsPath), + customopts.WithProcessArgs(config, imageConfig), + oci.WithDefaultPathEnv, + // this will be set based on the security context below + oci.WithNewPrivileges, + ) + + if config.GetWorkingDir() != "" { + specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir())) + } else if imageConfig.WorkingDir != "" { + specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) + } + + if config.GetTty() { + specOpts = append(specOpts, oci.WithTTY) + } + + // Add HOSTNAME env. + var ( + err error + hostname = sandboxConfig.GetHostname() + ) + if hostname == "" { + if hostname, err = c.os.Hostname(); err != nil { + return nil, err + } + } + specOpts = append(specOpts, oci.WithEnv([]string{hostnameEnv + "=" + hostname})) + + // Apply envs from image config first, so that envs from container config + // can override them. + env := append([]string{}, imageConfig.Env...) 
+ for _, e := range config.GetEnvs() { + env = append(env, e.GetKey()+"="+e.GetValue()) + } + specOpts = append(specOpts, oci.WithEnv(env)) + + securityContext := config.GetLinux().GetSecurityContext() + labelOptions, err := toLabel(securityContext.GetSelinuxOptions()) + if err != nil { + return nil, err + } + if len(labelOptions) == 0 { + // Use pod level SELinux config + if sandbox, err := c.sandboxStore.Get(sandboxID); err == nil { + labelOptions, err = selinux.DupSecOpt(sandbox.ProcessLabel) + if err != nil { + return nil, err + } + } + } + + processLabel, mountLabel, err := label.InitLabels(labelOptions) + if err != nil { + return nil, fmt.Errorf("failed to init selinux options %+v: %w", securityContext.GetSelinuxOptions(), err) + } + defer func() { + if retErr != nil { + selinux.ReleaseLabel(processLabel) + } + }() + + specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel)) + + if !c.config.DisableProcMount { + // Change the default masked/readonly paths to empty slices + // See https://github.com/containerd/containerd/issues/5029 + // TODO: Provide an option to set default paths to the ones in oci.populateDefaultUnixSpec() + specOpts = append(specOpts, oci.WithMaskedPaths([]string{}), oci.WithReadonlyPaths([]string{})) + + // Apply masked paths if specified. + // If the container is privileged, this will be cleared later on. + if maskedPaths := securityContext.GetMaskedPaths(); maskedPaths != nil { + specOpts = append(specOpts, oci.WithMaskedPaths(maskedPaths)) + } + + // Apply readonly paths if specified. + // If the container is privileged, this will be cleared later on. 
+ if readonlyPaths := securityContext.GetReadonlyPaths(); readonlyPaths != nil { + specOpts = append(specOpts, oci.WithReadonlyPaths(readonlyPaths)) + } + } + + specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext), + customopts.WithCapabilities(securityContext, c.allCaps)) + + if securityContext.GetPrivileged() { + if !sandboxConfig.GetLinux().GetSecurityContext().GetPrivileged() { + return nil, errors.New("no privileged container allowed in sandbox") + } + specOpts = append(specOpts, oci.WithPrivileged) + if !ociRuntime.PrivilegedWithoutHostDevices { + specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed) + } else if ociRuntime.PrivilegedWithoutHostDevicesAllDevicesAllowed { + // allow rwm on all devices for the container + specOpts = append(specOpts, oci.WithAllDevicesAllowed) + } + } + + // Clear all ambient capabilities. The implication of non-root + caps + // is not clearly defined in Kubernetes. + // See https://github.com/kubernetes/kubernetes/issues/56374 + // Keep docker's behavior for now. + specOpts = append(specOpts, + customopts.WithoutAmbientCaps, + customopts.WithSelinuxLabels(processLabel, mountLabel), + ) + + // TODO: Figure out whether we should set no new privilege for sandbox container by default + if securityContext.GetNoNewPrivs() { + specOpts = append(specOpts, oci.WithNoNewPrivileges) + } + // TODO(random-liu): [P1] Set selinux options (privileged or not). 
+ if securityContext.GetReadonlyRootfs() { + specOpts = append(specOpts, oci.WithRootFSReadonly()) + } + + if c.config.DisableCgroup { + specOpts = append(specOpts, customopts.WithDisabledCgroups) + } else { + specOpts = append(specOpts, customopts.WithResources(config.GetLinux().GetResources(), c.config.TolerateMissingHugetlbController, c.config.DisableHugetlbController)) + if sandboxConfig.GetLinux().GetCgroupParent() != "" { + cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id) + specOpts = append(specOpts, oci.WithCgroup(cgroupsPath)) + } + } + + supplementalGroups := securityContext.GetSupplementalGroups() + + // Get blockio class + blockIOClass, err := c.blockIOClassFromAnnotations(config.GetMetadata().GetName(), config.Annotations, sandboxConfig.Annotations) + if err != nil { + return nil, fmt.Errorf("failed to set blockio class: %w", err) + } + if blockIOClass != "" { + if linuxBlockIO, err := blockio.ClassNameToLinuxOCI(blockIOClass); err == nil { + specOpts = append(specOpts, oci.WithBlockIO(linuxBlockIO)) + } else { + return nil, err + } + } + + // Get RDT class + rdtClass, err := c.rdtClassFromAnnotations(config.GetMetadata().GetName(), config.Annotations, sandboxConfig.Annotations) + if err != nil { + return nil, fmt.Errorf("failed to set RDT class: %w", err) + } + if rdtClass != "" { + specOpts = append(specOpts, oci.WithRdt(rdtClass, "", "")) + } + + for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations, + ociRuntime.PodAnnotations) { + specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) + } + + for pKey, pValue := range getPassthroughAnnotations(config.Annotations, + ociRuntime.ContainerAnnotations) { + specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) + } + + // Default target PID namespace is the sandbox PID. + targetPid := sandboxPid + // If the container targets another container's PID namespace, + // set targetPid to the PID of that container. 
+ nsOpts := securityContext.GetNamespaceOptions() + if nsOpts.GetPid() == runtime.NamespaceMode_TARGET { + targetContainer, err := c.validateTargetContainer(sandboxID, nsOpts.TargetId) + if err != nil { + return nil, fmt.Errorf("invalid target container: %w", err) + } + + status := targetContainer.Status.Get() + targetPid = status.Pid + } + + uids, gids, err := parseUsernsIDs(nsOpts.GetUsernsOptions()) + if err != nil { + return nil, fmt.Errorf("user namespace configuration: %w", err) + } + + // Check sandbox userns config is consistent with container config. + sandboxUsernsOpts := sandboxConfig.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions() + if !sameUsernsConfig(sandboxUsernsOpts, nsOpts.GetUsernsOptions()) { + return nil, fmt.Errorf("user namespace config for sandbox is different from container. Sandbox userns config: %v - Container userns config: %v", sandboxUsernsOpts, nsOpts.GetUsernsOptions()) + } + + specOpts = append(specOpts, + customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj), + customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, uids, gids), + customopts.WithSupplementalGroups(supplementalGroups), + ) + specOpts = append( + specOpts, + annotations.DefaultCRIAnnotations(sandboxID, containerName, imageName, sandboxConfig, false)..., + ) + + // cgroupns is used for hiding /sys/fs/cgroup from containers. + // For compatibility, cgroupns is not used when running in cgroup v1 mode or in privileged. 
+ // https://github.com/containers/libpod/issues/4363 + // https://github.com/kubernetes/enhancements/blob/0e409b47497e398b369c281074485c8de129694f/keps/sig-node/20191118-cgroups-v2.md#cgroup-namespace + if isUnifiedCgroupsMode() && !securityContext.GetPrivileged() { + specOpts = append(specOpts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.CgroupNamespace})) + } + + return specOpts, nil +} + +func (c *criService) buildWindowsSpec( + id string, + sandboxID string, + sandboxPid uint32, + netNSPath string, + containerName string, + imageName string, + config *runtime.ContainerConfig, + sandboxConfig *runtime.PodSandboxConfig, + imageConfig *imagespec.ImageConfig, + extraMounts []*runtime.Mount, + ociRuntime criconfig.Runtime, +) (_ []oci.SpecOpts, retErr error) { + var specOpts []oci.SpecOpts + specOpts = append(specOpts, customopts.WithProcessCommandLineOrArgsForWindows(config, imageConfig)) + + // All containers in a pod need to have HostProcess set if it was set on the pod, + // and vice versa no containers in the pod can be HostProcess if the pods spec + // didn't have the field set. The only case that is valid is if these are the same value. + cntrHpc := config.GetWindows().GetSecurityContext().GetHostProcess() + sandboxHpc := sandboxConfig.GetWindows().GetSecurityContext().GetHostProcess() + if cntrHpc != sandboxHpc { + return nil, errors.New("pod spec and all containers inside must have the HostProcess field set to be valid") + } + + if config.GetWorkingDir() != "" { + specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir())) + } else if imageConfig.WorkingDir != "" { + specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) + } + + if config.GetTty() { + specOpts = append(specOpts, oci.WithTTY) + } + + // Apply envs from image config first, so that envs from container config + // can override them. + env := append([]string{}, imageConfig.Env...) 
+ for _, e := range config.GetEnvs() { + env = append(env, e.GetKey()+"="+e.GetValue()) + } + specOpts = append(specOpts, oci.WithEnv(env)) + + specOpts = append(specOpts, + // Clear the root location since hcsshim expects it. + // NOTE: readonly rootfs doesn't work on windows. + customopts.WithoutRoot, + oci.WithWindowsNetworkNamespace(netNSPath), + oci.WithHostname(sandboxConfig.GetHostname()), + ) + + specOpts = append(specOpts, customopts.WithWindowsMounts(c.os, config, extraMounts), customopts.WithWindowsDevices(config)) + + // Start with the image config user and override below if RunAsUsername is not "". + username := imageConfig.User + + windowsConfig := config.GetWindows() + if windowsConfig != nil { + specOpts = append(specOpts, customopts.WithWindowsResources(windowsConfig.GetResources())) + securityCtx := windowsConfig.GetSecurityContext() + if securityCtx != nil { + runAsUser := securityCtx.GetRunAsUsername() + if runAsUser != "" { + username = runAsUser + } + cs := securityCtx.GetCredentialSpec() + if cs != "" { + specOpts = append(specOpts, customopts.WithWindowsCredentialSpec(cs)) + } + } + } + + // There really isn't a good Windows way to verify that the username is available in the + // image as early as here like there is for Linux. Later on in the stack hcsshim + // will handle the behavior of erroring out if the user isn't available in the image + // when trying to run the init process. 
+ specOpts = append(specOpts, oci.WithUser(username)) + + for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations, + ociRuntime.PodAnnotations) { + specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) + } + + for pKey, pValue := range getPassthroughAnnotations(config.Annotations, + ociRuntime.ContainerAnnotations) { + specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) + } + + specOpts = append(specOpts, customopts.WithAnnotation(annotations.WindowsHostProcess, strconv.FormatBool(sandboxHpc))) + specOpts = append(specOpts, + annotations.DefaultCRIAnnotations(sandboxID, containerName, imageName, sandboxConfig, false)..., + ) + + return specOpts, nil +} + +func (c *criService) buildDarwinSpec( + id string, + sandboxID string, + containerName string, + imageName string, + config *runtime.ContainerConfig, + sandboxConfig *runtime.PodSandboxConfig, + imageConfig *imagespec.ImageConfig, + extraMounts []*runtime.Mount, + ociRuntime criconfig.Runtime, +) (_ []oci.SpecOpts, retErr error) { + specOpts := []oci.SpecOpts{ + customopts.WithProcessArgs(config, imageConfig), + } + + if config.GetWorkingDir() != "" { + specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir())) + } else if imageConfig.WorkingDir != "" { + specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) + } + + if config.GetTty() { + specOpts = append(specOpts, oci.WithTTY) + } + + // Apply envs from image config first, so that envs from container config + // can override them. + env := append([]string{}, imageConfig.Env...) 
+ for _, e := range config.GetEnvs() { + env = append(env, e.GetKey()+"="+e.GetValue()) + } + specOpts = append(specOpts, oci.WithEnv(env)) + + specOpts = append(specOpts, customopts.WithDarwinMounts(c.os, config, extraMounts)) + + for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations, + ociRuntime.PodAnnotations) { + specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) + } + + for pKey, pValue := range getPassthroughAnnotations(config.Annotations, + ociRuntime.ContainerAnnotations) { + specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) + } + + specOpts = append(specOpts, + annotations.DefaultCRIAnnotations(sandboxID, containerName, imageName, sandboxConfig, false)..., + ) + + return specOpts, nil +} + +// linuxContainerMounts sets up necessary container system file mounts +// including /dev/shm, /etc/hosts and /etc/resolv.conf. +func (c *criService) linuxContainerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount { + var mounts []*runtime.Mount + securityContext := config.GetLinux().GetSecurityContext() + var uidMappings, gidMappings []*runtime.IDMapping + if usernsOpts := securityContext.GetNamespaceOptions().GetUsernsOptions(); usernsOpts != nil { + uidMappings = usernsOpts.GetUids() + gidMappings = usernsOpts.GetGids() + } + + if !isInCRIMounts(etcHostname, config.GetMounts()) { + // /etc/hostname is added since 1.1.6, 1.2.4 and 1.3. + // For in-place upgrade, the old sandbox doesn't have the hostname file, + // do not mount this in that case. + // TODO(random-liu): Remove the check and always mount this when + // containerd 1.1 and 1.2 are deprecated. 
+ hostpath := c.getSandboxHostname(sandboxID) + if _, err := c.os.Stat(hostpath); err == nil { + mounts = append(mounts, &runtime.Mount{ + ContainerPath: etcHostname, + HostPath: hostpath, + Readonly: securityContext.GetReadonlyRootfs(), + SelinuxRelabel: true, + UidMappings: uidMappings, + GidMappings: gidMappings, + }) + } + } + + if !isInCRIMounts(etcHosts, config.GetMounts()) { + mounts = append(mounts, &runtime.Mount{ + ContainerPath: etcHosts, + HostPath: c.getSandboxHosts(sandboxID), + Readonly: securityContext.GetReadonlyRootfs(), + SelinuxRelabel: true, + UidMappings: uidMappings, + GidMappings: gidMappings, + }) + } + + // Mount sandbox resolv.config. + // TODO: Need to figure out whether we should always mount it as read-only + if !isInCRIMounts(resolvConfPath, config.GetMounts()) { + mounts = append(mounts, &runtime.Mount{ + ContainerPath: resolvConfPath, + HostPath: c.getResolvPath(sandboxID), + Readonly: securityContext.GetReadonlyRootfs(), + SelinuxRelabel: true, + UidMappings: uidMappings, + GidMappings: gidMappings, + }) + } + + if !isInCRIMounts(devShm, config.GetMounts()) { + sandboxDevShm := c.getSandboxDevShm(sandboxID) + if securityContext.GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE { + sandboxDevShm = devShm + } + mounts = append(mounts, &runtime.Mount{ + ContainerPath: devShm, + HostPath: sandboxDevShm, + Readonly: false, + SelinuxRelabel: sandboxDevShm != devShm, + // XXX: tmpfs support for idmap mounts got merged in + // Linux 6.3. + // Our Ubuntu 22.04 CI runs with 5.15 kernels, so + // disabling idmap mounts for this case makes the CI + // happy (the other fs used support idmap mounts in 5.15 + // kernels). + // We can enable this at a later stage, but as this + // tmpfs mount is exposed empty to the container (no + // prepopulated files) and using the hostIPC with userns + // is blocked by k8s, we can just avoid using the + // mappings and it should work fine. 
+ }) + } + return mounts } diff --git a/pkg/cri/server/container_create_linux.go b/pkg/cri/server/container_create_linux.go index 1d7c66ccf..0c39eb08b 100644 --- a/pkg/cri/server/container_create_linux.go +++ b/pkg/cri/server/container_create_linux.go @@ -25,20 +25,14 @@ import ( "strconv" "strings" - "github.com/containerd/cgroups/v3" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + "github.com/containerd/containerd/contrib/apparmor" "github.com/containerd/containerd/contrib/seccomp" "github.com/containerd/containerd/oci" "github.com/containerd/containerd/snapshots" - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - selinux "github.com/opencontainers/selinux/go-selinux" - "github.com/opencontainers/selinux/go-selinux/label" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - "github.com/containerd/containerd/pkg/blockio" - "github.com/containerd/containerd/pkg/cri/annotations" - "github.com/containerd/containerd/pkg/cri/config" customopts "github.com/containerd/containerd/pkg/cri/opts" ) @@ -57,338 +51,13 @@ const ( seccompDefaultProfile = dockerDefault ) -// containerMounts sets up necessary container system file mounts -// including /dev/shm, /etc/hosts and /etc/resolv.conf. -func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount { - var mounts []*runtime.Mount - securityContext := config.GetLinux().GetSecurityContext() - var uidMappings, gidMappings []*runtime.IDMapping - if usernsOpts := securityContext.GetNamespaceOptions().GetUsernsOptions(); usernsOpts != nil { - uidMappings = usernsOpts.GetUids() - gidMappings = usernsOpts.GetGids() - } - - if !isInCRIMounts(etcHostname, config.GetMounts()) { - // /etc/hostname is added since 1.1.6, 1.2.4 and 1.3. - // For in-place upgrade, the old sandbox doesn't have the hostname file, - // do not mount this in that case. 
- // TODO(random-liu): Remove the check and always mount this when - // containerd 1.1 and 1.2 are deprecated. - hostpath := c.getSandboxHostname(sandboxID) - if _, err := c.os.Stat(hostpath); err == nil { - mounts = append(mounts, &runtime.Mount{ - ContainerPath: etcHostname, - HostPath: hostpath, - Readonly: securityContext.GetReadonlyRootfs(), - SelinuxRelabel: true, - UidMappings: uidMappings, - GidMappings: gidMappings, - }) - } - } - - if !isInCRIMounts(etcHosts, config.GetMounts()) { - mounts = append(mounts, &runtime.Mount{ - ContainerPath: etcHosts, - HostPath: c.getSandboxHosts(sandboxID), - Readonly: securityContext.GetReadonlyRootfs(), - SelinuxRelabel: true, - UidMappings: uidMappings, - GidMappings: gidMappings, - }) - } - - // Mount sandbox resolv.config. - // TODO: Need to figure out whether we should always mount it as read-only - if !isInCRIMounts(resolvConfPath, config.GetMounts()) { - mounts = append(mounts, &runtime.Mount{ - ContainerPath: resolvConfPath, - HostPath: c.getResolvPath(sandboxID), - Readonly: securityContext.GetReadonlyRootfs(), - SelinuxRelabel: true, - UidMappings: uidMappings, - GidMappings: gidMappings, - }) - } - - if !isInCRIMounts(devShm, config.GetMounts()) { - sandboxDevShm := c.getSandboxDevShm(sandboxID) - if securityContext.GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE { - sandboxDevShm = devShm - } - mounts = append(mounts, &runtime.Mount{ - ContainerPath: devShm, - HostPath: sandboxDevShm, - Readonly: false, - SelinuxRelabel: sandboxDevShm != devShm, - // XXX: tmpfs support for idmap mounts got merged in - // Linux 6.3. - // Our CI runs with 5.15 kernels, so disabling idmap - // mounts for this case makes the CI happy (the other fs - // used support idmap mounts in 5.15 kernels). 
- // We can enable this at a later stage, but as this - // tmpfs mount is exposed empty to the container (no - // prepopulated files) and using the hostIPC with userns - // is blocked by k8s, we can just avoid using the - // mappings and it should work fine. - }) - } - return mounts -} - -func (c *criService) containerSpec( - id string, - sandboxID string, - sandboxPid uint32, - netNSPath string, - containerName string, - imageName string, - config *runtime.ContainerConfig, - sandboxConfig *runtime.PodSandboxConfig, - imageConfig *imagespec.ImageConfig, - extraMounts []*runtime.Mount, - ociRuntime config.Runtime, -) (_ *runtimespec.Spec, retErr error) { - specOpts := []oci.SpecOpts{ - oci.WithoutRunMount, - } - // only clear the default security settings if the runtime does not have a custom - // base runtime spec spec. Admins can use this functionality to define - // default ulimits, seccomp, or other default settings. - if ociRuntime.BaseRuntimeSpec == "" { - specOpts = append(specOpts, customopts.WithoutDefaultSecuritySettings) - } - specOpts = append(specOpts, - customopts.WithRelativeRoot(relativeRootfsPath), - customopts.WithProcessArgs(config, imageConfig), - oci.WithDefaultPathEnv, - // this will be set based on the security context below - oci.WithNewPrivileges, - ) - if config.GetWorkingDir() != "" { - specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir())) - } else if imageConfig.WorkingDir != "" { - specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) - } - - if config.GetTty() { - specOpts = append(specOpts, oci.WithTTY) - } - - // Add HOSTNAME env. - var ( - err error - hostname = sandboxConfig.GetHostname() - ) - if hostname == "" { - if hostname, err = c.os.Hostname(); err != nil { - return nil, err - } - } - specOpts = append(specOpts, oci.WithEnv([]string{hostnameEnv + "=" + hostname})) - - // Apply envs from image config first, so that envs from container config - // can override them. 
- env := append([]string{}, imageConfig.Env...) - for _, e := range config.GetEnvs() { - env = append(env, e.GetKey()+"="+e.GetValue()) - } - specOpts = append(specOpts, oci.WithEnv(env)) - - securityContext := config.GetLinux().GetSecurityContext() - labelOptions, err := toLabel(securityContext.GetSelinuxOptions()) - if err != nil { - return nil, err - } - if len(labelOptions) == 0 { - // Use pod level SELinux config - if sandbox, err := c.sandboxStore.Get(sandboxID); err == nil { - labelOptions, err = selinux.DupSecOpt(sandbox.ProcessLabel) - if err != nil { - return nil, err - } - } - } - - processLabel, mountLabel, err := label.InitLabels(labelOptions) - if err != nil { - return nil, fmt.Errorf("failed to init selinux options %+v: %w", securityContext.GetSelinuxOptions(), err) - } - defer func() { - if retErr != nil { - selinux.ReleaseLabel(processLabel) - } - }() - - specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel)) - - if !c.config.DisableProcMount { - // Change the default masked/readonly paths to empty slices - // See https://github.com/containerd/containerd/issues/5029 - // TODO: Provide an option to set default paths to the ones in oci.populateDefaultUnixSpec() - specOpts = append(specOpts, oci.WithMaskedPaths([]string{}), oci.WithReadonlyPaths([]string{})) - - // Apply masked paths if specified. - // If the container is privileged, this will be cleared later on. - if maskedPaths := securityContext.GetMaskedPaths(); maskedPaths != nil { - specOpts = append(specOpts, oci.WithMaskedPaths(maskedPaths)) - } - - // Apply readonly paths if specified. - // If the container is privileged, this will be cleared later on. 
- if readonlyPaths := securityContext.GetReadonlyPaths(); readonlyPaths != nil { - specOpts = append(specOpts, oci.WithReadonlyPaths(readonlyPaths)) - } - } - - specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext), - customopts.WithCapabilities(securityContext, c.allCaps)) - - if securityContext.GetPrivileged() { - if !sandboxConfig.GetLinux().GetSecurityContext().GetPrivileged() { - return nil, errors.New("no privileged container allowed in sandbox") - } - specOpts = append(specOpts, oci.WithPrivileged) - if !ociRuntime.PrivilegedWithoutHostDevices { - specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed) - } else if ociRuntime.PrivilegedWithoutHostDevicesAllDevicesAllowed { - // allow rwm on all devices for the container - specOpts = append(specOpts, oci.WithAllDevicesAllowed) - } - } - - // Clear all ambient capabilities. The implication of non-root + caps - // is not clearly defined in Kubernetes. - // See https://github.com/kubernetes/kubernetes/issues/56374 - // Keep docker's behavior for now. - specOpts = append(specOpts, - customopts.WithoutAmbientCaps, - customopts.WithSelinuxLabels(processLabel, mountLabel), - ) - - // TODO: Figure out whether we should set no new privilege for sandbox container by default - if securityContext.GetNoNewPrivs() { - specOpts = append(specOpts, oci.WithNoNewPrivileges) - } - // TODO(random-liu): [P1] Set selinux options (privileged or not). 
- if securityContext.GetReadonlyRootfs() { - specOpts = append(specOpts, oci.WithRootFSReadonly()) - } - - if c.config.DisableCgroup { - specOpts = append(specOpts, customopts.WithDisabledCgroups) - } else { - specOpts = append(specOpts, customopts.WithResources(config.GetLinux().GetResources(), c.config.TolerateMissingHugetlbController, c.config.DisableHugetlbController)) - if sandboxConfig.GetLinux().GetCgroupParent() != "" { - cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id) - specOpts = append(specOpts, oci.WithCgroup(cgroupsPath)) - } - } - - supplementalGroups := securityContext.GetSupplementalGroups() - - // Get blockio class - blockIOClass, err := c.blockIOClassFromAnnotations(config.GetMetadata().GetName(), config.Annotations, sandboxConfig.Annotations) - if err != nil { - return nil, fmt.Errorf("failed to set blockio class: %w", err) - } - if blockIOClass != "" { - if linuxBlockIO, err := blockio.ClassNameToLinuxOCI(blockIOClass); err == nil { - specOpts = append(specOpts, oci.WithBlockIO(linuxBlockIO)) - } else { - return nil, err - } - } - - // Get RDT class - rdtClass, err := c.rdtClassFromAnnotations(config.GetMetadata().GetName(), config.Annotations, sandboxConfig.Annotations) - if err != nil { - return nil, fmt.Errorf("failed to set RDT class: %w", err) - } - if rdtClass != "" { - specOpts = append(specOpts, oci.WithRdt(rdtClass, "", "")) - } - - for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations, - ociRuntime.PodAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - for pKey, pValue := range getPassthroughAnnotations(config.Annotations, - ociRuntime.ContainerAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - // Default target PID namespace is the sandbox PID. - targetPid := sandboxPid - // If the container targets another container's PID namespace, - // set targetPid to the PID of that container. 
- nsOpts := securityContext.GetNamespaceOptions() - if nsOpts.GetPid() == runtime.NamespaceMode_TARGET { - targetContainer, err := c.validateTargetContainer(sandboxID, nsOpts.TargetId) - if err != nil { - return nil, fmt.Errorf("invalid target container: %w", err) - } - - status := targetContainer.Status.Get() - targetPid = status.Pid - } - - uids, gids, err := parseUsernsIDs(nsOpts.GetUsernsOptions()) - if err != nil { - return nil, fmt.Errorf("user namespace configuration: %w", err) - } - - // Check sandbox userns config is consistent with container config. - sandboxUsernsOpts := sandboxConfig.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions() - if !sameUsernsConfig(sandboxUsernsOpts, nsOpts.GetUsernsOptions()) { - return nil, fmt.Errorf("user namespace config for sandbox is different from container. Sandbox userns config: %v - Container userns config: %v", sandboxUsernsOpts, nsOpts.GetUsernsOptions()) - } - - specOpts = append(specOpts, - customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj), - customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, uids, gids), - customopts.WithSupplementalGroups(supplementalGroups), - ) - specOpts = append(specOpts, - annotations.DefaultCRIAnnotations(sandboxID, containerName, imageName, sandboxConfig, false)..., - ) - // cgroupns is used for hiding /sys/fs/cgroup from containers. - // For compatibility, cgroupns is not used when running in cgroup v1 mode or in privileged. - // https://github.com/containers/libpod/issues/4363 - // https://github.com/kubernetes/enhancements/blob/0e409b47497e398b369c281074485c8de129694f/keps/sig-node/20191118-cgroups-v2.md#cgroup-namespace - if cgroups.Mode() == cgroups.Unified && !securityContext.GetPrivileged() { - specOpts = append(specOpts, oci.WithLinuxNamespace( - runtimespec.LinuxNamespace{ - Type: runtimespec.CgroupNamespace, - })) - } - return c.runtimeSpec(id, ociRuntime.BaseRuntimeSpec, specOpts...) 
-} - func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { - var specOpts []oci.SpecOpts + var ( + specOpts []oci.SpecOpts + err error + ) securityContext := config.GetLinux().GetSecurityContext() - // Set container username. This could only be done by containerd, because it needs - // access to the container rootfs. Pass user name to containerd, and let it overwrite - // the spec for us. - userstr, err := generateUserString( - securityContext.GetRunAsUsername(), - securityContext.GetRunAsUser(), - securityContext.GetRunAsGroup()) - if err != nil { - return nil, fmt.Errorf("failed to generate user string: %w", err) - } - if userstr == "" { - // Lastly, since no user override was passed via CRI try to set via OCI - // Image - userstr = imageConfig.User - } - if userstr != "" { - specOpts = append(specOpts, oci.WithUser(userstr)) - } - - userstr = "0" // runtime default + userstr := "0" // runtime default if securityContext.GetRunAsUsername() != "" { userstr = securityContext.GetRunAsUsername() } else if securityContext.GetRunAsUser() != nil { @@ -594,44 +263,6 @@ func appArmorProfileExists(profile string) (bool, error) { } } -// generateUserString generates valid user string based on OCI Image Spec -// v1.0.0. -// -// CRI defines that the following combinations are valid: -// -// (none) -> "" -// username -> username -// username, uid -> username -// username, uid, gid -> username:gid -// username, gid -> username:gid -// uid -> uid -// uid, gid -> uid:gid -// gid -> error -// -// TODO(random-liu): Add group name support in CRI. 
-func generateUserString(username string, uid, gid *runtime.Int64Value) (string, error) { - var userstr, groupstr string - if uid != nil { - userstr = strconv.FormatInt(uid.GetValue(), 10) - } - if username != "" { - userstr = username - } - if gid != nil { - groupstr = strconv.FormatInt(gid.GetValue(), 10) - } - if userstr == "" { - if groupstr != "" { - return "", fmt.Errorf("user group %q is specified without user", groupstr) - } - return "", nil - } - if groupstr != "" { - userstr = userstr + ":" + groupstr - } - return userstr, nil -} - // snapshotterOpts returns any Linux specific snapshotter options for the rootfs snapshot func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) { nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions() diff --git a/pkg/cri/server/container_create_linux_test.go b/pkg/cri/server/container_create_linux_test.go index cb07d8c60..35a37492d 100644 --- a/pkg/cri/server/container_create_linux_test.go +++ b/pkg/cri/server/container_create_linux_test.go @@ -18,7 +18,6 @@ package server import ( "context" - "errors" "fmt" "os" "path/filepath" @@ -32,6 +31,7 @@ import ( "github.com/containerd/containerd/contrib/seccomp" "github.com/containerd/containerd/mount" "github.com/containerd/containerd/oci" + "github.com/containerd/containerd/platforms" imagespec "github.com/opencontainers/image-spec/specs-go/v1" runtimespec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/selinux/go-selinux" @@ -255,7 +255,7 @@ func TestContainerCapabilities(t *testing.T) { c.allCaps = allCaps containerConfig.Linux.SecurityContext.Capabilities = test.capability - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, 
imageConfig, nil, ociRuntime) require.NoError(t, err) if selinux.GetEnabled() { @@ -290,7 +290,7 @@ func TestContainerSpecTty(t *testing.T) { c := newTestCRIService() for _, tty := range []bool{true, false} { containerConfig.Tty = tty - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) require.NoError(t, err) specCheck(t, testID, testSandboxID, testPid, spec) assert.Equal(t, tty, spec.Process.Terminal) @@ -317,7 +317,7 @@ func TestContainerSpecDefaultPath(t *testing.T) { imageConfig.Env = append(imageConfig.Env, pathenv) expected = pathenv } - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) require.NoError(t, err) specCheck(t, testID, testSandboxID, testPid, spec) assert.Contains(t, spec.Process.Env, expected) @@ -334,7 +334,7 @@ func TestContainerSpecReadonlyRootfs(t *testing.T) { c := newTestCRIService() for _, readonly := range []bool{true, false} { containerConfig.Linux.SecurityContext.ReadonlyRootfs = readonly - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) require.NoError(t, err) specCheck(t, testID, testSandboxID, testPid, spec) assert.Equal(t, readonly, spec.Root.Readonly) @@ 
-367,23 +367,16 @@ func TestContainerSpecWithExtraMounts(t *testing.T) { HostPath: "test-sys-extra", Readonly: false, }, - { - ContainerPath: "/dev", - HostPath: "test-dev-extra", - Readonly: false, - }, } - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, extraMounts, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, extraMounts, ociRuntime) require.NoError(t, err) specCheck(t, testID, testSandboxID, testPid, spec) - var mounts, sysMounts, devMounts []runtimespec.Mount + var mounts, sysMounts []runtimespec.Mount for _, m := range spec.Mounts { if strings.HasPrefix(m.Destination, "test-container-path") { mounts = append(mounts, m) } else if m.Destination == "/sys" { sysMounts = append(sysMounts, m) - } else if strings.HasPrefix(m.Destination, "/dev") { - devMounts = append(devMounts, m) } } t.Logf("CRI mount should override extra mount") @@ -395,11 +388,6 @@ func TestContainerSpecWithExtraMounts(t *testing.T) { require.Len(t, sysMounts, 1) assert.Equal(t, "test-sys-extra", sysMounts[0].Source) assert.Contains(t, sysMounts[0].Options, "rw") - - t.Logf("Dev mount should override all default dev mounts") - require.Len(t, devMounts, 1) - assert.Equal(t, "test-dev-extra", devMounts[0].Source) - assert.Contains(t, devMounts[0].Options, "rw") } func TestContainerAndSandboxPrivileged(t *testing.T) { @@ -447,7 +435,7 @@ func TestContainerAndSandboxPrivileged(t *testing.T) { sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ Privileged: test.sandboxPrivileged, } - _, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + _, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, 
testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) if test.expectError { assert.Error(t, err) } else { @@ -457,232 +445,6 @@ func TestContainerAndSandboxPrivileged(t *testing.T) { } } -func TestContainerMounts(t *testing.T) { - const testSandboxID = "test-id" - idmap := []*runtime.IDMapping{ - { - ContainerId: 0, - HostId: 100, - Length: 1, - }, - } - - for _, test := range []struct { - desc string - statFn func(string) (os.FileInfo, error) - criMounts []*runtime.Mount - securityContext *runtime.LinuxContainerSecurityContext - expectedMounts []*runtime.Mount - }{ - { - desc: "should setup ro mount when rootfs is read-only", - securityContext: &runtime.LinuxContainerSecurityContext{ - ReadonlyRootfs: true, - }, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), - Readonly: true, - SelinuxRelabel: true, - }, - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: true, - SelinuxRelabel: true, - }, - { - ContainerPath: resolvConfPath, - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - Readonly: true, - SelinuxRelabel: true, - }, - { - ContainerPath: "/dev/shm", - HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), - Readonly: false, - SelinuxRelabel: true, - }, - }, - }, - { - desc: "should setup rw mount when rootfs is read-write", - securityContext: &runtime.LinuxContainerSecurityContext{}, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), - Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: resolvConfPath, - HostPath: 
filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: "/dev/shm", - HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), - Readonly: false, - SelinuxRelabel: true, - }, - }, - }, - { - desc: "should setup uidMappings/gidMappings when userns is used", - securityContext: &runtime.LinuxContainerSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - UsernsOptions: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: idmap, - Gids: idmap, - }, - }, - }, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), - Readonly: false, - SelinuxRelabel: true, - UidMappings: idmap, - GidMappings: idmap, - }, - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: false, - SelinuxRelabel: true, - UidMappings: idmap, - GidMappings: idmap, - }, - { - ContainerPath: resolvConfPath, - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - Readonly: false, - SelinuxRelabel: true, - UidMappings: idmap, - GidMappings: idmap, - }, - { - ContainerPath: "/dev/shm", - HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), - Readonly: false, - SelinuxRelabel: true, - }, - }, - }, - { - desc: "should use host /dev/shm when host ipc is set", - securityContext: &runtime.LinuxContainerSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{Ipc: runtime.NamespaceMode_NODE}, - }, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), - Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: false, - SelinuxRelabel: true, - }, - { - 
ContainerPath: resolvConfPath, - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: "/dev/shm", - HostPath: "/dev/shm", - Readonly: false, - }, - }, - }, - { - desc: "should skip container mounts if already mounted by CRI", - criMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hostname", - HostPath: "/test-etc-hostname", - }, - { - ContainerPath: "/etc/hosts", - HostPath: "/test-etc-host", - }, - { - ContainerPath: resolvConfPath, - HostPath: "test-resolv-conf", - }, - { - ContainerPath: "/dev/shm", - HostPath: "test-dev-shm", - }, - }, - securityContext: &runtime.LinuxContainerSecurityContext{}, - expectedMounts: nil, - }, - { - desc: "should skip hostname mount if the old sandbox doesn't have hostname file", - statFn: func(path string) (os.FileInfo, error) { - assert.Equal(t, filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), path) - return nil, errors.New("random error") - }, - securityContext: &runtime.LinuxContainerSecurityContext{}, - expectedMounts: []*runtime.Mount{ - { - ContainerPath: "/etc/hosts", - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), - Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: resolvConfPath, - HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), - Readonly: false, - SelinuxRelabel: true, - }, - { - ContainerPath: "/dev/shm", - HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), - Readonly: false, - SelinuxRelabel: true, - }, - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - config := &runtime.ContainerConfig{ - Metadata: &runtime.ContainerMetadata{ - Name: "test-name", - Attempt: 1, - }, - Mounts: test.criMounts, - Linux: &runtime.LinuxContainerConfig{ - SecurityContext: test.securityContext, - }, - } - c := newTestCRIService() - c.os.(*ostesting.FakeOS).StatFn = test.statFn - mounts := 
c.containerMounts(testSandboxID, config) - assert.Equal(t, test.expectedMounts, mounts, test.desc) - }) - } -} - func TestPrivilegedBindMount(t *testing.T) { testPid := uint32(1234) c := newTestCRIService() @@ -714,7 +476,7 @@ func TestPrivilegedBindMount(t *testing.T) { containerConfig.Linux.SecurityContext.Privileged = test.privileged sandboxConfig.Linux.SecurityContext.Privileged = test.privileged - spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) assert.NoError(t, err) if test.expectedSysFSRO { @@ -886,7 +648,7 @@ func TestPidNamespace(t *testing.T) { test := test t.Run(test.desc, func(t *testing.T) { containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{Pid: test.pidNS} - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) require.NoError(t, err) assert.Contains(t, spec.Linux.Namespaces, test.expected) }) @@ -916,6 +678,7 @@ func TestUserNamespace(t *testing.T) { containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() ociRuntime := config.Runtime{} c := newTestCRIService() + for _, test := range []struct { desc string userNS *runtime.UserNamespace @@ -1060,7 +823,7 @@ func TestUserNamespace(t *testing.T) { sandboxUserns = test.sandboxUserNS } sandboxConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{UsernsOptions: sandboxUserns} - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", 
testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) if test.err { require.Error(t, err) @@ -1090,7 +853,7 @@ func TestNoDefaultRunMount(t *testing.T) { ociRuntime := config.Runtime{} c := newTestCRIService() - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) assert.NoError(t, err) for _, mount := range spec.Mounts { assert.NotEqual(t, "/run", mount.Destination) @@ -1194,7 +957,8 @@ func TestGenerateSeccompSecurityProfileSpecOpts(t *testing.T) { }, }, { - desc: "sp should not set seccomp when seccomp is not specified"}, + desc: "sp should not set seccomp when seccomp is not specified", + }, { desc: "sp should set default seccomp when seccomp is runtime/default", specOpts: seccomp.WithDefaultProfile(), @@ -1518,7 +1282,7 @@ func TestMaskedAndReadonlyPaths(t *testing.T) { sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ Privileged: test.privileged, } - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) require.NoError(t, err) if !test.privileged { // specCheck presumes an unprivileged container specCheck(t, testID, testSandboxID, testPid, spec) @@ -1571,7 +1335,7 @@ func TestHostname(t *testing.T) { sandboxConfig.Linux.SecurityContext = 
&runtime.LinuxSandboxSecurityContext{ NamespaceOptions: &runtime.NamespaceOption{Network: test.networkNs}, } - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) require.NoError(t, err) specCheck(t, testID, testSandboxID, testPid, spec) assert.Contains(t, spec.Process.Env, test.expectedEnv) @@ -1584,7 +1348,7 @@ func TestDisableCgroup(t *testing.T) { ociRuntime := config.Runtime{} c := newTestCRIService() c.config.DisableCgroup = true - spec, err := c.containerSpec("test-id", "sandbox-id", 1234, "", "container-name", testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, "test-id", "sandbox-id", 1234, "", "container-name", testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) require.NoError(t, err) t.Log("resource limit should not be set") @@ -1654,7 +1418,6 @@ func TestGenerateUserString(t *testing.T) { }, } for _, tc := range testcases { - tc := tc t.Run(tc.name, func(t *testing.T) { r, err := generateUserString(tc.u, tc.uid, tc.gid) if tc.expectedError { @@ -1740,11 +1503,11 @@ additional-group-for-root:x:22222:root containerConfig.Linux.SecurityContext = test.securityContext imageConfig.User = test.imageConfigUser - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) require.NoError(t, err) spec.Root.Path = tempRootDir // simulating /etc/{passwd, group} - opts, err := 
c.containerSpecOpts(containerConfig, imageConfig) + opts, err := c.platformSpecOpts(platforms.DefaultSpec(), containerConfig, imageConfig) require.NoError(t, err) oci.ApplyOpts(ctx, nil, testContainer, spec, opts...) @@ -1816,7 +1579,7 @@ func TestNonRootUserAndDevices(t *testing.T) { }, } - spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) + spec, err := c.buildContainerSpec(currentPlatform, t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) assert.NoError(t, err) assert.Equal(t, test.expectedDeviceUID, *spec.Linux.Devices[0].UID) @@ -1890,7 +1653,7 @@ func TestPrivilegedDevices(t *testing.T) { PrivilegedWithoutHostDevices: test.privilegedWithoutHostDevices, PrivilegedWithoutHostDevicesAllDevicesAllowed: test.privilegedWithoutHostDevicesAllDevicesAllowed, } - spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) assert.NoError(t, err) hostDevicesRaw, err := oci.HostDevices() @@ -1944,7 +1707,7 @@ func TestBaseOCISpec(t *testing.T) { testPid := uint32(1234) containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) assert.NoError(t, err) specCheck(t, testID, testSandboxID, testPid, spec) @@ -2275,9 
+2038,8 @@ containerEdits: }, }, } { - test := test t.Run(test.description, func(t *testing.T) { - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) require.NoError(t, err) specCheck(t, testID, testSandboxID, testPid, spec) @@ -2311,141 +2073,3 @@ containerEdits: }) } } - -// TestLinuxVolumeMounts tests the linux-specific parts of VolumeMounts. -func TestLinuxVolumeMounts(t *testing.T) { - testContainerRootDir := "test-container-root" - idmap := []*runtime.IDMapping{ - { - ContainerId: 0, - HostId: 100, - Length: 1, - }, - } - - for _, test := range []struct { - desc string - criMounts []*runtime.Mount - imageVolumes map[string]struct{} - usernsEnabled bool - expectedMountDest []string - expectedMappings []*runtime.IDMapping - }{ - { - desc: "should skip image volumes if already mounted by CRI", - usernsEnabled: true, - criMounts: []*runtime.Mount{ - { - ContainerPath: "/test-volume-1", - HostPath: "/test-hostpath-1", - }, - }, - imageVolumes: map[string]struct{}{ - "/test-volume-1": {}, - "/test-volume-2": {}, - }, - expectedMountDest: []string{ - "/test-volume-2", - }, - expectedMappings: idmap, - }, - { - desc: "should include mappings for image volumes", - usernsEnabled: true, - imageVolumes: map[string]struct{}{ - "/test-volume-1/": {}, - "/test-volume-2/": {}, - }, - expectedMountDest: []string{ - "/test-volume-2/", - "/test-volume-2/", - }, - expectedMappings: idmap, - }, - { - desc: "should convert rel imageVolume paths to abs paths", - imageVolumes: map[string]struct{}{ - "test-volume-1/": {}, - "./test-volume-2/": {}, - "../../test-volume-3/": {}, - }, - expectedMountDest: []string{ - "/test-volume-1", - "/test-volume-2", - "/test-volume-3", - }, - }, - { - desc: 
"should convert rel imageVolume paths to abs paths and add userns mappings", - usernsEnabled: true, - imageVolumes: map[string]struct{}{ - "test-volume-1/": {}, - "./test-volume-2/": {}, - "../../test-volume-3/": {}, - }, - expectedMountDest: []string{ - "/test-volume-1", - "/test-volume-2", - "/test-volume-3", - }, - expectedMappings: idmap, - }, - { - desc: "doesn't include mappings for image volumes if userns is disabled", - imageVolumes: map[string]struct{}{ - "/test-volume-1/": {}, - "/test-volume-2/": {}, - }, - expectedMountDest: []string{ - "/test-volume-2/", - "/test-volume-2/", - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - config := &imagespec.ImageConfig{ - Volumes: test.imageVolumes, - } - containerConfig := &runtime.ContainerConfig{ - Mounts: test.criMounts, - } - - if test.usernsEnabled { - containerConfig.Linux = &runtime.LinuxContainerConfig{ - SecurityContext: &runtime.LinuxContainerSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - UsernsOptions: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: idmap, - Gids: idmap, - }, - }, - }, - } - } - - c := newTestCRIService() - got := c.volumeMounts(testContainerRootDir, containerConfig, config) - assert.Len(t, got, len(test.expectedMountDest)) - for _, dest := range test.expectedMountDest { - found := false - for _, m := range got { - if m.ContainerPath != dest { - continue - } - found = true - assert.Equal(t, - filepath.Dir(m.HostPath), - filepath.Join(testContainerRootDir, "volumes")) - - if test.expectedMappings != nil { - assert.Equal(t, test.expectedMappings, m.UidMappings) - assert.Equal(t, test.expectedMappings, m.GidMappings) - } - } - assert.True(t, found) - } - }) - } -} diff --git a/pkg/cri/server/container_create_other.go b/pkg/cri/server/container_create_other.go index 84b56e407..9f010e6f4 100644 --- a/pkg/cri/server/container_create_other.go +++ b/pkg/cri/server/container_create_other.go @@ -19,39 +19,13 @@ package server import ( - 
"github.com/containerd/containerd/oci" - "github.com/containerd/containerd/snapshots" imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - "github.com/containerd/containerd/pkg/cri/annotations" - "github.com/containerd/containerd/pkg/cri/config" + "github.com/containerd/containerd/oci" + "github.com/containerd/containerd/snapshots" ) -// containerMounts sets up necessary container system file mounts -// including /dev/shm, /etc/hosts and /etc/resolv.conf. -func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount { - return []*runtime.Mount{} -} - -func (c *criService) containerSpec( - id string, - sandboxID string, - sandboxPid uint32, - netNSPath string, - containerName string, - imageName string, - config *runtime.ContainerConfig, - sandboxConfig *runtime.PodSandboxConfig, - imageConfig *imagespec.ImageConfig, - extraMounts []*runtime.Mount, - ociRuntime config.Runtime, -) (_ *runtimespec.Spec, retErr error) { - specOpts := annotations.DefaultCRIAnnotations(id, containerName, imageName, sandboxConfig, false) - return c.runtimeSpec(sandboxID, ociRuntime.BaseRuntimeSpec, specOpts...) 
-} - func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { return []oci.SpecOpts{}, nil } diff --git a/pkg/cri/server/container_create_other_test.go b/pkg/cri/server/container_create_other_test.go index 5a539e457..604b289eb 100644 --- a/pkg/cri/server/container_create_other_test.go +++ b/pkg/cri/server/container_create_other_test.go @@ -23,7 +23,10 @@ import ( imagespec "github.com/opencontainers/image-spec/specs-go/v1" runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/assert" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/containerd/containerd/pkg/cri/annotations" ) // checkMount is defined by all tests but not used here @@ -31,10 +34,82 @@ var _ = checkMount func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) { - config := &runtime.ContainerConfig{} - sandboxConfig := &runtime.PodSandboxConfig{} - imageConfig := &imagespec.ImageConfig{} + config := &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: "test-name", + Attempt: 1, + }, + Image: &runtime.ImageSpec{ + Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799", + }, + Command: []string{"test", "command"}, + Args: []string{"test", "args"}, + WorkingDir: "test-cwd", + Envs: []*runtime.KeyValue{ + {Key: "k1", Value: "v1"}, + {Key: "k2", Value: "v2"}, + {Key: "k3", Value: "v3=v3bis"}, + {Key: "k4", Value: "v4=v4bis=foop"}, + }, + Labels: map[string]string{"a": "b"}, + Annotations: map[string]string{"ca-c": "ca-d"}, + Mounts: []*runtime.Mount{ + // everything default + { + ContainerPath: "container-path-1", + HostPath: "host-path-1", + }, + // readOnly + { + ContainerPath: "container-path-2", + HostPath: "host-path-2", + Readonly: true, + }, + }, + } + sandboxConfig := &runtime.PodSandboxConfig{ + Metadata: 
&runtime.PodSandboxMetadata{ + Name: "test-sandbox-name", + Uid: "test-sandbox-uid", + Namespace: "test-sandbox-ns", + Attempt: 2, + }, + Annotations: map[string]string{"c": "d"}, + } + imageConfig := &imagespec.ImageConfig{ + Env: []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"}, + Entrypoint: []string{"/entrypoint"}, + Cmd: []string{"cmd"}, + WorkingDir: "/workspace", + } specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) { + assert.Equal(t, []string{"test", "command", "test", "args"}, spec.Process.Args) + assert.Equal(t, "test-cwd", spec.Process.Cwd) + assert.Contains(t, spec.Process.Env, "k1=v1", "k2=v2", "k3=v3=v3bis", "ik4=iv4=iv4bis=boop") + assert.Contains(t, spec.Process.Env, "ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "k4=v4=v4bis=foop") + + t.Logf("Check bind mount") + checkMount(t, spec.Mounts, "host-path-1", "container-path-1", "bind", []string{"rw"}, nil) + checkMount(t, spec.Mounts, "host-path-2", "container-path-2", "bind", []string{"ro"}, nil) + + t.Logf("Check PodSandbox annotations") + assert.Contains(t, spec.Annotations, annotations.SandboxID) + assert.EqualValues(t, spec.Annotations[annotations.SandboxID], sandboxID) + + assert.Contains(t, spec.Annotations, annotations.ContainerType) + assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeContainer) + + assert.Contains(t, spec.Annotations, annotations.SandboxNamespace) + assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-sandbox-ns") + + assert.Contains(t, spec.Annotations, annotations.SandboxUID) + assert.EqualValues(t, spec.Annotations[annotations.SandboxUID], "test-sandbox-uid") + + assert.Contains(t, spec.Annotations, annotations.SandboxName) + assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-sandbox-name") + + assert.Contains(t, spec.Annotations, annotations.ImageName) + assert.EqualValues(t, spec.Annotations[annotations.ImageName], 
testImageName) } return config, sandboxConfig, imageConfig, specCheck } diff --git a/pkg/cri/server/container_create_test.go b/pkg/cri/server/container_create_test.go index 94bf0c4ae..6f3294f73 100644 --- a/pkg/cri/server/container_create_test.go +++ b/pkg/cri/server/container_create_test.go @@ -18,10 +18,15 @@ package server import ( "context" + "errors" + "os" "path/filepath" goruntime "runtime" "testing" + ostesting "github.com/containerd/containerd/pkg/os/testing" + "github.com/containerd/containerd/platforms" + imagespec "github.com/opencontainers/image-spec/specs-go/v1" runtimespec "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/assert" @@ -34,6 +39,8 @@ import ( "github.com/containerd/containerd/pkg/cri/opts" ) +var currentPlatform = platforms.DefaultSpec() + func checkMount(t *testing.T, mounts []runtimespec.Mount, src, dest, typ string, contains, notcontains []string) { found := false @@ -63,7 +70,7 @@ func TestGeneralContainerSpec(t *testing.T) { c := newTestCRIService() testSandboxID := "sandbox-id" testContainerName := "container-name" - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) require.NoError(t, err) specCheck(t, testID, testSandboxID, testPid, spec) } @@ -139,7 +146,7 @@ func TestPodAnnotationPassthroughContainerSpec(t *testing.T) { ociRuntime := config.Runtime{ PodAnnotations: test.podAnnotations, } - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) assert.NoError(t, err) assert.NotNil(t, 
spec) @@ -224,11 +231,22 @@ func TestContainerSpecCommand(t *testing.T) { func TestVolumeMounts(t *testing.T) { testContainerRootDir := "test-container-root" + idmap := []*runtime.IDMapping{ + { + ContainerId: 0, + HostId: 100, + Length: 1, + }, + } + for _, test := range []struct { desc string + platform platforms.Platform criMounts []*runtime.Mount + usernsEnabled bool imageVolumes map[string]struct{} expectedMountDest []string + expectedMappings []*runtime.IDMapping }{ { desc: "should setup rw mount for image volumes", @@ -273,6 +291,65 @@ func TestVolumeMounts(t *testing.T) { "/test-volume-2/", }, }, + { + desc: "should make relative paths absolute on Linux", + platform: platforms.Platform{OS: "linux"}, + imageVolumes: map[string]struct{}{ + "./test-volume-1": {}, + "C:/test-volume-2": {}, + "../../test-volume-3": {}, + "/abs/test-volume-4": {}, + }, + expectedMountDest: []string{ + "/test-volume-1", + "/C:/test-volume-2", + "/test-volume-3", + "/abs/test-volume-4", + }, + }, + { + desc: "should include mappings for image volumes on Linux", + platform: platforms.Platform{OS: "linux"}, + usernsEnabled: true, + imageVolumes: map[string]struct{}{ + "/test-volume-1/": {}, + "/test-volume-2/": {}, + }, + expectedMountDest: []string{ + "/test-volume-2/", + "/test-volume-2/", + }, + expectedMappings: idmap, + }, + { + desc: "should NOT include mappings for image volumes on Linux if !userns", + platform: platforms.Platform{OS: "linux"}, + usernsEnabled: false, + imageVolumes: map[string]struct{}{ + "/test-volume-1/": {}, + "/test-volume-2/": {}, + }, + expectedMountDest: []string{ + "/test-volume-2/", + "/test-volume-2/", + }, + }, + { + desc: "should convert rel imageVolume paths to abs paths and add userns mappings", + platform: platforms.Platform{OS: "linux"}, + usernsEnabled: true, + imageVolumes: map[string]struct{}{ + "test-volume-1/": {}, + "C:/test-volume-2/": {}, + "../../test-volume-3/": {}, + }, + expectedMountDest: []string{ + "/test-volume-1", + 
"/C:/test-volume-2", + "/test-volume-3", + }, + expectedMappings: idmap, + }, } { test := test t.Run(test.desc, func(t *testing.T) { @@ -280,19 +357,38 @@ func TestVolumeMounts(t *testing.T) { Volumes: test.imageVolumes, } containerConfig := &runtime.ContainerConfig{Mounts: test.criMounts} + if test.usernsEnabled { + containerConfig.Linux = &runtime.LinuxContainerConfig{ + SecurityContext: &runtime.LinuxContainerSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + UsernsOptions: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_POD, + Uids: idmap, + Gids: idmap, + }, + }, + }, + } + } + c := newTestCRIService() - got := c.volumeMounts(testContainerRootDir, containerConfig, config) + got := c.volumeMounts(test.platform, testContainerRootDir, containerConfig, config) assert.Len(t, got, len(test.expectedMountDest)) for _, dest := range test.expectedMountDest { found := false for _, m := range got { - if m.ContainerPath == dest { - found = true - assert.Equal(t, - filepath.Dir(m.HostPath), - filepath.Join(testContainerRootDir, "volumes")) - break + if m.ContainerPath != dest { + continue } + found = true + assert.Equal(t, + filepath.Dir(m.HostPath), + filepath.Join(testContainerRootDir, "volumes")) + if test.expectedMappings != nil { + assert.Equal(t, test.expectedMappings, m.UidMappings) + assert.Equal(t, test.expectedMappings, m.GidMappings) + } + break } assert.True(t, found) } @@ -415,7 +511,7 @@ func TestContainerAnnotationPassthroughContainerSpec(t *testing.T) { PodAnnotations: test.podAnnotations, ContainerAnnotations: test.containerAnnotations, } - spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) assert.NoError(t, err) assert.NotNil(t, spec) @@ -438,6 +534,7 @@ func TestBaseRuntimeSpec(t *testing.T) { out, err := 
c.runtimeSpec( "id1", + platforms.DefaultSpec(), "/etc/containerd/cri-base.json", oci.WithHostname("new-host"), oci.WithDomainname("new-domain"), @@ -455,38 +552,228 @@ func TestBaseRuntimeSpec(t *testing.T) { assert.Equal(t, filepath.Join("/", constants.K8sContainerdNamespace, "id1"), out.Linux.CgroupsPath) } -func TestRuntimeSnapshotter(t *testing.T) { - defaultRuntime := config.Runtime{ - Snapshotter: "", - } - - fooRuntime := config.Runtime{ - Snapshotter: "devmapper", +func TestLinuxContainerMounts(t *testing.T) { + const testSandboxID = "test-id" + idmap := []*runtime.IDMapping{ + { + ContainerId: 0, + HostId: 100, + Length: 1, + }, } for _, test := range []struct { - desc string - runtime config.Runtime - expectSnapshotter string + desc string + statFn func(string) (os.FileInfo, error) + criMounts []*runtime.Mount + securityContext *runtime.LinuxContainerSecurityContext + expectedMounts []*runtime.Mount }{ { - desc: "should return default snapshotter when runtime.Snapshotter is not set", - runtime: defaultRuntime, - expectSnapshotter: config.DefaultConfig().Snapshotter, + desc: "should setup ro mount when rootfs is read-only", + securityContext: &runtime.LinuxContainerSecurityContext{ + ReadonlyRootfs: true, + }, + expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), + Readonly: true, + SelinuxRelabel: true, + }, + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: true, + SelinuxRelabel: true, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), + Readonly: true, + SelinuxRelabel: true, + }, + { + ContainerPath: "/dev/shm", + HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), + Readonly: false, + SelinuxRelabel: true, + }, + }, }, { - desc: "should return overridden snapshotter when 
runtime.Snapshotter is set", - runtime: fooRuntime, - expectSnapshotter: "devmapper", + desc: "should setup rw mount when rootfs is read-write", + securityContext: &runtime.LinuxContainerSecurityContext{}, + expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: "/dev/shm", + HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), + Readonly: false, + SelinuxRelabel: true, + }, + }, + }, + { + desc: "should setup uidMappings/gidMappings when userns is used", + securityContext: &runtime.LinuxContainerSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + UsernsOptions: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_POD, + Uids: idmap, + Gids: idmap, + }, + }, + }, + expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), + Readonly: false, + SelinuxRelabel: true, + UidMappings: idmap, + GidMappings: idmap, + }, + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: false, + SelinuxRelabel: true, + UidMappings: idmap, + GidMappings: idmap, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), + Readonly: false, + SelinuxRelabel: true, + UidMappings: idmap, + GidMappings: idmap, + }, + { + ContainerPath: "/dev/shm", + HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), + Readonly: false, 
+ SelinuxRelabel: true, + }, + }, + }, + { + desc: "should use host /dev/shm when host ipc is set", + securityContext: &runtime.LinuxContainerSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{Ipc: runtime.NamespaceMode_NODE}, + }, + expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: "/dev/shm", + HostPath: "/dev/shm", + Readonly: false, + }, + }, + }, + { + desc: "should skip container mounts if already mounted by CRI", + criMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: "/test-etc-hostname", + }, + { + ContainerPath: "/etc/hosts", + HostPath: "/test-etc-host", + }, + { + ContainerPath: resolvConfPath, + HostPath: "test-resolv-conf", + }, + { + ContainerPath: "/dev/shm", + HostPath: "test-dev-shm", + }, + }, + securityContext: &runtime.LinuxContainerSecurityContext{}, + expectedMounts: nil, + }, + { + desc: "should skip hostname mount if the old sandbox doesn't have hostname file", + statFn: func(path string) (os.FileInfo, error) { + assert.Equal(t, filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), path) + return nil, errors.New("random error") + }, + securityContext: &runtime.LinuxContainerSecurityContext{}, + expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, 
"resolv.conf"), + Readonly: false, + SelinuxRelabel: true, + }, + { + ContainerPath: "/dev/shm", + HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), + Readonly: false, + SelinuxRelabel: true, + }, + }, }, } { test := test t.Run(test.desc, func(t *testing.T) { - cri := newTestCRIService() - cri.config = config.Config{ - PluginConfig: config.DefaultConfig(), + config := &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: "test-name", + Attempt: 1, + }, + Mounts: test.criMounts, + Linux: &runtime.LinuxContainerConfig{ + SecurityContext: test.securityContext, + }, } - assert.Equal(t, test.expectSnapshotter, cri.runtimeSnapshotter(context.Background(), test.runtime)) + c := newTestCRIService() + c.os.(*ostesting.FakeOS).StatFn = test.statFn + mounts := c.linuxContainerMounts(testSandboxID, config) + assert.Equal(t, test.expectedMounts, mounts, test.desc) }) } } diff --git a/pkg/cri/server/container_create_windows.go b/pkg/cri/server/container_create_windows.go index ec253bcb1..0b72173ce 100644 --- a/pkg/cri/server/container_create_windows.go +++ b/pkg/cri/server/container_create_windows.go @@ -17,122 +17,15 @@ package server import ( - "errors" "strconv" imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" "github.com/containerd/containerd/oci" "github.com/containerd/containerd/snapshots" - - "github.com/containerd/containerd/pkg/cri/annotations" - "github.com/containerd/containerd/pkg/cri/config" - customopts "github.com/containerd/containerd/pkg/cri/opts" ) -// No container mounts for windows. 
-func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount { - return nil -} - -func (c *criService) containerSpec( - id string, - sandboxID string, - sandboxPid uint32, - netNSPath string, - containerName string, - imageName string, - config *runtime.ContainerConfig, - sandboxConfig *runtime.PodSandboxConfig, - imageConfig *imagespec.ImageConfig, - extraMounts []*runtime.Mount, - ociRuntime config.Runtime, -) (*runtimespec.Spec, error) { - var specOpts []oci.SpecOpts - specOpts = append(specOpts, customopts.WithProcessCommandLineOrArgsForWindows(config, imageConfig)) - - // All containers in a pod need to have HostProcess set if it was set on the pod, - // and vice versa no containers in the pod can be HostProcess if the pods spec - // didn't have the field set. The only case that is valid is if these are the same value. - cntrHpc := config.GetWindows().GetSecurityContext().GetHostProcess() - sandboxHpc := sandboxConfig.GetWindows().GetSecurityContext().GetHostProcess() - if cntrHpc != sandboxHpc { - return nil, errors.New("pod spec and all containers inside must have the HostProcess field set to be valid") - } - - if config.GetWorkingDir() != "" { - specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir())) - } else if imageConfig.WorkingDir != "" { - specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) - } - - if config.GetTty() { - specOpts = append(specOpts, oci.WithTTY) - } - - // Apply envs from image config first, so that envs from container config - // can override them. - env := append([]string{}, imageConfig.Env...) - for _, e := range config.GetEnvs() { - env = append(env, e.GetKey()+"="+e.GetValue()) - } - specOpts = append(specOpts, oci.WithEnv(env)) - - specOpts = append(specOpts, - // Clear the root location since hcsshim expects it. - // NOTE: readonly rootfs doesn't work on windows. 
- customopts.WithoutRoot, - oci.WithWindowsNetworkNamespace(netNSPath), - oci.WithHostname(sandboxConfig.GetHostname()), - ) - - specOpts = append(specOpts, customopts.WithWindowsMounts(c.os, config, extraMounts), customopts.WithWindowsDevices(config)) - - // Start with the image config user and override below if RunAsUsername is not "". - username := imageConfig.User - - windowsConfig := config.GetWindows() - if windowsConfig != nil { - specOpts = append(specOpts, customopts.WithWindowsResources(windowsConfig.GetResources())) - securityCtx := windowsConfig.GetSecurityContext() - if securityCtx != nil { - runAsUser := securityCtx.GetRunAsUsername() - if runAsUser != "" { - username = runAsUser - } - cs := securityCtx.GetCredentialSpec() - if cs != "" { - specOpts = append(specOpts, customopts.WithWindowsCredentialSpec(cs)) - } - } - } - - // There really isn't a good Windows way to verify that the username is available in the - // image as early as here like there is for Linux. Later on in the stack hcsshim - // will handle the behavior of erroring out if the user isn't available in the image - // when trying to run the init process. - specOpts = append(specOpts, oci.WithUser(username)) - - for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations, - ociRuntime.PodAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - for pKey, pValue := range getPassthroughAnnotations(config.Annotations, - ociRuntime.ContainerAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - specOpts = append(specOpts, customopts.WithAnnotation(annotations.WindowsHostProcess, strconv.FormatBool(sandboxHpc))) - specOpts = append(specOpts, - annotations.DefaultCRIAnnotations(sandboxID, containerName, imageName, sandboxConfig, false)..., - ) - - return c.runtimeSpec(id, ociRuntime.BaseRuntimeSpec, specOpts...) -} - // No extra spec options needed for windows. 
func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { return nil, nil diff --git a/pkg/cri/server/container_create_windows_test.go b/pkg/cri/server/container_create_windows_test.go index 706c05253..70f9a2afd 100644 --- a/pkg/cri/server/container_create_windows_test.go +++ b/pkg/cri/server/container_create_windows_test.go @@ -22,27 +22,12 @@ import ( imagespec "github.com/opencontainers/image-spec/specs-go/v1" runtimespec "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" "github.com/containerd/containerd/pkg/cri/annotations" "github.com/containerd/containerd/pkg/cri/config" ) -func getSandboxConfig() *runtime.PodSandboxConfig { - return &runtime.PodSandboxConfig{ - Metadata: &runtime.PodSandboxMetadata{ - Name: "test-sandbox-name", - Uid: "test-sandbox-uid", - Namespace: "test-sandbox-ns", - Attempt: 2, - }, - Windows: &runtime.WindowsPodSandboxConfig{}, - Hostname: "test-hostname", - Annotations: map[string]string{"c": "d"}, - } -} - func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) { config := &runtime.ContainerConfig{ @@ -91,7 +76,17 @@ func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandbox }, }, } - sandboxConfig := getSandboxConfig() + sandboxConfig := &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: "test-sandbox-name", + Uid: "test-sandbox-uid", + Namespace: "test-sandbox-ns", + Attempt: 2, + }, + Windows: &runtime.WindowsPodSandboxConfig{}, + Hostname: "test-hostname", + Annotations: map[string]string{"c": "d"}, + } imageConfig := &imagespec.ImageConfig{ Env: []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"}, Entrypoint: []string{"/entrypoint"}, @@ -156,7 +151,7 @@ 
func TestContainerWindowsNetworkNamespace(t *testing.T) { c := newTestCRIService() containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData() - spec, err := c.containerSpec(testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) assert.NoError(t, err) assert.NotNil(t, spec) specCheck(t, testID, testSandboxID, testPid, spec) @@ -178,7 +173,7 @@ func TestMountCleanPath(t *testing.T) { ContainerPath: "c:/test/container-path", HostPath: "c:/test/host-path", }) - spec, err := c.containerSpec(testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) assert.NoError(t, err) assert.NotNil(t, spec) specCheck(t, testID, testSandboxID, testPid, spec) @@ -198,7 +193,7 @@ func TestMountNamedPipe(t *testing.T) { ContainerPath: `\\.\pipe\foo`, HostPath: `\\.\pipe\foo`, }) - spec, err := c.containerSpec(testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) + spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) assert.NoError(t, err) assert.NotNil(t, spec) specCheck(t, testID, testSandboxID, testPid, spec) @@ -250,7 +245,7 @@ func TestHostProcessRequirements(t *testing.T) { sandboxConfig.Windows.SecurityContext = &runtime.WindowsSandboxSecurityContext{ HostProcess: 
test.sandboxHostProcess, } - _, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + _, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) if test.expectError { assert.Error(t, err) } else { @@ -259,111 +254,3 @@ func TestHostProcessRequirements(t *testing.T) { }) } } - -func TestEntrypointAndCmdForArgsEscaped(t *testing.T) { - testID := "test-id" - testSandboxID := "sandbox-id" - testContainerName := "container-name" - testPid := uint32(1234) - nsPath := "test-ns" - c := newTestCRIService() - - for _, test := range []struct { - name string - imgEntrypoint []string - imgCmd []string - command []string - args []string - expectedArgs []string - expectedCommandLine string - ArgsEscaped bool - }{ - // override image entrypoint and cmd in shell form with container args and verify expected runtime spec - { - name: "TestShellFormImgEntrypointCmdWithCtrArgs", - imgEntrypoint: []string{`"C:\My Folder\MyProcess.exe" -arg1 "test value"`}, - imgCmd: []string{`cmd -args "hello world"`}, - command: nil, - args: []string{`cmd -args "additional args"`}, - expectedArgs: nil, - expectedCommandLine: `"C:\My Folder\MyProcess.exe" -arg1 "test value" "cmd -args \"additional args\""`, - ArgsEscaped: true, - }, - // check image entrypoint and cmd in shell form without overriding with container command and args and verify expected runtime spec - { - name: "TestShellFormImgEntrypointCmdWithoutCtrArgs", - imgEntrypoint: []string{`"C:\My Folder\MyProcess.exe" -arg1 "test value"`}, - imgCmd: []string{`cmd -args "hello world"`}, - command: nil, - args: nil, - expectedArgs: nil, - expectedCommandLine: `"C:\My Folder\MyProcess.exe" -arg1 "test value" "cmd -args \"hello world\""`, - ArgsEscaped: true, - }, - // override image entrypoint and cmd by container command and 
args in shell form and verify expected runtime spec - { - name: "TestShellFormImgEntrypointCmdWithCtrEntrypointAndArgs", - imgEntrypoint: []string{`"C:\My Folder\MyProcess.exe" -arg1 "test value"`}, - imgCmd: []string{`cmd -args "hello world"`}, - command: []string{`C:\My Folder\MyProcess.exe`, "-arg1", "additional test value"}, - args: []string{"cmd", "-args", "additional args"}, - expectedArgs: nil, - expectedCommandLine: `"C:\My Folder\MyProcess.exe" -arg1 "additional test value" cmd -args "additional args"`, - ArgsEscaped: true, - }, - // override image cmd by container args in exec form and verify expected runtime spec - { - name: "TestExecFormImgEntrypointCmdWithCtrArgs", - imgEntrypoint: []string{`C:\My Folder\MyProcess.exe`, "-arg1", "test value"}, - imgCmd: []string{"cmd", "-args", "hello world"}, - command: nil, - args: []string{"additional", "args"}, - expectedArgs: []string{`C:\My Folder\MyProcess.exe`, "-arg1", "test value", "additional", "args"}, - expectedCommandLine: "", - ArgsEscaped: false, - }, - // check image entrypoint and cmd in exec form without overriding with container command and args and verify expected runtime spec - { - name: "TestExecFormImgEntrypointCmdWithoutCtrArgs", - imgEntrypoint: []string{`C:\My Folder\MyProcess.exe`, "-arg1", "test value"}, - imgCmd: []string{"cmd", "-args", "hello world"}, - command: nil, - args: nil, - expectedArgs: []string{`C:\My Folder\MyProcess.exe`, "-arg1", "test value", "cmd", "-args", "hello world"}, - expectedCommandLine: "", - ArgsEscaped: false, - }, - } { - test := test - t.Run(test.name, func(t *testing.T) { - imageConfig := &imagespec.ImageConfig{ - Entrypoint: test.imgEntrypoint, - Cmd: test.imgCmd, - ArgsEscaped: test.ArgsEscaped, - } - sandboxConfig := getSandboxConfig() - containerConfig := &runtime.ContainerConfig{ - Metadata: &runtime.ContainerMetadata{ - Name: "test-name", - Attempt: 1, - }, - Image: &runtime.ImageSpec{ - Image: testImageName, - }, - Command: test.command, - Args: 
test.args, - Windows: &runtime.WindowsContainerConfig{}, - } - runtimeSpec, err := c.containerSpec(testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{}) - assert.NoError(t, err) - assert.NotNil(t, runtimeSpec) - - // check the runtime spec for expected commandline and args - actualCommandLine := runtimeSpec.Process.CommandLine - actualArgs := runtimeSpec.Process.Args - - require.Equal(t, actualArgs, test.expectedArgs) - require.Equal(t, actualCommandLine, test.expectedCommandLine) - }) - } -} diff --git a/pkg/cri/server/container_execsync.go b/pkg/cri/server/container_execsync.go index fa0d3fe0e..ba2c3a7d1 100644 --- a/pkg/cri/server/container_execsync.go +++ b/pkg/cri/server/container_execsync.go @@ -146,8 +146,6 @@ func (c *criService) execInternal(ctx context.Context, container containerd.Cont } pspec.Args = opts.cmd - // CommandLine may already be set on the container's spec, but we want to only use Args here. 
- pspec.CommandLine = "" if opts.stdout == nil { opts.stdout = cio.NewDiscardLogger() diff --git a/pkg/cri/server/container_start.go b/pkg/cri/server/container_start.go index 394bcd5a6..92ae32d34 100644 --- a/pkg/cri/server/container_start.go +++ b/pkg/cri/server/container_start.go @@ -149,6 +149,7 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain } } }() + err = c.nri.StartContainer(ctx, &sandbox, &cntr) if err != nil { log.G(ctx).WithError(err).Errorf("NRI container start failed") diff --git a/pkg/cri/server/container_start_test.go b/pkg/cri/server/container_start_test.go index 127cfc98f..0c457f4f1 100644 --- a/pkg/cri/server/container_start_test.go +++ b/pkg/cri/server/container_start_test.go @@ -34,7 +34,6 @@ func TestSetContainerStarting(t *testing.T) { status containerstore.Status expectErr bool }{ - { desc: "should not return error when container is in created state", status: containerstore.Status{ diff --git a/pkg/cri/server/container_stats.go b/pkg/cri/server/container_stats.go index 0ca66eef3..e2ca5f1ac 100644 --- a/pkg/cri/server/container_stats.go +++ b/pkg/cri/server/container_stats.go @@ -20,7 +20,7 @@ import ( "context" "fmt" - tasks "github.com/containerd/containerd/api/services/tasks/v1" + "github.com/containerd/containerd/api/services/tasks/v1" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ) @@ -40,7 +40,12 @@ func (c *criService) ContainerStats(ctx context.Context, in *runtime.ContainerSt return nil, fmt.Errorf("unexpected metrics response: %+v", resp.Metrics) } - cs, err := c.containerMetrics(cntr.Metadata, resp.Metrics[0]) + handler, err := c.getMetricsHandler(ctx, cntr.SandboxID) + if err != nil { + return nil, err + } + + cs, err := handler(cntr.Metadata, resp.Metrics[0]) if err != nil { return nil, fmt.Errorf("failed to decode container metrics: %w", err) } diff --git a/pkg/cri/server/container_stats_list.go b/pkg/cri/server/container_stats_list.go index cb61eecb3..7c70a4854 100644 --- 
a/pkg/cri/server/container_stats_list.go +++ b/pkg/cri/server/container_stats_list.go @@ -18,13 +18,21 @@ package server import ( "context" + "errors" "fmt" + "reflect" "time" - "github.com/containerd/containerd/pkg/cri/store/stats" - - tasks "github.com/containerd/containerd/api/services/tasks/v1" + wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" + cg1 "github.com/containerd/cgroups/v3/cgroup1/stats" + cg2 "github.com/containerd/cgroups/v3/cgroup2/stats" + "github.com/containerd/containerd/api/services/tasks/v1" "github.com/containerd/containerd/api/types" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/pkg/cri/store/stats" + "github.com/containerd/containerd/protobuf" + "github.com/containerd/log" + "github.com/containerd/typeurl/v2" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" containerstore "github.com/containerd/containerd/pkg/cri/store/container" @@ -43,14 +51,48 @@ func (c *criService) ListContainerStats( if err != nil { return nil, fmt.Errorf("failed to fetch metrics for tasks: %w", err) } - criStats, err := c.toCRIContainerStats(resp.Metrics, containers) + criStats, err := c.toCRIContainerStats(ctx, resp.Metrics, containers) if err != nil { return nil, fmt.Errorf("failed to convert to cri containerd stats format: %w", err) } return criStats, nil } +type metricsHandler func(containerstore.Metadata, *types.Metric) (*runtime.ContainerStats, error) + +// Returns a function to be used for transforming container metrics into the right format. +// Uses the platform the given sandbox advertises to implement its logic. If the platform is +// unsupported for metrics this will return a wrapped [errdefs.ErrNotImplemented]. 
+func (c *criService) getMetricsHandler(ctx context.Context, sandboxID string) (metricsHandler, error) { + sandbox, err := c.sandboxStore.Get(sandboxID) + if err != nil { + return nil, fmt.Errorf("failed to find sandbox id %q: %w", sandboxID, err) + } + controller, err := c.getSandboxController(sandbox.Config, sandbox.RuntimeHandler) + if err != nil { + return nil, fmt.Errorf("failed to get sandbox controller: %w", err) + } + // Grab the platform that this containers sandbox advertises. Reason being, even if + // the host may be {insert platform}, if it virtualizes or emulates a different platform + // it will return stats in that format, and we need to handle the conversion logic based + // off of this info. + p, err := controller.Platform(ctx, sandboxID) + if err != nil { + return nil, err + } + + switch p.OS { + case "windows": + return c.windowsContainerMetrics, nil + case "linux": + return c.linuxContainerMetrics, nil + default: + return nil, fmt.Errorf("container metrics for platform %+v: %w", p, errdefs.ErrNotImplemented) + } +} + func (c *criService) toCRIContainerStats( + ctx context.Context, stats []*types.Metric, containers []containerstore.Container, ) (*runtime.ListContainerStatsResponse, error) { @@ -59,8 +101,37 @@ func (c *criService) toCRIContainerStats( statsMap[stat.ID] = stat } containerStats := new(runtime.ListContainerStatsResponse) + + // Unfortunately if no filter was passed we're asking for every containers stats which + // generally belong to multiple different pods, who all might have different platforms. + // To avoid recalculating the right metricsHandler to invoke, if we've already calculated + // the platform and handler for a given sandbox just pull it from our map here. 
+ var ( + err error + handler metricsHandler + ) + sandboxToMetricsHandler := make(map[string]metricsHandler) for _, cntr := range containers { - cs, err := c.containerMetrics(cntr.Metadata, statsMap[cntr.ID]) + h, ok := sandboxToMetricsHandler[cntr.SandboxID] + if !ok { + handler, err = c.getMetricsHandler(ctx, cntr.SandboxID) + if err != nil { + // If the sandbox is not found, it may have been removed. we need to check container whether it is still exist + if errdefs.IsNotFound(err) { + _, err = c.containerStore.Get(cntr.ID) + if err != nil && errdefs.IsNotFound(err) { + log.G(ctx).Warnf("container %q is not found, skip it", cntr.ID) + continue + } + } + return nil, fmt.Errorf("failed to get metrics handler for container %q: %w", cntr.ID, err) + } + sandboxToMetricsHandler[cntr.SandboxID] = handler + } else { + handler = h + } + + cs, err := handler(cntr.Metadata, statsMap[cntr.ID]) if err != nil { return nil, fmt.Errorf("failed to decode container metrics for %q: %w", cntr.ID, err) } @@ -73,7 +144,6 @@ func (c *criService) toCRIContainerStats( } cs.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoUsage} } - containerStats.Stats = append(containerStats.Stats, cs) } return containerStats, nil @@ -134,7 +204,6 @@ func (c *criService) getUsageNanoCores(containerID string, isSandbox bool, curre if err != nil { return 0, fmt.Errorf("failed to update sandbox container stats: %s: %w", containerID, err) } - } else { err := c.containerStore.UpdateContainerStats(containerID, newStats) if err != nil { @@ -194,3 +263,238 @@ func matchLabelSelector(selector, labels map[string]string) bool { } return true } + +func (c *criService) windowsContainerMetrics( + meta containerstore.Metadata, + stats *types.Metric, +) (*runtime.ContainerStats, error) { + var cs runtime.ContainerStats + var usedBytes, inodesUsed uint64 + sn, err := c.GetSnapshot(meta.ID) + // If snapshotstore doesn't have cached snapshot information + // set WritableLayer usage to zero + if err == nil { + 
usedBytes = sn.Size + inodesUsed = sn.Inodes + } + cs.WritableLayer = &runtime.FilesystemUsage{ + Timestamp: sn.Timestamp, + FsId: &runtime.FilesystemIdentifier{ + Mountpoint: c.imageFSPath, + }, + UsedBytes: &runtime.UInt64Value{Value: usedBytes}, + InodesUsed: &runtime.UInt64Value{Value: inodesUsed}, + } + cs.Attributes = &runtime.ContainerAttributes{ + Id: meta.ID, + Metadata: meta.Config.GetMetadata(), + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + } + + if stats != nil { + s, err := typeurl.UnmarshalAny(stats.Data) + if err != nil { + return nil, fmt.Errorf("failed to extract container metrics: %w", err) + } + wstats := s.(*wstats.Statistics).GetWindows() + if wstats == nil { + return nil, errors.New("windows stats is empty") + } + if wstats.Processor != nil { + cs.Cpu = &runtime.CpuUsage{ + Timestamp: (protobuf.FromTimestamp(wstats.Timestamp)).UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: wstats.Processor.TotalRuntimeNS}, + } + } + if wstats.Memory != nil { + cs.Memory = &runtime.MemoryUsage{ + Timestamp: (protobuf.FromTimestamp(wstats.Timestamp)).UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{ + Value: wstats.Memory.MemoryUsagePrivateWorkingSetBytes, + }, + } + } + } + return &cs, nil +} + +func (c *criService) linuxContainerMetrics( + meta containerstore.Metadata, + stats *types.Metric, +) (*runtime.ContainerStats, error) { + var cs runtime.ContainerStats + var usedBytes, inodesUsed uint64 + sn, err := c.GetSnapshot(meta.ID) + // If snapshotstore doesn't have cached snapshot information + // set WritableLayer usage to zero + if err == nil { + usedBytes = sn.Size + inodesUsed = sn.Inodes + } + cs.WritableLayer = &runtime.FilesystemUsage{ + Timestamp: sn.Timestamp, + FsId: &runtime.FilesystemIdentifier{ + Mountpoint: c.imageFSPath, + }, + UsedBytes: &runtime.UInt64Value{Value: usedBytes}, + InodesUsed: &runtime.UInt64Value{Value: inodesUsed}, + } + cs.Attributes = &runtime.ContainerAttributes{ + Id: 
meta.ID, + Metadata: meta.Config.GetMetadata(), + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + } + + if stats != nil { + var data interface{} + switch { + case typeurl.Is(stats.Data, (*cg1.Metrics)(nil)): + data = &cg1.Metrics{} + case typeurl.Is(stats.Data, (*cg2.Metrics)(nil)): + data = &cg2.Metrics{} + case typeurl.Is(stats.Data, (*wstats.Statistics)(nil)): + data = &wstats.Statistics{} + default: + return nil, errors.New("cannot convert metric data to cgroups.Metrics or windows.Statistics") + } + + if err := typeurl.UnmarshalTo(stats.Data, data); err != nil { + return nil, fmt.Errorf("failed to extract container metrics: %w", err) + } + + cpuStats, err := c.cpuContainerStats(meta.ID, false /* isSandbox */, data, protobuf.FromTimestamp(stats.Timestamp)) + if err != nil { + return nil, fmt.Errorf("failed to obtain cpu stats: %w", err) + } + cs.Cpu = cpuStats + + memoryStats, err := c.memoryContainerStats(meta.ID, data, protobuf.FromTimestamp(stats.Timestamp)) + if err != nil { + return nil, fmt.Errorf("failed to obtain memory stats: %w", err) + } + cs.Memory = memoryStats + } + + return &cs, nil +} + +// getWorkingSet calculates workingset memory from cgroup memory stats. +// The caller should make sure memory is not nil. +// workingset = usage - total_inactive_file +func getWorkingSet(memory *cg1.MemoryStat) uint64 { + if memory.Usage == nil { + return 0 + } + var workingSet uint64 + if memory.TotalInactiveFile < memory.Usage.Usage { + workingSet = memory.Usage.Usage - memory.TotalInactiveFile + } + return workingSet +} + +// getWorkingSetV2 calculates workingset memory from cgroupv2 memory stats. +// The caller should make sure memory is not nil. 
+// workingset = usage - inactive_file +func getWorkingSetV2(memory *cg2.MemoryStat) uint64 { + var workingSet uint64 + if memory.InactiveFile < memory.Usage { + workingSet = memory.Usage - memory.InactiveFile + } + return workingSet +} + +func isMemoryUnlimited(v uint64) bool { + // Size after which we consider memory to be "unlimited". This is not + // MaxInt64 due to rounding by the kernel. + // TODO: k8s or cadvisor should export this https://github.com/google/cadvisor/blob/2b6fbacac7598e0140b5bc8428e3bdd7d86cf5b9/metrics/prometheus.go#L1969-L1971 + const maxMemorySize = uint64(1 << 62) + + return v > maxMemorySize +} + +// https://github.com/kubernetes/kubernetes/blob/b47f8263e18c7b13dba33fba23187e5e0477cdbd/pkg/kubelet/stats/helper.go#L68-L71 +func getAvailableBytes(memory *cg1.MemoryStat, workingSetBytes uint64) uint64 { + // memory limit - working set bytes + if !isMemoryUnlimited(memory.Usage.Limit) { + return memory.Usage.Limit - workingSetBytes + } + return 0 +} + +func getAvailableBytesV2(memory *cg2.MemoryStat, workingSetBytes uint64) uint64 { + // memory limit (memory.max) for cgroupv2 - working set bytes + if !isMemoryUnlimited(memory.UsageLimit) { + return memory.UsageLimit - workingSetBytes + } + return 0 +} + +func (c *criService) cpuContainerStats(ID string, isSandbox bool, stats interface{}, timestamp time.Time) (*runtime.CpuUsage, error) { + switch metrics := stats.(type) { + case *cg1.Metrics: + metrics.GetCPU().GetUsage() + if metrics.CPU != nil && metrics.CPU.Usage != nil { + return &runtime.CpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: metrics.CPU.Usage.Total}, + }, nil + } + case *cg2.Metrics: + if metrics.CPU != nil { + // convert to nano seconds + usageCoreNanoSeconds := metrics.CPU.UsageUsec * 1000 + + return &runtime.CpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: usageCoreNanoSeconds}, + }, nil + } + default: + return nil, 
fmt.Errorf("unexpected metrics type: %T from %s", metrics, reflect.TypeOf(metrics).Elem().PkgPath()) + } + return nil, nil +} + +func (c *criService) memoryContainerStats(ID string, stats interface{}, timestamp time.Time) (*runtime.MemoryUsage, error) { + switch metrics := stats.(type) { + case *cg1.Metrics: + if metrics.Memory != nil && metrics.Memory.Usage != nil { + workingSetBytes := getWorkingSet(metrics.Memory) + + return &runtime.MemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{ + Value: workingSetBytes, + }, + AvailableBytes: &runtime.UInt64Value{Value: getAvailableBytes(metrics.Memory, workingSetBytes)}, + UsageBytes: &runtime.UInt64Value{Value: metrics.Memory.Usage.Usage}, + RssBytes: &runtime.UInt64Value{Value: metrics.Memory.TotalRSS}, + PageFaults: &runtime.UInt64Value{Value: metrics.Memory.TotalPgFault}, + MajorPageFaults: &runtime.UInt64Value{Value: metrics.Memory.TotalPgMajFault}, + }, nil + } + case *cg2.Metrics: + if metrics.Memory != nil { + workingSetBytes := getWorkingSetV2(metrics.Memory) + + return &runtime.MemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{ + Value: workingSetBytes, + }, + AvailableBytes: &runtime.UInt64Value{Value: getAvailableBytesV2(metrics.Memory, workingSetBytes)}, + UsageBytes: &runtime.UInt64Value{Value: metrics.Memory.Usage}, + // Use Anon memory for RSS as cAdvisor on cgroupv2 + // see https://github.com/google/cadvisor/blob/a9858972e75642c2b1914c8d5428e33e6392c08a/container/libcontainer/handler.go#L799 + RssBytes: &runtime.UInt64Value{Value: metrics.Memory.Anon}, + PageFaults: &runtime.UInt64Value{Value: metrics.Memory.Pgfault}, + MajorPageFaults: &runtime.UInt64Value{Value: metrics.Memory.Pgmajfault}, + }, nil + } + default: + return nil, fmt.Errorf("unexpected metrics type: %T from %s", metrics, reflect.TypeOf(metrics).Elem().PkgPath()) + } + return nil, nil +} diff --git a/pkg/cri/server/container_stats_list_linux.go 
b/pkg/cri/server/container_stats_list_linux.go deleted file mode 100644 index ee46db934..000000000 --- a/pkg/cri/server/container_stats_list_linux.go +++ /dev/null @@ -1,214 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "errors" - "fmt" - "reflect" - "time" - - wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" - v1 "github.com/containerd/cgroups/v3/cgroup1/stats" - v2 "github.com/containerd/cgroups/v3/cgroup2/stats" - "github.com/containerd/containerd/api/types" - "github.com/containerd/containerd/protobuf" - "github.com/containerd/typeurl/v2" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - containerstore "github.com/containerd/containerd/pkg/cri/store/container" -) - -func (c *criService) containerMetrics( - meta containerstore.Metadata, - stats *types.Metric, -) (*runtime.ContainerStats, error) { - var cs runtime.ContainerStats - var usedBytes, inodesUsed uint64 - sn, err := c.snapshotStore.Get(meta.ID) - // If snapshotstore doesn't have cached snapshot information - // set WritableLayer usage to zero - if err == nil { - usedBytes = sn.Size - inodesUsed = sn.Inodes - } - cs.WritableLayer = &runtime.FilesystemUsage{ - Timestamp: sn.Timestamp, - FsId: &runtime.FilesystemIdentifier{ - Mountpoint: c.imageFSPath, - }, - UsedBytes: &runtime.UInt64Value{Value: usedBytes}, - InodesUsed: &runtime.UInt64Value{Value: inodesUsed}, - } - cs.Attributes = 
&runtime.ContainerAttributes{ - Id: meta.ID, - Metadata: meta.Config.GetMetadata(), - Labels: meta.Config.GetLabels(), - Annotations: meta.Config.GetAnnotations(), - } - - if stats != nil { - var data interface{} - switch { - case typeurl.Is(stats.Data, (*v1.Metrics)(nil)): - data = &v1.Metrics{} - case typeurl.Is(stats.Data, (*v2.Metrics)(nil)): - data = &v2.Metrics{} - case typeurl.Is(stats.Data, (*wstats.Statistics)(nil)): - data = &wstats.Statistics{} - default: - return nil, errors.New("cannot convert metric data to cgroups.Metrics or windows.Statistics") - } - - if err := typeurl.UnmarshalTo(stats.Data, data); err != nil { - return nil, fmt.Errorf("failed to extract container metrics: %w", err) - } - - cpuStats, err := c.cpuContainerStats(meta.ID, false /* isSandbox */, data, protobuf.FromTimestamp(stats.Timestamp)) - if err != nil { - return nil, fmt.Errorf("failed to obtain cpu stats: %w", err) - } - cs.Cpu = cpuStats - - memoryStats, err := c.memoryContainerStats(meta.ID, data, protobuf.FromTimestamp(stats.Timestamp)) - if err != nil { - return nil, fmt.Errorf("failed to obtain memory stats: %w", err) - } - cs.Memory = memoryStats - } - - return &cs, nil -} - -// getWorkingSet calculates workingset memory from cgroup memory stats. -// The caller should make sure memory is not nil. -// workingset = usage - total_inactive_file -func getWorkingSet(memory *v1.MemoryStat) uint64 { - if memory.Usage == nil { - return 0 - } - var workingSet uint64 - if memory.TotalInactiveFile < memory.Usage.Usage { - workingSet = memory.Usage.Usage - memory.TotalInactiveFile - } - return workingSet -} - -// getWorkingSetV2 calculates workingset memory from cgroupv2 memory stats. -// The caller should make sure memory is not nil. 
-// workingset = usage - inactive_file -func getWorkingSetV2(memory *v2.MemoryStat) uint64 { - var workingSet uint64 - if memory.InactiveFile < memory.Usage { - workingSet = memory.Usage - memory.InactiveFile - } - return workingSet -} - -func isMemoryUnlimited(v uint64) bool { - // Size after which we consider memory to be "unlimited". This is not - // MaxInt64 due to rounding by the kernel. - // TODO: k8s or cadvisor should export this https://github.com/google/cadvisor/blob/2b6fbacac7598e0140b5bc8428e3bdd7d86cf5b9/metrics/prometheus.go#L1969-L1971 - const maxMemorySize = uint64(1 << 62) - - return v > maxMemorySize -} - -// https://github.com/kubernetes/kubernetes/blob/b47f8263e18c7b13dba33fba23187e5e0477cdbd/pkg/kubelet/stats/helper.go#L68-L71 -func getAvailableBytes(memory *v1.MemoryStat, workingSetBytes uint64) uint64 { - // memory limit - working set bytes - if !isMemoryUnlimited(memory.Usage.Limit) { - return memory.Usage.Limit - workingSetBytes - } - return 0 -} - -func getAvailableBytesV2(memory *v2.MemoryStat, workingSetBytes uint64) uint64 { - // memory limit (memory.max) for cgroupv2 - working set bytes - if !isMemoryUnlimited(memory.UsageLimit) { - return memory.UsageLimit - workingSetBytes - } - return 0 -} - -func (c *criService) cpuContainerStats(ID string, isSandbox bool, stats interface{}, timestamp time.Time) (*runtime.CpuUsage, error) { - switch metrics := stats.(type) { - case *v1.Metrics: - if metrics.CPU != nil && metrics.CPU.Usage != nil { - - return &runtime.CpuUsage{ - Timestamp: timestamp.UnixNano(), - UsageCoreNanoSeconds: &runtime.UInt64Value{Value: metrics.CPU.Usage.Total}, - }, nil - } - case *v2.Metrics: - if metrics.CPU != nil { - // convert to nano seconds - usageCoreNanoSeconds := metrics.CPU.UsageUsec * 1000 - - return &runtime.CpuUsage{ - Timestamp: timestamp.UnixNano(), - UsageCoreNanoSeconds: &runtime.UInt64Value{Value: usageCoreNanoSeconds}, - }, nil - } - default: - return nil, fmt.Errorf("unexpected metrics type: %T from 
%s", metrics, reflect.TypeOf(metrics).Elem().PkgPath()) - } - return nil, nil -} - -func (c *criService) memoryContainerStats(ID string, stats interface{}, timestamp time.Time) (*runtime.MemoryUsage, error) { - switch metrics := stats.(type) { - case *v1.Metrics: - if metrics.Memory != nil && metrics.Memory.Usage != nil { - workingSetBytes := getWorkingSet(metrics.Memory) - - return &runtime.MemoryUsage{ - Timestamp: timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{ - Value: workingSetBytes, - }, - AvailableBytes: &runtime.UInt64Value{Value: getAvailableBytes(metrics.Memory, workingSetBytes)}, - UsageBytes: &runtime.UInt64Value{Value: metrics.Memory.Usage.Usage}, - RssBytes: &runtime.UInt64Value{Value: metrics.Memory.TotalRSS}, - PageFaults: &runtime.UInt64Value{Value: metrics.Memory.TotalPgFault}, - MajorPageFaults: &runtime.UInt64Value{Value: metrics.Memory.TotalPgMajFault}, - }, nil - } - case *v2.Metrics: - if metrics.Memory != nil { - workingSetBytes := getWorkingSetV2(metrics.Memory) - - return &runtime.MemoryUsage{ - Timestamp: timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{ - Value: workingSetBytes, - }, - AvailableBytes: &runtime.UInt64Value{Value: getAvailableBytesV2(metrics.Memory, workingSetBytes)}, - UsageBytes: &runtime.UInt64Value{Value: metrics.Memory.Usage}, - // Use Anon memory for RSS as cAdvisor on cgroupv2 - // see https://github.com/google/cadvisor/blob/a9858972e75642c2b1914c8d5428e33e6392c08a/container/libcontainer/handler.go#L799 - RssBytes: &runtime.UInt64Value{Value: metrics.Memory.Anon}, - PageFaults: &runtime.UInt64Value{Value: metrics.Memory.Pgfault}, - MajorPageFaults: &runtime.UInt64Value{Value: metrics.Memory.Pgmajfault}, - }, nil - } - default: - return nil, fmt.Errorf("unexpected metrics type: %T from %s", metrics, reflect.TypeOf(metrics).Elem().PkgPath()) - } - return nil, nil -} diff --git a/pkg/cri/server/container_stats_list_linux_test.go b/pkg/cri/server/container_stats_list_linux_test.go deleted 
file mode 100644 index f061a412f..000000000 --- a/pkg/cri/server/container_stats_list_linux_test.go +++ /dev/null @@ -1,283 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "math" - "testing" - "time" - - v1 "github.com/containerd/cgroups/v3/cgroup1/stats" - v2 "github.com/containerd/cgroups/v3/cgroup2/stats" - "github.com/stretchr/testify/assert" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func TestGetWorkingSet(t *testing.T) { - for _, test := range []struct { - desc string - memory *v1.MemoryStat - expected uint64 - }{ - { - desc: "nil memory usage", - memory: &v1.MemoryStat{}, - expected: 0, - }, - { - desc: "memory usage higher than inactive_total_file", - memory: &v1.MemoryStat{ - TotalInactiveFile: 1000, - Usage: &v1.MemoryEntry{Usage: 2000}, - }, - expected: 1000, - }, - { - desc: "memory usage lower than inactive_total_file", - memory: &v1.MemoryStat{ - TotalInactiveFile: 2000, - Usage: &v1.MemoryEntry{Usage: 1000}, - }, - expected: 0, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - got := getWorkingSet(test.memory) - assert.Equal(t, test.expected, got) - }) - } -} - -func TestGetWorkingSetV2(t *testing.T) { - for _, test := range []struct { - desc string - memory *v2.MemoryStat - expected uint64 - }{ - { - desc: "nil memory usage", - memory: &v2.MemoryStat{}, - expected: 0, - }, - { - desc: "memory usage higher than inactive_total_file", - memory: 
&v2.MemoryStat{ - InactiveFile: 1000, - Usage: 2000, - }, - expected: 1000, - }, - { - desc: "memory usage lower than inactive_total_file", - memory: &v2.MemoryStat{ - InactiveFile: 2000, - Usage: 1000, - }, - expected: 0, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - got := getWorkingSetV2(test.memory) - assert.Equal(t, test.expected, got) - }) - } -} - -func TestGetAvailableBytes(t *testing.T) { - for _, test := range []struct { - desc string - memory *v1.MemoryStat - workingSetBytes uint64 - expected uint64 - }{ - - { - desc: "no limit", - memory: &v1.MemoryStat{ - Usage: &v1.MemoryEntry{ - Limit: math.MaxUint64, // no limit - Usage: 1000, - }, - }, - workingSetBytes: 500, - expected: 0, - }, - { - desc: "with limit", - memory: &v1.MemoryStat{ - Usage: &v1.MemoryEntry{ - Limit: 5000, - Usage: 1000, - }, - }, - workingSetBytes: 500, - expected: 5000 - 500, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - got := getAvailableBytes(test.memory, test.workingSetBytes) - assert.Equal(t, test.expected, got) - }) - } -} - -func TestGetAvailableBytesV2(t *testing.T) { - for _, test := range []struct { - desc string - memory *v2.MemoryStat - workingSetBytes uint64 - expected uint64 - }{ - - { - desc: "no limit", - memory: &v2.MemoryStat{ - UsageLimit: math.MaxUint64, // no limit - Usage: 1000, - }, - workingSetBytes: 500, - expected: 0, - }, - { - desc: "with limit", - memory: &v2.MemoryStat{ - UsageLimit: 5000, - Usage: 1000, - }, - workingSetBytes: 500, - expected: 5000 - 500, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - got := getAvailableBytesV2(test.memory, test.workingSetBytes) - assert.Equal(t, test.expected, got) - }) - } -} - -func TestContainerMetricsMemory(t *testing.T) { - c := newTestCRIService() - timestamp := time.Now() - - for _, test := range []struct { - desc string - metrics interface{} - expected *runtime.MemoryUsage - }{ - { - desc: "v1 metrics - no memory limit", - metrics: &v1.Metrics{ - 
Memory: &v1.MemoryStat{ - Usage: &v1.MemoryEntry{ - Limit: math.MaxUint64, // no limit - Usage: 1000, - }, - TotalRSS: 10, - TotalPgFault: 11, - TotalPgMajFault: 12, - TotalInactiveFile: 500, - }, - }, - expected: &runtime.MemoryUsage{ - Timestamp: timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{Value: 500}, - AvailableBytes: &runtime.UInt64Value{Value: 0}, - UsageBytes: &runtime.UInt64Value{Value: 1000}, - RssBytes: &runtime.UInt64Value{Value: 10}, - PageFaults: &runtime.UInt64Value{Value: 11}, - MajorPageFaults: &runtime.UInt64Value{Value: 12}, - }, - }, - { - desc: "v1 metrics - memory limit", - metrics: &v1.Metrics{ - Memory: &v1.MemoryStat{ - Usage: &v1.MemoryEntry{ - Limit: 5000, - Usage: 1000, - }, - TotalRSS: 10, - TotalPgFault: 11, - TotalPgMajFault: 12, - TotalInactiveFile: 500, - }, - }, - expected: &runtime.MemoryUsage{ - Timestamp: timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{Value: 500}, - AvailableBytes: &runtime.UInt64Value{Value: 4500}, - UsageBytes: &runtime.UInt64Value{Value: 1000}, - RssBytes: &runtime.UInt64Value{Value: 10}, - PageFaults: &runtime.UInt64Value{Value: 11}, - MajorPageFaults: &runtime.UInt64Value{Value: 12}, - }, - }, - { - desc: "v2 metrics - memory limit", - metrics: &v2.Metrics{ - Memory: &v2.MemoryStat{ - Usage: 1000, - UsageLimit: 5000, - InactiveFile: 0, - Pgfault: 11, - Pgmajfault: 12, - }, - }, - expected: &runtime.MemoryUsage{ - Timestamp: timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{Value: 1000}, - AvailableBytes: &runtime.UInt64Value{Value: 4000}, - UsageBytes: &runtime.UInt64Value{Value: 1000}, - RssBytes: &runtime.UInt64Value{Value: 0}, - PageFaults: &runtime.UInt64Value{Value: 11}, - MajorPageFaults: &runtime.UInt64Value{Value: 12}, - }, - }, - { - desc: "v2 metrics - no memory limit", - metrics: &v2.Metrics{ - Memory: &v2.MemoryStat{ - Usage: 1000, - UsageLimit: math.MaxUint64, // no limit - InactiveFile: 0, - Pgfault: 11, - Pgmajfault: 12, - }, - }, - expected: 
&runtime.MemoryUsage{ - Timestamp: timestamp.UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{Value: 1000}, - AvailableBytes: &runtime.UInt64Value{Value: 0}, - UsageBytes: &runtime.UInt64Value{Value: 1000}, - RssBytes: &runtime.UInt64Value{Value: 0}, - PageFaults: &runtime.UInt64Value{Value: 11}, - MajorPageFaults: &runtime.UInt64Value{Value: 12}, - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - got, err := c.memoryContainerStats("ID", test.metrics, timestamp) - assert.NoError(t, err) - assert.Equal(t, test.expected, got) - }) - } -} diff --git a/pkg/cri/server/container_stats_list_other.go b/pkg/cri/server/container_stats_list_other.go deleted file mode 100644 index 7f1fe2e3c..000000000 --- a/pkg/cri/server/container_stats_list_other.go +++ /dev/null @@ -1,37 +0,0 @@ -//go:build !windows && !linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package server - -import ( - "fmt" - - "github.com/containerd/containerd/api/types" - "github.com/containerd/containerd/errdefs" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - containerstore "github.com/containerd/containerd/pkg/cri/store/container" -) - -func (c *criService) containerMetrics( - meta containerstore.Metadata, - stats *types.Metric, -) (*runtime.ContainerStats, error) { - var cs runtime.ContainerStats - return &cs, fmt.Errorf("container metrics: %w", errdefs.ErrNotImplemented) -} diff --git a/pkg/cri/server/container_stats_list_test.go b/pkg/cri/server/container_stats_list_test.go index 70b34d991..723249008 100644 --- a/pkg/cri/server/container_stats_list_test.go +++ b/pkg/cri/server/container_stats_list_test.go @@ -17,11 +17,19 @@ package server import ( + "context" + "math" + "reflect" "testing" "time" + v1 "github.com/containerd/cgroups/v3/cgroup1/stats" + v2 "github.com/containerd/cgroups/v3/cgroup2/stats" + "github.com/containerd/containerd/api/types" containerstore "github.com/containerd/containerd/pkg/cri/store/container" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" "github.com/stretchr/testify/assert" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ) func TestContainerMetricsCPUNanoCoreUsage(t *testing.T) { @@ -73,5 +81,357 @@ func TestContainerMetricsCPUNanoCoreUsage(t *testing.T) { assert.NotNil(t, container.Stats) }) } +} + +func TestGetWorkingSet(t *testing.T) { + for _, test := range []struct { + desc string + memory *v1.MemoryStat + expected uint64 + }{ + { + desc: "nil memory usage", + memory: &v1.MemoryStat{}, + expected: 0, + }, + { + desc: "memory usage higher than inactive_total_file", + memory: &v1.MemoryStat{ + TotalInactiveFile: 1000, + Usage: &v1.MemoryEntry{Usage: 2000}, + }, + expected: 1000, + }, + { + desc: "memory usage lower than inactive_total_file", + memory: &v1.MemoryStat{ + TotalInactiveFile: 2000, + Usage: &v1.MemoryEntry{Usage: 1000}, + }, + expected: 0, + }, + } { + test := 
test + t.Run(test.desc, func(t *testing.T) { + got := getWorkingSet(test.memory) + assert.Equal(t, test.expected, got) + }) + } +} + +func TestGetWorkingSetV2(t *testing.T) { + for _, test := range []struct { + desc string + memory *v2.MemoryStat + expected uint64 + }{ + { + desc: "nil memory usage", + memory: &v2.MemoryStat{}, + expected: 0, + }, + { + desc: "memory usage higher than inactive_total_file", + memory: &v2.MemoryStat{ + InactiveFile: 1000, + Usage: 2000, + }, + expected: 1000, + }, + { + desc: "memory usage lower than inactive_total_file", + memory: &v2.MemoryStat{ + InactiveFile: 2000, + Usage: 1000, + }, + expected: 0, + }, + } { + test := test + t.Run(test.desc, func(t *testing.T) { + got := getWorkingSetV2(test.memory) + assert.Equal(t, test.expected, got) + }) + } +} + +func TestGetAvailableBytes(t *testing.T) { + for _, test := range []struct { + desc string + memory *v1.MemoryStat + workingSetBytes uint64 + expected uint64 + }{ + { + desc: "no limit", + memory: &v1.MemoryStat{ + Usage: &v1.MemoryEntry{ + Limit: math.MaxUint64, // no limit + Usage: 1000, + }, + }, + workingSetBytes: 500, + expected: 0, + }, + { + desc: "with limit", + memory: &v1.MemoryStat{ + Usage: &v1.MemoryEntry{ + Limit: 5000, + Usage: 1000, + }, + }, + workingSetBytes: 500, + expected: 5000 - 500, + }, + } { + test := test + t.Run(test.desc, func(t *testing.T) { + got := getAvailableBytes(test.memory, test.workingSetBytes) + assert.Equal(t, test.expected, got) + }) + } +} + +func TestGetAvailableBytesV2(t *testing.T) { + for _, test := range []struct { + desc string + memory *v2.MemoryStat + workingSetBytes uint64 + expected uint64 + }{ + { + desc: "no limit", + memory: &v2.MemoryStat{ + UsageLimit: math.MaxUint64, // no limit + Usage: 1000, + }, + workingSetBytes: 500, + expected: 0, + }, + { + desc: "with limit", + memory: &v2.MemoryStat{ + UsageLimit: 5000, + Usage: 1000, + }, + workingSetBytes: 500, + expected: 5000 - 500, + }, + } { + test := test + t.Run(test.desc, 
func(t *testing.T) { + got := getAvailableBytesV2(test.memory, test.workingSetBytes) + assert.Equal(t, test.expected, got) + }) + } +} + +func TestContainerMetricsMemory(t *testing.T) { + c := newTestCRIService() + timestamp := time.Now() + + for _, test := range []struct { + desc string + metrics interface{} + expected *runtime.MemoryUsage + }{ + { + desc: "v1 metrics - no memory limit", + metrics: &v1.Metrics{ + Memory: &v1.MemoryStat{ + Usage: &v1.MemoryEntry{ + Limit: math.MaxUint64, // no limit + Usage: 1000, + }, + TotalRSS: 10, + TotalPgFault: 11, + TotalPgMajFault: 12, + TotalInactiveFile: 500, + }, + }, + expected: &runtime.MemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{Value: 500}, + AvailableBytes: &runtime.UInt64Value{Value: 0}, + UsageBytes: &runtime.UInt64Value{Value: 1000}, + RssBytes: &runtime.UInt64Value{Value: 10}, + PageFaults: &runtime.UInt64Value{Value: 11}, + MajorPageFaults: &runtime.UInt64Value{Value: 12}, + }, + }, + { + desc: "v1 metrics - memory limit", + metrics: &v1.Metrics{ + Memory: &v1.MemoryStat{ + Usage: &v1.MemoryEntry{ + Limit: 5000, + Usage: 1000, + }, + TotalRSS: 10, + TotalPgFault: 11, + TotalPgMajFault: 12, + TotalInactiveFile: 500, + }, + }, + expected: &runtime.MemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{Value: 500}, + AvailableBytes: &runtime.UInt64Value{Value: 4500}, + UsageBytes: &runtime.UInt64Value{Value: 1000}, + RssBytes: &runtime.UInt64Value{Value: 10}, + PageFaults: &runtime.UInt64Value{Value: 11}, + MajorPageFaults: &runtime.UInt64Value{Value: 12}, + }, + }, + { + desc: "v2 metrics - memory limit", + metrics: &v2.Metrics{ + Memory: &v2.MemoryStat{ + Usage: 1000, + UsageLimit: 5000, + InactiveFile: 0, + Pgfault: 11, + Pgmajfault: 12, + }, + }, + expected: &runtime.MemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{Value: 1000}, + AvailableBytes: &runtime.UInt64Value{Value: 4000}, + UsageBytes: 
&runtime.UInt64Value{Value: 1000}, + RssBytes: &runtime.UInt64Value{Value: 0}, + PageFaults: &runtime.UInt64Value{Value: 11}, + MajorPageFaults: &runtime.UInt64Value{Value: 12}, + }, + }, + { + desc: "v2 metrics - no memory limit", + metrics: &v2.Metrics{ + Memory: &v2.MemoryStat{ + Usage: 1000, + UsageLimit: math.MaxUint64, // no limit + InactiveFile: 0, + Pgfault: 11, + Pgmajfault: 12, + }, + }, + expected: &runtime.MemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{Value: 1000}, + AvailableBytes: &runtime.UInt64Value{Value: 0}, + UsageBytes: &runtime.UInt64Value{Value: 1000}, + RssBytes: &runtime.UInt64Value{Value: 0}, + PageFaults: &runtime.UInt64Value{Value: 11}, + MajorPageFaults: &runtime.UInt64Value{Value: 12}, + }, + }, + } { + test := test + t.Run(test.desc, func(t *testing.T) { + got, err := c.memoryContainerStats("ID", test.metrics, timestamp) + assert.NoError(t, err) + assert.Equal(t, test.expected, got) + }) + } +} + +func TestListContainerStats(t *testing.T) { + c := newTestCRIService() + type args struct { + ctx context.Context + stats []*types.Metric + containers []containerstore.Container + } + tests := []struct { + name string + args args + before func() + after func() + want *runtime.ListContainerStatsResponse + wantErr bool + }{ + { + name: "args containers having c1,but containerStore not found c1, so filter c1", + args: args{ + ctx: context.Background(), + stats: []*types.Metric{ + { + ID: "c1", + }, + }, + containers: []containerstore.Container{ + { + Metadata: containerstore.Metadata{ + ID: "c1", + SandboxID: "s1", + }, + }, + }, + }, + want: &runtime.ListContainerStatsResponse{}, + }, + { + name: "args containers having c1,c2, but containerStore not found c1, so filter c1", + args: args{ + ctx: context.Background(), + stats: []*types.Metric{ + { + ID: "c1", + }, + { + ID: "c2", + }, + }, + containers: []containerstore.Container{ + { + Metadata: containerstore.Metadata{ + ID: "c1", + SandboxID: "s1", + 
}, + }, + { + Metadata: containerstore.Metadata{ + ID: "c2", + SandboxID: "s2", + }, + }, + }, + }, + before: func() { + c.containerStore.Add(containerstore.Container{ + Metadata: containerstore.Metadata{ + ID: "c2", + }, + }) + c.sandboxStore.Add(sandboxstore.Sandbox{ + Metadata: sandboxstore.Metadata{ + ID: "s2", + }, + }) + }, + wantErr: true, + want: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.before != nil { + tt.before() + } + got, err := c.toCRIContainerStats(tt.args.ctx, tt.args.stats, tt.args.containers) + if tt.after != nil { + tt.after() + } + if (err != nil) != tt.wantErr { + t.Errorf("ListContainerStats() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("ListContainerStats() = %v, want %v", got, tt.want) + } + }) + } } diff --git a/pkg/cri/server/container_stats_list_windows.go b/pkg/cri/server/container_stats_list_windows.go deleted file mode 100644 index f3bdcb68f..000000000 --- a/pkg/cri/server/container_stats_list_windows.go +++ /dev/null @@ -1,85 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package server - -import ( - "errors" - "fmt" - - wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" - "github.com/containerd/containerd/api/types" - "github.com/containerd/containerd/protobuf" - "github.com/containerd/typeurl/v2" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - containerstore "github.com/containerd/containerd/pkg/cri/store/container" -) - -func (c *criService) containerMetrics( - meta containerstore.Metadata, - stats *types.Metric, -) (*runtime.ContainerStats, error) { - var cs runtime.ContainerStats - var usedBytes, inodesUsed uint64 - sn, err := c.snapshotStore.Get(meta.ID) - // If snapshotstore doesn't have cached snapshot information - // set WritableLayer usage to zero - if err == nil { - usedBytes = sn.Size - inodesUsed = sn.Inodes - } - cs.WritableLayer = &runtime.FilesystemUsage{ - Timestamp: sn.Timestamp, - FsId: &runtime.FilesystemIdentifier{ - Mountpoint: c.imageFSPath, - }, - UsedBytes: &runtime.UInt64Value{Value: usedBytes}, - InodesUsed: &runtime.UInt64Value{Value: inodesUsed}, - } - cs.Attributes = &runtime.ContainerAttributes{ - Id: meta.ID, - Metadata: meta.Config.GetMetadata(), - Labels: meta.Config.GetLabels(), - Annotations: meta.Config.GetAnnotations(), - } - - if stats != nil { - s, err := typeurl.UnmarshalAny(stats.Data) - if err != nil { - return nil, fmt.Errorf("failed to extract container metrics: %w", err) - } - wstats := s.(*wstats.Statistics).GetWindows() - if wstats == nil { - return nil, errors.New("windows stats is empty") - } - if wstats.Processor != nil { - cs.Cpu = &runtime.CpuUsage{ - Timestamp: (protobuf.FromTimestamp(wstats.Timestamp)).UnixNano(), - UsageCoreNanoSeconds: &runtime.UInt64Value{Value: wstats.Processor.TotalRuntimeNS}, - } - } - if wstats.Memory != nil { - cs.Memory = &runtime.MemoryUsage{ - Timestamp: (protobuf.FromTimestamp(wstats.Timestamp)).UnixNano(), - WorkingSetBytes: &runtime.UInt64Value{ - Value: wstats.Memory.MemoryUsagePrivateWorkingSetBytes, - }, - } - } 
- } - return &cs, nil -} diff --git a/pkg/cri/server/container_status.go b/pkg/cri/server/container_status.go index ed3ba2929..721532257 100644 --- a/pkg/cri/server/container_status.go +++ b/pkg/cri/server/container_status.go @@ -22,6 +22,7 @@ import ( "fmt" "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/pkg/cri/server/images" containerstore "github.com/containerd/containerd/pkg/cri/store/container" runtimespec "github.com/opencontainers/runtime-spec/specs-go" @@ -42,13 +43,13 @@ func (c *criService) ContainerStatus(ctx context.Context, r *runtime.ContainerSt // * ImageRef in container status is repo digest. spec := container.Config.GetImage() imageRef := container.ImageRef - image, err := c.imageStore.Get(imageRef) + image, err := c.GetImage(imageRef) if err != nil { if !errdefs.IsNotFound(err) { return nil, fmt.Errorf("failed to get image %q: %w", imageRef, err) } } else { - repoTags, repoDigests := parseImageReferences(image.References) + repoTags, repoDigests := images.ParseImageReferences(image.References) if len(repoTags) > 0 { // Based on current behavior of dockershim, this field should be // image tag. @@ -60,7 +61,6 @@ func (c *criService) ContainerStatus(ctx context.Context, r *runtime.ContainerSt } } status := toCRIContainerStatus(container, spec, imageRef) - if status.GetCreatedAt() == 0 { // CRI doesn't allow CreatedAt == 0. 
info, err := container.Container.Info(ctx) diff --git a/pkg/cri/server/container_status_test.go b/pkg/cri/server/container_status_test.go index 9464d7ae8..39e10e543 100644 --- a/pkg/cri/server/container_status_test.go +++ b/pkg/cri/server/container_status_test.go @@ -18,9 +18,12 @@ package server import ( "context" + "errors" "testing" "time" + criconfig "github.com/containerd/containerd/pkg/cri/config" + snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot" "github.com/stretchr/testify/assert" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" @@ -242,8 +245,9 @@ func TestContainerStatus(t *testing.T) { assert.NoError(t, c.containerStore.Add(container)) } if test.imageExist { - c.imageStore, err = imagestore.NewFakeStore([]imagestore.Image{*image}) + imageStore, err := imagestore.NewFakeStore([]imagestore.Image{*image}) assert.NoError(t, err) + c.imageService = &fakeImageService{imageStore: imageStore} } resp, err := c.ContainerStatus(context.Background(), &runtime.ContainerStatusRequest{ContainerId: container.ID}) if test.expectErr { @@ -261,6 +265,27 @@ func TestContainerStatus(t *testing.T) { } } +type fakeImageService struct { + runtime.ImageServiceServer + imageStore *imagestore.Store +} + +func (s *fakeImageService) RuntimeSnapshotter(ctx context.Context, ociRuntime criconfig.Runtime) string { + return "" +} + +func (s *fakeImageService) UpdateImage(ctx context.Context, r string) error { return nil } + +func (s *fakeImageService) GetImage(id string) (imagestore.Image, error) { return s.imageStore.Get(id) } + +func (s *fakeImageService) GetSnapshot(key string) (snapshotstore.Snapshot, error) { + return snapshotstore.Snapshot{}, errors.New("not implemented") +} + +func (s *fakeImageService) LocalResolve(refOrID string) (imagestore.Image, error) { + return imagestore.Image{}, errors.New("not implemented") +} + func patchExceptedWithState(expected *runtime.ContainerStatus, state runtime.ContainerState) { expected.State = state switch state { diff 
--git a/pkg/cri/server/container_stop.go b/pkg/cri/server/container_stop.go index 56fd3b263..144400027 100644 --- a/pkg/cri/server/container_stop.go +++ b/pkg/cri/server/container_stop.go @@ -89,7 +89,7 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore } // Don't return for unknown state, some cleanup needs to be done. if state == runtime.ContainerState_CONTAINER_UNKNOWN { - return c.cleanupUnknownContainer(ctx, id, container, sandboxID) + return cleanupUnknownContainer(ctx, id, container, sandboxID, c) } return nil } @@ -104,7 +104,7 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore if !errdefs.IsNotFound(err) { return fmt.Errorf("failed to wait for task for %q: %w", id, err) } - return c.cleanupUnknownContainer(ctx, id, container, sandboxID) + return cleanupUnknownContainer(ctx, id, container, sandboxID, c) } exitCtx, exitCancel := context.WithCancel(context.Background()) @@ -133,7 +133,7 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore // default SIGTERM is still better than returning error and leaving // the container unstoppable. (See issue #990) // TODO(random-liu): Remove this logic when containerd 1.2 is deprecated. - image, err := c.imageStore.Get(container.ImageRef) + image, err := c.GetImage(container.ImageRef) if err != nil { if !errdefs.IsNotFound(err) { return fmt.Errorf("failed to get image %q: %w", container.ImageRef, err) @@ -207,7 +207,7 @@ func (c *criService) waitContainerStop(ctx context.Context, container containers } // cleanupUnknownContainer cleanup stopped container in unknown state. -func (c *criService) cleanupUnknownContainer(ctx context.Context, id string, cntr containerstore.Container, sandboxID string) error { +func cleanupUnknownContainer(ctx context.Context, id string, cntr containerstore.Container, sandboxID string, c *criService) error { // Reuse handleContainerExit to do the cleanup. 
return handleContainerExit(ctx, &eventtypes.TaskExit{ ContainerID: id, diff --git a/pkg/cri/server/container_update_resources.go b/pkg/cri/server/container_update_resources.go index 55e83e9b4..cbbe3492c 100644 --- a/pkg/cri/server/container_update_resources.go +++ b/pkg/cri/server/container_update_resources.go @@ -23,13 +23,14 @@ import ( gocontext "context" "fmt" + "github.com/containerd/typeurl/v2" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + "github.com/containerd/containerd" "github.com/containerd/containerd/containers" "github.com/containerd/containerd/errdefs" "github.com/containerd/log" - "github.com/containerd/typeurl/v2" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" containerstore "github.com/containerd/containerd/pkg/cri/store/container" ctrdutil "github.com/containerd/containerd/pkg/cri/util" diff --git a/pkg/cri/server/container_update_resources_linux_test.go b/pkg/cri/server/container_update_resources_linux_test.go index 59f1720f4..bb89d8cae 100644 --- a/pkg/cri/server/container_update_resources_linux_test.go +++ b/pkg/cri/server/container_update_resources_linux_test.go @@ -38,6 +38,7 @@ func TestUpdateOCILinuxResource(t *testing.T) { } return nil } + for _, test := range []struct { desc string spec *runtimespec.Spec diff --git a/pkg/cri/server/events.go b/pkg/cri/server/events.go index 243ac6ac3..cba4d2071 100644 --- a/pkg/cri/server/events.go +++ b/pkg/cri/server/events.go @@ -121,12 +121,10 @@ func (em *eventMonitor) startSandboxExitMonitor(ctx context.Context, id string, exitedAt = time.Now() } - e := &eventtypes.TaskExit{ - ContainerID: id, - ID: id, - Pid: pid, - ExitStatus: exitStatus, - ExitedAt: protobuf.ToTimestamp(exitedAt), + e := &eventtypes.SandboxExit{ + SandboxID: id, + ExitStatus: exitStatus, + ExitedAt: protobuf.ToTimestamp(exitedAt), } log.L.Debugf("received exit event %+v", e) @@ -136,14 +134,14 @@ 
func (em *eventMonitor) startSandboxExitMonitor(ctx context.Context, id string, dctx, dcancel := context.WithTimeout(dctx, handleEventTimeout) defer dcancel() - sb, err := em.c.sandboxStore.Get(e.ID) + sb, err := em.c.sandboxStore.Get(e.GetSandboxID()) if err == nil { - if err := handleSandboxExit(dctx, e, sb, em.c); err != nil { + if err := handleSandboxExit(dctx, sb, e.ExitStatus, e.ExitedAt.AsTime(), em.c); err != nil { return err } return nil } else if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to get sandbox %s: %w", e.ID, err) + return fmt.Errorf("failed to get sandbox %s: %w", e.SandboxID, err) } return nil }() @@ -219,6 +217,8 @@ func convertEvent(e typeurl.Any) (string, interface{}, error) { switch e := evt.(type) { case *eventtypes.TaskOOM: id = e.ContainerID + case *eventtypes.SandboxExit: + id = e.SandboxID case *eventtypes.ImageCreate: id = e.Name case *eventtypes.ImageUpdate: @@ -311,7 +311,7 @@ func (em *eventMonitor) handleEvent(any interface{}) error { switch e := any.(type) { case *eventtypes.TaskExit: - log.G(ctx).Infof("TaskExit event %+v", e) + log.L.Infof("TaskExit event %+v", e) // Use ID instead of ContainerID to rule out TaskExit event for exec. 
cntr, err := em.c.containerStore.Get(e.ID) if err == nil { @@ -324,7 +324,19 @@ func (em *eventMonitor) handleEvent(any interface{}) error { } sb, err := em.c.sandboxStore.Get(e.ID) if err == nil { - if err := handleSandboxExit(ctx, e, sb, em.c); err != nil { + if err := handleSandboxExit(ctx, sb, e.ExitStatus, e.ExitedAt.AsTime(), em.c); err != nil { + return fmt.Errorf("failed to handle sandbox TaskExit event: %w", err) + } + return nil + } else if !errdefs.IsNotFound(err) { + return fmt.Errorf("can't find sandbox for TaskExit event: %w", err) + } + return nil + case *eventtypes.SandboxExit: + log.L.Infof("SandboxExit event %+v", e) + sb, err := em.c.sandboxStore.Get(e.GetSandboxID()) + if err == nil { + if err := handleSandboxExit(ctx, sb, e.ExitStatus, e.ExitedAt.AsTime(), em.c); err != nil { return fmt.Errorf("failed to handle sandbox TaskExit event: %w", err) } return nil @@ -333,7 +345,7 @@ func (em *eventMonitor) handleEvent(any interface{}) error { } return nil case *eventtypes.TaskOOM: - log.G(ctx).Infof("TaskOOM event %+v", e) + log.L.Infof("TaskOOM event %+v", e) // For TaskOOM, we only care which container it belongs to. 
cntr, err := em.c.containerStore.Get(e.ContainerID) if err != nil { @@ -350,14 +362,14 @@ func (em *eventMonitor) handleEvent(any interface{}) error { return fmt.Errorf("failed to update container status for TaskOOM event: %w", err) } case *eventtypes.ImageCreate: - log.G(ctx).Infof("ImageCreate event %+v", e) - return em.c.updateImage(ctx, e.Name) + log.L.Infof("ImageCreate event %+v", e) + return em.c.UpdateImage(ctx, e.Name) case *eventtypes.ImageUpdate: - log.G(ctx).Infof("ImageUpdate event %+v", e) - return em.c.updateImage(ctx, e.Name) + log.L.Infof("ImageUpdate event %+v", e) + return em.c.UpdateImage(ctx, e.Name) case *eventtypes.ImageDelete: - log.G(ctx).Infof("ImageDelete event %+v", e) - return em.c.updateImage(ctx, e.Name) + log.L.Infof("ImageDelete event %+v", e) + return em.c.UpdateImage(ctx, e.Name) } return nil @@ -381,7 +393,7 @@ func handleContainerExit(ctx context.Context, e *eventtypes.TaskExit, cntr conta }, ) if err != nil { - if !errdefs.IsNotFound(err) { + if !errdefs.IsNotFound(err) && !errdefs.IsUnavailable(err) { return fmt.Errorf("failed to load task for container: %w", err) } } else { @@ -435,7 +447,7 @@ func handleContainerExit(ctx context.Context, e *eventtypes.TaskExit, cntr conta return fmt.Errorf("failed to cleanup container %s in task-service: %w", cntr.Container.ID(), err) } } - log.G(ctx).Infof("Ensure that container %s in task-service has been cleanup successfully", cntr.Container.ID()) + log.L.Infof("Ensure that container %s in task-service has been cleanup successfully", cntr.Container.ID()) } err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { @@ -448,7 +460,7 @@ func handleContainerExit(ctx context.Context, e *eventtypes.TaskExit, cntr conta // Unknown state can only transit to EXITED state, so we need // to handle unknown state here. 
if status.Unknown { - log.G(ctx).Debugf("Container %q transited from UNKNOWN to EXITED", cntr.ID) + log.L.Debugf("Container %q transited from UNKNOWN to EXITED", cntr.ID) status.Unknown = false } return status, nil @@ -462,75 +474,18 @@ func handleContainerExit(ctx context.Context, e *eventtypes.TaskExit, cntr conta return nil } -// handleSandboxExit handles TaskExit event for sandbox. -func handleSandboxExit(ctx context.Context, e *eventtypes.TaskExit, sb sandboxstore.Sandbox, c *criService) error { - // No stream attached to sandbox container. - task, err := sb.Container.Task(ctx, nil) - if err != nil { - if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to load task for sandbox: %w", err) - } - } else { - // TODO(random-liu): [P1] This may block the loop, we may want to spawn a worker - if _, err = task.Delete(ctx, containerd.WithProcessKill); err != nil { - if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to stop sandbox: %w", err) - } - // Move on to make sure container status is updated. - } - } - - // NOTE: Both sb.Container.Task and task.Delete interface always ensures - // that the status of target task. However, the interfaces return - // ErrNotFound, which doesn't mean that the shim instance doesn't exist. - // - // There are two caches for task in containerd: - // - // 1. io.containerd.service.v1.tasks-service - // 2. io.containerd.runtime.v2.task - // - // First one is to maintain the shim connection and shutdown the shim - // in Delete API. And the second one is to maintain the lifecycle of - // task in shim server. - // - // So, if the shim instance is running and task has been deleted in shim - // server, the sb.Container.Task and task.Delete will receive the - // ErrNotFound. If we don't delete the shim instance in io.containerd.service.v1.tasks-service, - // shim will be leaky. 
- // - // Based on containerd/containerd#7496 issue, when host is under IO - // pressure, the umount2 syscall will take more than 10 seconds so that - // the CRI plugin will cancel this task.Delete call. However, the shim - // server isn't aware about this. After return from umount2 syscall, the - // shim server continue delete the task record. And then CRI plugin - // retries to delete task and retrieves ErrNotFound and marks it as - // stopped. Therefore, The shim is leaky. - // - // It's hard to handle the connection lost or request canceled cases in - // shim server. We should call Delete API to io.containerd.service.v1.tasks-service - // to ensure that shim instance is shutdown. - // - // REF: - // 1. https://github.com/containerd/containerd/issues/7496#issuecomment-1671100968 - // 2. https://github.com/containerd/containerd/issues/8931 - if errdefs.IsNotFound(err) { - _, err = c.client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: sb.Container.ID()}) - if err != nil { - err = errdefs.FromGRPC(err) - if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to cleanup sandbox %s in task-service: %w", sb.Container.ID(), err) - } - } - log.G(ctx).Infof("Ensure that sandbox %s in task-service has been cleanup successfully", sb.Container.ID()) - } - err = sb.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) { +// handleSandboxExit handles sandbox exit event. 
+func handleSandboxExit(ctx context.Context, sb sandboxstore.Sandbox, exitStatus uint32, exitTime time.Time, c *criService) error { + if err := sb.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) { status.State = sandboxstore.StateNotReady status.Pid = 0 + status.ExitStatus = exitStatus + status.ExitedAt = exitTime return status, nil - }) - if err != nil { + }); err != nil { return fmt.Errorf("failed to update sandbox state: %w", err) } + // Using channel to propagate the information of sandbox stop sb.Stop() c.generateAndSendContainerEvent(ctx, sb.ID, sb.ID, runtime.ContainerEventType_CONTAINER_STOPPED_EVENT) diff --git a/pkg/cri/server/fuzz.go b/pkg/cri/server/fuzz.go index 347be7feb..1f3fe1f90 100644 --- a/pkg/cri/server/fuzz.go +++ b/pkg/cri/server/fuzz.go @@ -27,7 +27,7 @@ import ( func SandboxStore(cs CRIService) (*sandbox.Store, error) { s, ok := cs.(*criService) if !ok { - return nil, fmt.Errorf("%+v is not server.criService", cs) + return nil, fmt.Errorf("%+v is not sbserver.criService", cs) } return s.sandboxStore, nil } diff --git a/pkg/cri/server/helpers.go b/pkg/cri/server/helpers.go index f6a4fdaad..45f8460cb 100644 --- a/pkg/cri/server/helpers.go +++ b/pkg/cri/server/helpers.go @@ -21,11 +21,18 @@ import ( "fmt" "path" "path/filepath" + "regexp" goruntime "runtime" "strconv" "strings" "time" + runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + "github.com/containerd/typeurl/v2" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pelletier/go-toml/v2" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + "github.com/containerd/containerd" "github.com/containerd/containerd/containers" "github.com/containerd/containerd/errdefs" @@ -33,21 +40,14 @@ import ( criconfig "github.com/containerd/containerd/pkg/cri/config" containerstore "github.com/containerd/containerd/pkg/cri/store/container" imagestore "github.com/containerd/containerd/pkg/cri/store/image" - sandboxstore 
"github.com/containerd/containerd/pkg/cri/store/sandbox" runtimeoptions "github.com/containerd/containerd/pkg/runtimeoptions/v1" "github.com/containerd/containerd/plugin" runcoptions "github.com/containerd/containerd/runtime/v2/runc/options" "github.com/containerd/log" - "github.com/containerd/typeurl/v2" - docker "github.com/distribution/reference" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - - runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" - imagedigest "github.com/opencontainers/go-digest" - "github.com/pelletier/go-toml/v2" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ) +// TODO: Move common helpers for sbserver and podsandbox to a dedicated package once basic services are functional. + const ( // errorStartReason is the exit reason when fails to start container. errorStartReason = "StartError" @@ -79,8 +79,7 @@ const ( containerKindSandbox = "sandbox" // containerKindContainer is a label value indicating container is application container containerKindContainer = "container" - // sandboxMetadataExtension is an extension name that identify metadata of sandbox in CreateContainerRequest - sandboxMetadataExtension = criContainerdPrefix + ".sandbox.metadata" + // containerMetadataExtension is an extension name that identify metadata of container in CreateContainerRequest containerMetadataExtension = criContainerdPrefix + ".container.metadata" @@ -89,16 +88,57 @@ const ( // runtimeRunhcsV1 is the runtime type for runhcs. runtimeRunhcsV1 = "io.containerd.runhcs.v1" + + // devShm is the default path of /dev/shm. + devShm = "/dev/shm" + // etcHosts is the default path of /etc/hosts file. + etcHosts = "/etc/hosts" + // etcHostname is the default path of /etc/hostname file. + etcHostname = "/etc/hostname" + // resolvConfPath is the abs path of resolv.conf on host or container. + resolvConfPath = "/etc/resolv.conf" ) +// getSandboxRootDir returns the root directory for managing sandbox files, +// e.g. hosts files.
+func (c *criService) getSandboxRootDir(id string) string { + return filepath.Join(c.config.RootDir, sandboxesDir, id) +} + +// getVolatileSandboxRootDir returns the root directory for managing volatile sandbox files, +// e.g. named pipes. +func (c *criService) getVolatileSandboxRootDir(id string) string { + return filepath.Join(c.config.StateDir, sandboxesDir, id) +} + +// getSandboxHostname returns the hostname file path inside the sandbox root directory. +func (c *criService) getSandboxHostname(id string) string { + return filepath.Join(c.getSandboxRootDir(id), "hostname") +} + +// getSandboxHosts returns the hosts file path inside the sandbox root directory. +func (c *criService) getSandboxHosts(id string) string { + return filepath.Join(c.getSandboxRootDir(id), "hosts") +} + +// getResolvPath returns resolv.conf filepath for specified sandbox. +func (c *criService) getResolvPath(id string) string { + return filepath.Join(c.getSandboxRootDir(id), "resolv.conf") +} + +// getSandboxDevShm returns the shm file path inside the volatile sandbox root directory. +func (c *criService) getSandboxDevShm(id string) string { + return filepath.Join(c.getVolatileSandboxRootDir(id), "shm") +} + // makeSandboxName generates sandbox name from sandbox metadata. The name // generated is unique as long as sandbox metadata is unique. func makeSandboxName(s *runtime.PodSandboxMetadata) string { return strings.Join([]string{ - s.Name, // 0 - s.Namespace, // 1 - s.Uid, // 2 - strconv.Itoa(int(s.Attempt)), // 3 + s.Name, // 0 + s.Namespace, // 1 + s.Uid, // 2 + strconv.FormatUint(uint64(s.Attempt), 10), // 3 }, nameDelimiter) } @@ -115,18 +155,6 @@ func makeContainerName(c *runtime.ContainerMetadata, s *runtime.PodSandboxMetada }, nameDelimiter) } -// getSandboxRootDir returns the root directory for managing sandbox files, -// e.g. hosts files.
-func (c *criService) getSandboxRootDir(id string) string { - return filepath.Join(c.config.RootDir, sandboxesDir, id) -} - -// getVolatileSandboxRootDir returns the root directory for managing volatile sandbox files, -// e.g. named pipes. -func (c *criService) getVolatileSandboxRootDir(id string) string { - return filepath.Join(c.config.StateDir, sandboxesDir, id) -} - // getContainerRootDir returns the root directory for managing container files, // e.g. state checkpoint. func (c *criService) getContainerRootDir(id string) string { @@ -144,51 +172,6 @@ func criContainerStateToString(state runtime.ContainerState) string { return runtime.ContainerState_name[int32(state)] } -// getRepoDigestAngTag returns image repoDigest and repoTag of the named image reference. -func getRepoDigestAndTag(namedRef docker.Named, digest imagedigest.Digest, schema1 bool) (string, string) { - var repoTag, repoDigest string - if _, ok := namedRef.(docker.NamedTagged); ok { - repoTag = namedRef.String() - } - if _, ok := namedRef.(docker.Canonical); ok { - repoDigest = namedRef.String() - } else if !schema1 { - // digest is not actual repo digest for schema1 image. - repoDigest = namedRef.Name() + "@" + digest.String() - } - return repoDigest, repoTag -} - -// localResolve resolves image reference locally and returns corresponding image metadata. It -// returns errdefs.ErrNotFound if the reference doesn't exist. -func (c *criService) localResolve(refOrID string) (imagestore.Image, error) { - getImageID := func(refOrId string) string { - if _, err := imagedigest.Parse(refOrID); err == nil { - return refOrID - } - return func(ref string) string { - // ref is not image id, try to resolve it locally. - // TODO(random-liu): Handle this error better for debugging. 
- normalized, err := docker.ParseDockerRef(ref) - if err != nil { - return "" - } - id, err := c.imageStore.Resolve(normalized.String()) - if err != nil { - return "" - } - return id - }(refOrID) - } - - imageID := getImageID(refOrID) - if imageID == "" { - // Try to treat ref as imageID - imageID = refOrID - } - return c.imageStore.Get(imageID) -} - // toContainerdImage converts an image object in image store to containerd image handler. func (c *criService) toContainerdImage(ctx context.Context, image imagestore.Image) (containerd.Image, error) { // image should always have at least one reference. @@ -217,30 +200,6 @@ func getUserFromImage(user string) (*int64, string) { return &uid, "" } -// ensureImageExists returns corresponding metadata of the image reference, if image is not -// pulled yet, the function will pull the image. -func (c *criService) ensureImageExists(ctx context.Context, ref string, config *runtime.PodSandboxConfig) (*imagestore.Image, error) { - image, err := c.localResolve(ref) - if err != nil && !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("failed to get image %q: %w", ref, err) - } - if err == nil { - return &image, nil - } - // Pull image to ensure the image exists - resp, err := c.PullImage(ctx, &runtime.PullImageRequest{Image: &runtime.ImageSpec{Image: ref}, SandboxConfig: config}) - if err != nil { - return nil, fmt.Errorf("failed to pull image %q: %w", ref, err) - } - imageID := resp.GetImageRef() - newImage, err := c.imageStore.Get(imageID) - if err != nil { - // It's still possible that someone removed the image right after it is pulled. - return nil, fmt.Errorf("failed to get image %q after pulling: %w", imageID, err) - } - return &newImage, nil -} - // validateTargetContainer checks that a container is a valid // target for a container using PID NamespaceMode_TARGET. // The target container must be in the same sandbox and must be running. 
@@ -302,34 +261,6 @@ func buildLabels(configLabels, imageConfigLabels map[string]string, containerTyp return labels } -// toRuntimeAuthConfig converts cri plugin auth config to runtime auth config. -func toRuntimeAuthConfig(a criconfig.AuthConfig) *runtime.AuthConfig { - return &runtime.AuthConfig{ - Username: a.Username, - Password: a.Password, - Auth: a.Auth, - IdentityToken: a.IdentityToken, - } -} - -// parseImageReferences parses a list of arbitrary image references and returns -// the repotags and repodigests -func parseImageReferences(refs []string) ([]string, []string) { - var tags, digests []string - for _, ref := range refs { - parsed, err := docker.ParseAnyReference(ref) - if err != nil { - continue - } - if _, ok := parsed.(docker.Canonical); ok { - digests = append(digests, parsed.String()) - } else if _, ok := parsed.(docker.Tagged); ok { - tags = append(tags, parsed.String()) - } - } - return tags, digests -} - // generateRuntimeOptions generates runtime options from cri plugin config. func generateRuntimeOptions(r criconfig.Runtime) (interface{}, error) { if r.Options == nil { @@ -399,13 +330,6 @@ func unknownContainerStatus() containerstore.Status { } } -// unknownSandboxStatus returns the default sandbox status when its status is unknown. -func unknownSandboxStatus() sandboxstore.Status { - return sandboxstore.Status{ - State: sandboxstore.StateUnknown, - } -} - // getPassthroughAnnotations filters requested pod annotations by comparing // against permitted annotations for the given runtime. 
func getPassthroughAnnotations(podAnnotations map[string]string, @@ -512,7 +436,7 @@ func copyResourcesToStatus(spec *runtimespec.Spec, status containerstore.Status) func (c *criService) generateAndSendContainerEvent(ctx context.Context, containerID string, sandboxID string, eventType runtime.ContainerEventType) { podSandboxStatus, err := c.getPodSandboxStatus(ctx, sandboxID) if err != nil { - log.L.Warnf("Failed to get podSandbox status for container event for sandboxID %q: %v. Sending the event with nil podSandboxStatus.", sandboxID, err) + log.G(ctx).Warnf("Failed to get podSandbox status for container event for sandboxID %q: %v. Sending the event with nil podSandboxStatus.", sandboxID, err) podSandboxStatus = nil } containerStatuses, err := c.getContainerStatuses(ctx, sandboxID) @@ -590,3 +514,180 @@ func hostNetwork(config *runtime.PodSandboxConfig) bool { } return hostNet } + +// getCgroupsPath generates container cgroups path. +func getCgroupsPath(cgroupsParent, id string) string { + base := path.Base(cgroupsParent) + if strings.HasSuffix(base, ".slice") { + // For a.slice/b.slice/c.slice, base is c.slice. + // runc systemd cgroup path format is "slice:prefix:name". 
+ return strings.Join([]string{base, "cri-containerd", id}, ":") + } + return filepath.Join(cgroupsParent, id) +} + +func toLabel(selinuxOptions *runtime.SELinuxOption) ([]string, error) { + var labels []string + + if selinuxOptions == nil { + return nil, nil + } + if err := checkSelinuxLevel(selinuxOptions.Level); err != nil { + return nil, err + } + if selinuxOptions.User != "" { + labels = append(labels, "user:"+selinuxOptions.User) + } + if selinuxOptions.Role != "" { + labels = append(labels, "role:"+selinuxOptions.Role) + } + if selinuxOptions.Type != "" { + labels = append(labels, "type:"+selinuxOptions.Type) + } + if selinuxOptions.Level != "" { + labels = append(labels, "level:"+selinuxOptions.Level) + } + + return labels, nil +} + +func checkSelinuxLevel(level string) error { + if len(level) == 0 { + return nil + } + + matched, err := regexp.MatchString(`^s\d(-s\d)??(:c\d{1,4}(\.c\d{1,4})?(,c\d{1,4}(\.c\d{1,4})?)*)?$`, level) + if err != nil { + return fmt.Errorf("the format of 'level' %q is not correct: %w", level, err) + } + if !matched { + return fmt.Errorf("the format of 'level' %q is not correct", level) + } + return nil +} + +func parseUsernsIDMap(runtimeIDMap []*runtime.IDMapping) ([]runtimespec.LinuxIDMapping, error) { + var m []runtimespec.LinuxIDMapping + + if len(runtimeIDMap) == 0 { + return m, nil + } + + if len(runtimeIDMap) > 1 { + // We only accept 1 line, because containerd.WithRemappedSnapshot() only supports that. + return m, fmt.Errorf("only one mapping line supported, got %v mapping lines", len(runtimeIDMap)) + } + + // We know len is 1 now. 
+ if runtimeIDMap[0] == nil { + return m, nil + } + uidMap := *runtimeIDMap[0] + + if uidMap.Length < 1 { + return m, fmt.Errorf("invalid mapping length: %v", uidMap.Length) + } + + m = []runtimespec.LinuxIDMapping{ + { + ContainerID: uidMap.ContainerId, + HostID: uidMap.HostId, + Size: uidMap.Length, + }, + } + + return m, nil +} + +func parseUsernsIDs(userns *runtime.UserNamespace) (uids, gids []runtimespec.LinuxIDMapping, retErr error) { + if userns == nil { + // If userns is not set, the kubelet doesn't support this option + // and we should just fallback to no userns. This is completely + // valid. + return nil, nil, nil + } + + uids, err := parseUsernsIDMap(userns.GetUids()) + if err != nil { + return nil, nil, fmt.Errorf("UID mapping: %w", err) + } + + gids, err = parseUsernsIDMap(userns.GetGids()) + if err != nil { + return nil, nil, fmt.Errorf("GID mapping: %w", err) + } + + switch mode := userns.GetMode(); mode { + case runtime.NamespaceMode_NODE: + if len(uids) != 0 || len(gids) != 0 { + return nil, nil, fmt.Errorf("can't use user namespace mode %q with mappings. Got %v UID mappings and %v GID mappings", mode, len(uids), len(gids)) + } + case runtime.NamespaceMode_POD: + // This is valid, we will handle it in WithPodNamespaces(). + if len(uids) == 0 || len(gids) == 0 { + return nil, nil, fmt.Errorf("can't use user namespace mode %q without UID and GID mappings", mode) + } + default: + return nil, nil, fmt.Errorf("unsupported user namespace mode: %q", mode) + } + + return uids, gids, nil +} + +// sameUsernsConfig checks if the userns configs are the same. If the mappings +// on each config are the same but in different order, it returns false. +// XXX: If the runtime.UserNamespace struct changes, we should update this +// function accordingly. +func sameUsernsConfig(a, b *runtime.UserNamespace) bool { + // If both are nil, they are the same. + if a == nil && b == nil { + return true + } + // If only one is nil, they are different. 
+ if a == nil || b == nil { + return false + } + // At this point, a is not nil nor b. + + if a.GetMode() != b.GetMode() { + return false + } + + aUids, aGids, err := parseUsernsIDs(a) + if err != nil { + return false + } + bUids, bGids, err := parseUsernsIDs(b) + if err != nil { + return false + } + + if !sameMapping(aUids, bUids) { + return false + } + if !sameMapping(aGids, bGids) { + return false + } + return true +} + +// sameMapping checks if the mappings are the same. If the mappings are the same +// but in different order, it returns false. +func sameMapping(a, b []runtimespec.LinuxIDMapping) bool { + if len(a) != len(b) { + return false + } + + for x := range a { + if a[x].ContainerID != b[x].ContainerID { + return false + } + if a[x].HostID != b[x].HostID { + return false + } + if a[x].Size != b[x].Size { + return false + } + } + return true +} diff --git a/pkg/cri/server/helpers_linux.go b/pkg/cri/server/helpers_linux.go index 201f92947..4580ba138 100644 --- a/pkg/cri/server/helpers_linux.go +++ b/pkg/cri/server/helpers_linux.go @@ -20,14 +20,18 @@ import ( "context" "fmt" "os" - "path" "path/filepath" - "regexp" "sort" "strings" "syscall" "time" + "github.com/containerd/cgroups/v3" + "github.com/moby/sys/mountinfo" + "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + "github.com/containerd/containerd" "github.com/containerd/containerd/mount" "github.com/containerd/containerd/pkg/apparmor" @@ -35,111 +39,8 @@ import ( "github.com/containerd/containerd/pkg/seutil" "github.com/containerd/containerd/snapshots" "github.com/containerd/log" - "github.com/moby/sys/mountinfo" - "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/selinux/go-selinux/label" - "golang.org/x/sys/unix" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ) -const ( - // defaultSandboxOOMAdj is default omm adj for sandbox container. (kubernetes#47938). 
- defaultSandboxOOMAdj = -998 - // defaultShmSize is the default size of the sandbox shm. - defaultShmSize = int64(1024 * 1024 * 64) - // relativeRootfsPath is the rootfs path relative to bundle path. - relativeRootfsPath = "rootfs" - // devShm is the default path of /dev/shm. - devShm = "/dev/shm" - // etcHosts is the default path of /etc/hosts file. - etcHosts = "/etc/hosts" - // etcHostname is the default path of /etc/hostname file. - etcHostname = "/etc/hostname" - // resolvConfPath is the abs path of resolv.conf on host or container. - resolvConfPath = "/etc/resolv.conf" - // hostnameEnv is the key for HOSTNAME env. - hostnameEnv = "HOSTNAME" -) - -// getCgroupsPath generates container cgroups path. -func getCgroupsPath(cgroupsParent, id string) string { - base := path.Base(cgroupsParent) - if strings.HasSuffix(base, ".slice") { - // For a.slice/b.slice/c.slice, base is c.slice. - // runc systemd cgroup path format is "slice:prefix:name". - return strings.Join([]string{base, "cri-containerd", id}, ":") - } - return filepath.Join(cgroupsParent, id) -} - -// getSandboxHostname returns the hostname file path inside the sandbox root directory. -func (c *criService) getSandboxHostname(id string) string { - return filepath.Join(c.getSandboxRootDir(id), "hostname") -} - -// getSandboxHosts returns the hosts file path inside the sandbox root directory. -func (c *criService) getSandboxHosts(id string) string { - return filepath.Join(c.getSandboxRootDir(id), "hosts") -} - -// getResolvPath returns resolv.conf filepath for specified sandbox. -func (c *criService) getResolvPath(id string) string { - return filepath.Join(c.getSandboxRootDir(id), "resolv.conf") -} - -// getSandboxDevShm returns the shm file path inside the sandbox root directory. 
-func (c *criService) getSandboxDevShm(id string) string { - return filepath.Join(c.getVolatileSandboxRootDir(id), "shm") -} - -func toLabel(selinuxOptions *runtime.SELinuxOption) ([]string, error) { - var labels []string - - if selinuxOptions == nil { - return nil, nil - } - if err := checkSelinuxLevel(selinuxOptions.Level); err != nil { - return nil, err - } - if selinuxOptions.User != "" { - labels = append(labels, "user:"+selinuxOptions.User) - } - if selinuxOptions.Role != "" { - labels = append(labels, "role:"+selinuxOptions.Role) - } - if selinuxOptions.Type != "" { - labels = append(labels, "type:"+selinuxOptions.Type) - } - if selinuxOptions.Level != "" { - labels = append(labels, "level:"+selinuxOptions.Level) - } - - return labels, nil -} - -func initLabelsFromOpt(selinuxOpts *runtime.SELinuxOption) (string, string, error) { - labels, err := toLabel(selinuxOpts) - if err != nil { - return "", "", err - } - return label.InitLabels(labels) -} - -func checkSelinuxLevel(level string) error { - if len(level) == 0 { - return nil - } - - matched, err := regexp.MatchString(`^s\d(-s\d)??(:c\d{1,4}(\.c\d{1,4})?(,c\d{1,4}(\.c\d{1,4})?)*)?$`, level) - if err != nil { - return fmt.Errorf("the format of 'level' %q is not correct: %w", level, err) - } - if !matched { - return fmt.Errorf("the format of 'level' %q is not correct", level) - } - return nil -} - // apparmorEnabled returns true if apparmor is enabled, supported by the host, // if apparmor_parser is installed, and if we are not running docker-in-docker. func (c *criService) apparmorEnabled() bool { @@ -283,130 +184,10 @@ func modifyProcessLabel(runtimeType string, spec *specs.Spec) error { return nil } -func parseUsernsIDMap(runtimeIDMap []*runtime.IDMapping) ([]specs.LinuxIDMapping, error) { - var m []specs.LinuxIDMapping - - if len(runtimeIDMap) == 0 { - return m, nil - } - - if len(runtimeIDMap) > 1 { - // We only accept 1 line, because containerd.WithRemappedSnapshot() only supports that. 
- return m, fmt.Errorf("only one mapping line supported, got %v mapping lines", len(runtimeIDMap)) - } - - // We know len is 1 now. - if runtimeIDMap[0] == nil { - return m, nil - } - uidMap := *runtimeIDMap[0] - - if uidMap.Length < 1 { - return m, fmt.Errorf("invalid mapping length: %v", uidMap.Length) - } - - m = []specs.LinuxIDMapping{ - { - ContainerID: uidMap.ContainerId, - HostID: uidMap.HostId, - Size: uidMap.Length, - }, - } - - return m, nil -} - -func parseUsernsIDs(userns *runtime.UserNamespace) (uids, gids []specs.LinuxIDMapping, retErr error) { - if userns == nil { - // If userns is not set, the kubelet doesn't support this option - // and we should just fallback to no userns. This is completely - // valid. - return nil, nil, nil - } - - uids, err := parseUsernsIDMap(userns.GetUids()) - if err != nil { - return nil, nil, fmt.Errorf("UID mapping: %w", err) - } - - gids, err = parseUsernsIDMap(userns.GetGids()) - if err != nil { - return nil, nil, fmt.Errorf("GID mapping: %w", err) - } - - switch mode := userns.GetMode(); mode { - case runtime.NamespaceMode_NODE: - if len(uids) != 0 || len(gids) != 0 { - return nil, nil, fmt.Errorf("can't use user namespace mode %q with mappings. Got %v UID mappings and %v GID mappings", mode, len(uids), len(gids)) - } - case runtime.NamespaceMode_POD: - // This is valid, we will handle it in WithPodNamespaces(). - if len(uids) == 0 || len(gids) == 0 { - return nil, nil, fmt.Errorf("can't use user namespace mode %q without UID and GID mappings", mode) - } - default: - return nil, nil, fmt.Errorf("unsupported user namespace mode: %q", mode) - } - - return uids, gids, nil -} - -// sameUsernsConfig checks if the userns configs are the same. If the mappings -// on each config are the same but in different order, it returns false. -// XXX: If the runtime.UserNamespace struct changes, we should update this -// function accordingly. 
-func sameUsernsConfig(a, b *runtime.UserNamespace) bool { - // If both are nil, they are the same. - if a == nil && b == nil { - return true - } - // If only one is nil, they are different. - if a == nil || b == nil { - return false - } - // At this point, a is not nil nor b. - - if a.GetMode() != b.GetMode() { - return false - } - - aUids, aGids, err := parseUsernsIDs(a) - if err != nil { - return false - } - bUids, bGids, err := parseUsernsIDs(b) - if err != nil { - return false - } - - if !sameMapping(aUids, bUids) { - return false - } - if !sameMapping(aGids, bGids) { - return false - } - return true -} - -// sameMapping checks if the mappings are the same. If the mappings are the same -// but in different order, it returns false. -func sameMapping(a, b []specs.LinuxIDMapping) bool { - if len(a) != len(b) { - return false - } - - for x := range a { - if a[x].ContainerID != b[x].ContainerID { - return false - } - if a[x].HostID != b[x].HostID { - return false - } - if a[x].Size != b[x].Size { - return false - } - } - return true +// isUnifiedCgroupsMode reports whether the cgroup mode is unified. +// TODO: add build constraints to cgroups package and remove this helper +func isUnifiedCgroupsMode() bool { + return cgroups.Mode() == cgroups.Unified } func snapshotterRemapOpts(nsOpts *runtime.NamespaceOption) ([]snapshots.Opt, error) { diff --git a/pkg/cri/server/helpers_linux_test.go b/pkg/cri/server/helpers_linux_test.go deleted file mode 100644 index 1249d3ad7..000000000 --- a/pkg/cri/server/helpers_linux_test.go +++ /dev/null @@ -1,106 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License.
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "context" - "os" - "path/filepath" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "golang.org/x/sys/unix" -) - -func TestGetCgroupsPath(t *testing.T) { - testID := "test-id" - for _, test := range []struct { - desc string - cgroupsParent string - expected string - }{ - { - desc: "should support regular cgroup path", - cgroupsParent: "/a/b", - expected: "/a/b/test-id", - }, - { - desc: "should support systemd cgroup path", - cgroupsParent: "/a.slice/b.slice", - expected: "b.slice:cri-containerd:test-id"}, - { - desc: "should support tailing slash for regular cgroup path", - cgroupsParent: "/a/b/", - expected: "/a/b/test-id", - }, - { - desc: "should support tailing slash for systemd cgroup path", - cgroupsParent: "/a.slice/b.slice/", - expected: "b.slice:cri-containerd:test-id", - }, - { - desc: "should treat root cgroup as regular cgroup path", - cgroupsParent: "/", - expected: "/test-id", - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - got := getCgroupsPath(test.cgroupsParent, testID) - assert.Equal(t, test.expected, got) - }) - } -} - -func TestEnsureRemoveAllWithMount(t *testing.T) { - if os.Getuid() != 0 { - t.Skip("skipping test that requires root") - } - - var err error - dir1 := t.TempDir() - dir2 := t.TempDir() - - bindDir := filepath.Join(dir1, "bind") - if err := os.MkdirAll(bindDir, 0755); err != nil { - t.Fatal(err) - } - - if err := unix.Mount(dir2, bindDir, "none", unix.MS_BIND, ""); err != nil { - t.Fatal(err) - } - - done := make(chan struct{}) - go func() { - err = 
ensureRemoveAll(context.Background(), dir1) - close(done) - }() - - select { - case <-done: - if err != nil { - t.Fatal(err) - } - case <-time.After(5 * time.Second): - t.Fatal("timeout waiting for EnsureRemoveAll to finish") - } - - if _, err := os.Stat(dir1); !os.IsNotExist(err) { - t.Fatalf("expected %q to not exist", dir1) - } -} diff --git a/pkg/cri/server/helpers_other.go b/pkg/cri/server/helpers_other.go index f877539a7..62b4b6c56 100644 --- a/pkg/cri/server/helpers_other.go +++ b/pkg/cri/server/helpers_other.go @@ -41,3 +41,7 @@ func ensureRemoveAll(ctx context.Context, dir string) error { func modifyProcessLabel(runtimeType string, spec *specs.Spec) error { return nil } + +func isUnifiedCgroupsMode() bool { + return false +} diff --git a/pkg/cri/server/helpers_selinux_linux_test.go b/pkg/cri/server/helpers_selinux_linux_test.go deleted file mode 100644 index b81219184..000000000 --- a/pkg/cri/server/helpers_selinux_linux_test.go +++ /dev/null @@ -1,180 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package server - -import ( - "testing" - - "github.com/opencontainers/selinux/go-selinux" - "github.com/stretchr/testify/assert" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func TestInitSelinuxOpts(t *testing.T) { - if !selinux.GetEnabled() { - t.Skip("selinux is not enabled") - } - - for _, test := range []struct { - desc string - selinuxOpt *runtime.SELinuxOption - processLabel string - mountLabel string - expectErr bool - }{ - { - desc: "Should return empty strings for processLabel and mountLabel when selinuxOpt is nil", - selinuxOpt: nil, - processLabel: ".*:c[0-9]{1,3},c[0-9]{1,3}", - mountLabel: ".*:c[0-9]{1,3},c[0-9]{1,3}", - }, - { - desc: "Should overlay fields on processLabel when selinuxOpt has been initialized partially", - selinuxOpt: &runtime.SELinuxOption{ - User: "", - Role: "user_r", - Type: "", - Level: "s0:c1,c2", - }, - processLabel: "system_u:user_r:(container_file_t|svirt_lxc_net_t):s0:c1,c2", - mountLabel: "system_u:object_r:(container_file_t|svirt_sandbox_file_t):s0:c1,c2", - }, - { - desc: "Should be resolved correctly when selinuxOpt has been initialized completely", - selinuxOpt: &runtime.SELinuxOption{ - User: "user_u", - Role: "user_r", - Type: "user_t", - Level: "s0:c1,c2", - }, - processLabel: "user_u:user_r:user_t:s0:c1,c2", - mountLabel: "user_u:object_r:(container_file_t|svirt_sandbox_file_t):s0:c1,c2", - }, - { - desc: "Should be resolved correctly when selinuxOpt has been initialized with level=''", - selinuxOpt: &runtime.SELinuxOption{ - User: "user_u", - Role: "user_r", - Type: "user_t", - Level: "", - }, - processLabel: "user_u:user_r:user_t:s0:c[0-9]{1,3},c[0-9]{1,3}", - mountLabel: "user_u:object_r:(container_file_t|svirt_sandbox_file_t):s0", - }, - { - desc: "Should return error when the format of 'level' is not correct", - selinuxOpt: &runtime.SELinuxOption{ - User: "user_u", - Role: "user_r", - Type: "user_t", - Level: "s0,c1,c2", - }, - expectErr: true, - }, - } { - test := test - t.Run(test.desc, func(t 
*testing.T) { - processLabel, mountLabel, err := initLabelsFromOpt(test.selinuxOpt) - if test.expectErr { - assert.Error(t, err) - } else { - assert.Regexp(t, test.processLabel, processLabel) - assert.Regexp(t, test.mountLabel, mountLabel) - } - }) - } -} - -func TestCheckSelinuxLevel(t *testing.T) { - for _, test := range []struct { - desc string - level string - expectNoMatch bool - }{ - { - desc: "s0", - level: "s0", - }, - { - desc: "s0-s0", - level: "s0-s0", - }, - { - desc: "s0:c0", - level: "s0:c0", - }, - { - desc: "s0:c0.c3", - level: "s0:c0.c3", - }, - { - desc: "s0:c0,c3", - level: "s0:c0,c3", - }, - { - desc: "s0-s0:c0,c3", - level: "s0-s0:c0,c3", - }, - { - desc: "s0-s0:c0,c3.c6", - level: "s0-s0:c0,c3.c6", - }, - { - desc: "s0-s0:c0,c3.c6,c8.c10", - level: "s0-s0:c0,c3.c6,c8.c10", - }, - { - desc: "s0-s0:c0,c3.c6,c8,c10", - level: "s0-s0:c0,c3.c6", - }, - { - desc: "s0,c0,c3", - level: "s0,c0,c3", - expectNoMatch: true, - }, - { - desc: "s0:c0.c3.c6", - level: "s0:c0.c3.c6", - expectNoMatch: true, - }, - { - desc: "s0-s0,c0,c3", - level: "s0-s0,c0,c3", - expectNoMatch: true, - }, - { - desc: "s0-s0:c0.c3.c6", - level: "s0-s0:c0.c3.c6", - expectNoMatch: true, - }, - { - desc: "s0-s0:c0,c3.c6.c8", - level: "s0-s0:c0,c3.c6.c8", - expectNoMatch: true, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - err := checkSelinuxLevel(test.level) - if test.expectNoMatch { - assert.Error(t, err) - } else { - assert.NoError(t, err) - } - }) - } -} diff --git a/pkg/cri/server/helpers_test.go b/pkg/cri/server/helpers_test.go index 0d78d75f2..5e58ef12a 100644 --- a/pkg/cri/server/helpers_test.go +++ b/pkg/cri/server/helpers_test.go @@ -24,25 +24,21 @@ import ( "testing" "time" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + "github.com/containerd/containerd/containers" - "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/oci" criconfig "github.com/containerd/containerd/pkg/cri/config" containerstore 
"github.com/containerd/containerd/pkg/cri/store/container" - imagestore "github.com/containerd/containerd/pkg/cri/store/image" "github.com/containerd/containerd/plugin" "github.com/containerd/containerd/protobuf/types" runcoptions "github.com/containerd/containerd/runtime/v2/runc/options" "github.com/containerd/typeurl/v2" - docker "github.com/distribution/reference" - imagedigest "github.com/opencontainers/go-digest" runtimespec "github.com/opencontainers/runtime-spec/specs-go" "github.com/pelletier/go-toml/v2" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ) // TestGetUserFromImage tests the logic of getting image uid or user name of image user. @@ -93,52 +89,6 @@ func TestGetUserFromImage(t *testing.T) { } } -func TestGetRepoDigestAndTag(t *testing.T) { - digest := imagedigest.Digest("sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582") - for _, test := range []struct { - desc string - ref string - schema1 bool - expectedRepoDigest string - expectedRepoTag string - }{ - { - desc: "repo tag should be empty if original ref has no tag", - ref: "gcr.io/library/busybox@" + digest.String(), - expectedRepoDigest: "gcr.io/library/busybox@" + digest.String(), - }, - { - desc: "repo tag should not be empty if original ref has tag", - ref: "gcr.io/library/busybox:latest", - expectedRepoDigest: "gcr.io/library/busybox@" + digest.String(), - expectedRepoTag: "gcr.io/library/busybox:latest", - }, - { - desc: "repo digest should be empty if original ref is schema1 and has no digest", - ref: "gcr.io/library/busybox:latest", - schema1: true, - expectedRepoDigest: "", - expectedRepoTag: "gcr.io/library/busybox:latest", - }, - { - desc: "repo digest should not be empty if original ref is schema1 but has digest", - ref: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59594", - schema1: true, - expectedRepoDigest: 
"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59594", - expectedRepoTag: "", - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - named, err := docker.ParseDockerRef(test.ref) - assert.NoError(t, err) - repoDigest, repoTag := getRepoDigestAndTag(named, digest, test.schema1) - assert.Equal(t, test.expectedRepoDigest, repoDigest) - assert.Equal(t, test.expectedRepoTag, repoTag) - }) - } -} - func TestBuildLabels(t *testing.T) { imageConfigLabels := map[string]string{ "a": "z", @@ -162,63 +112,9 @@ func TestBuildLabels(t *testing.T) { assert.Equal(t, "b", configLabels["a"], "change in new labels should not affect original label") } -func TestParseImageReferences(t *testing.T) { - refs := []string{ - "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", - "gcr.io/library/busybox:1.2", - "sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", - "arbitrary-ref", - } - expectedTags := []string{ - "gcr.io/library/busybox:1.2", - } - expectedDigests := []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"} - tags, digests := parseImageReferences(refs) - assert.Equal(t, expectedTags, tags) - assert.Equal(t, expectedDigests, digests) -} - -func TestLocalResolve(t *testing.T) { - image := imagestore.Image{ - ID: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799", - ChainID: "test-chain-id-1", - References: []string{ - "docker.io/library/busybox:latest", - "docker.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", - }, - Size: 10, - } - c := newTestCRIService() - var err error - c.imageStore, err = imagestore.NewFakeStore([]imagestore.Image{image}) - assert.NoError(t, err) - - for _, ref := range []string{ - "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799", - "busybox", - "busybox:latest", - 
"busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", - "library/busybox", - "library/busybox:latest", - "library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", - "docker.io/busybox", - "docker.io/busybox:latest", - "docker.io/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", - "docker.io/library/busybox", - "docker.io/library/busybox:latest", - "docker.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", - } { - img, err := c.localResolve(ref) - assert.NoError(t, err) - assert.Equal(t, image, img) - } - img, err := c.localResolve("randomid") - assert.Equal(t, errdefs.IsNotFound(err), true) - assert.Equal(t, imagestore.Image{}, img) -} - func TestGenerateRuntimeOptions(t *testing.T) { nilOpts := ` +systemd_cgroup = true [containerd] no_pivot = true default_runtime_name = "default" @@ -226,6 +122,7 @@ func TestGenerateRuntimeOptions(t *testing.T) { runtime_type = "` + plugin.RuntimeRuncV2 + `" ` nonNilOpts := ` +systemd_cgroup = true [containerd] no_pivot = true default_runtime_name = "default" @@ -649,6 +546,7 @@ func TestHostNetwork(t *testing.T) { if goruntime.GOOS != "linux" { t.Skip() } + tt := tt t.Run(tt.name, func(t *testing.T) { if hostNetwork(tt.c) != tt.expected { diff --git a/pkg/cri/server/helpers_windows.go b/pkg/cri/server/helpers_windows.go index d0a5b6f7f..a289abc44 100644 --- a/pkg/cri/server/helpers_windows.go +++ b/pkg/cri/server/helpers_windows.go @@ -169,3 +169,7 @@ func ensureRemoveAll(_ context.Context, dir string) error { func modifyProcessLabel(runtimeType string, spec *specs.Spec) error { return nil } + +func isUnifiedCgroupsMode() bool { + return false +} diff --git a/pkg/cri/server/helpers_windows_test.go b/pkg/cri/server/helpers_windows_test.go index 16d2eadce..d1fcd9ad5 100644 --- a/pkg/cri/server/helpers_windows_test.go +++ b/pkg/cri/server/helpers_windows_test.go @@ -50,6 +50,15 @@ func 
TestWindowsHostNetwork(t *testing.T) { }, expected: true, }, + { + name: "when no host process return false", + c: &runtime.PodSandboxConfig{ + Windows: &runtime.WindowsPodSandboxConfig{ + SecurityContext: &runtime.WindowsSandboxSecurityContext{}, + }, + }, + expected: false, + }, } for _, tt := range tests { diff --git a/pkg/cri/server/image_list.go b/pkg/cri/server/image_list.go deleted file mode 100644 index 7f7fc1247..000000000 --- a/pkg/cri/server/image_list.go +++ /dev/null @@ -1,39 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "context" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// ListImages lists existing images. -// TODO(random-liu): Add image list filters after CRI defines this more clear, and kubelet -// actually needs it. -func (c *criService) ListImages(ctx context.Context, r *runtime.ListImagesRequest) (*runtime.ListImagesResponse, error) { - imagesInStore := c.imageStore.List() - - var images []*runtime.Image - for _, image := range imagesInStore { - // TODO(random-liu): [P0] Make sure corresponding snapshot exists. What if snapshot - // doesn't exist? 
- images = append(images, toCRIImage(image)) - } - - return &runtime.ListImagesResponse{Images: images}, nil -} diff --git a/pkg/cri/server/image_list_test.go b/pkg/cri/server/image_list_test.go deleted file mode 100644 index b504defc3..000000000 --- a/pkg/cri/server/image_list_test.go +++ /dev/null @@ -1,113 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "context" - "testing" - - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - imagestore "github.com/containerd/containerd/pkg/cri/store/image" -) - -func TestListImages(t *testing.T) { - c := newTestCRIService() - imagesInStore := []imagestore.Image{ - { - ID: "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", - ChainID: "test-chainid-1", - References: []string{ - "gcr.io/library/busybox:latest", - "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", - }, - Size: 1000, - ImageSpec: imagespec.Image{ - Config: imagespec.ImageConfig{ - User: "root", - }, - }, - }, - { - ID: "sha256:2123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", - ChainID: "test-chainid-2", - References: []string{ - "gcr.io/library/alpine:latest", - "gcr.io/library/alpine@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", - }, - Size: 
2000, - ImageSpec: imagespec.Image{ - Config: imagespec.ImageConfig{ - User: "1234:1234", - }, - }, - }, - { - ID: "sha256:3123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", - ChainID: "test-chainid-3", - References: []string{ - "gcr.io/library/ubuntu:latest", - "gcr.io/library/ubuntu@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", - }, - Size: 3000, - ImageSpec: imagespec.Image{ - Config: imagespec.ImageConfig{ - User: "nobody", - }, - }, - }, - } - expect := []*runtime.Image{ - { - Id: "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", - RepoTags: []string{"gcr.io/library/busybox:latest"}, - RepoDigests: []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"}, - Size_: uint64(1000), - Username: "root", - }, - { - Id: "sha256:2123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", - RepoTags: []string{"gcr.io/library/alpine:latest"}, - RepoDigests: []string{"gcr.io/library/alpine@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"}, - Size_: uint64(2000), - Uid: &runtime.Int64Value{Value: 1234}, - }, - { - Id: "sha256:3123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", - RepoTags: []string{"gcr.io/library/ubuntu:latest"}, - RepoDigests: []string{"gcr.io/library/ubuntu@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"}, - Size_: uint64(3000), - Username: "nobody", - }, - } - - var err error - c.imageStore, err = imagestore.NewFakeStore(imagesInStore) - assert.NoError(t, err) - - resp, err := c.ListImages(context.Background(), &runtime.ListImagesRequest{}) - assert.NoError(t, err) - require.NotNil(t, resp) - images := resp.GetImages() - assert.Len(t, images, len(expect)) - for _, i := range expect { - assert.Contains(t, images, i) - } -} diff --git a/pkg/cri/server/image_pull.go b/pkg/cri/server/image_pull.go deleted file mode 100644 index 5b3629cfb..000000000 --- 
a/pkg/cri/server/image_pull.go +++ /dev/null @@ -1,738 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "context" - "crypto/tls" - "encoding/base64" - "fmt" - "io" - "net" - "net/http" - "net/url" - "path/filepath" - "strconv" - "strings" - "sync" - "sync/atomic" - "time" - - "github.com/containerd/imgcrypt" - "github.com/containerd/imgcrypt/images/encryption" - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/errdefs" - containerdimages "github.com/containerd/containerd/images" - "github.com/containerd/containerd/pkg/cri/annotations" - criconfig "github.com/containerd/containerd/pkg/cri/config" - crilabels "github.com/containerd/containerd/pkg/cri/labels" - snpkg "github.com/containerd/containerd/pkg/snapshotters" - "github.com/containerd/containerd/remotes/docker" - "github.com/containerd/containerd/remotes/docker/config" - "github.com/containerd/containerd/tracing" - "github.com/containerd/log" - distribution "github.com/distribution/reference" -) - -// For image management: -// 1) We have an in-memory metadata index to: -// a. Maintain ImageID -> RepoTags, ImageID -> RepoDigset relationships; ImageID -// is the digest of image config, which conforms to oci image spec. -// b. Cache constant and useful information such as image chainID, config etc. -// c. 
An image will be added into the in-memory metadata only when it's successfully -// pulled and unpacked. -// -// 2) We use containerd image metadata store and content store: -// a. To resolve image reference (digest/tag) locally. During pulling image, we -// normalize the image reference provided by user, and put it into image metadata -// store with resolved descriptor. For the other operations, if image id is provided, -// we'll access the in-memory metadata index directly; if image reference is -// provided, we'll normalize it, resolve it in containerd image metadata store -// to get the image id. -// b. As the backup of in-memory metadata in 1). During startup, the in-memory -// metadata could be re-constructed from image metadata store + content store. -// -// Several problems with current approach: -// 1) An entry in containerd image metadata store doesn't mean a "READY" (successfully -// pulled and unpacked) image. E.g. during pulling, the client gets killed. In that case, -// if we saw an image without snapshots or with in-complete contents during startup, -// should we re-pull the image? Or should we remove the entry? -// -// yanxuean: We can't delete image directly, because we don't know if the image -// is pulled by us. There are resource leakage. -// -// 2) Containerd suggests user to add entry before pulling the image. However if -// an error occurs during the pulling, should we remove the entry from metadata -// store? Or should we leave it there until next startup (resource leakage)? -// -// 3) The cri plugin only exposes "READY" (successfully pulled and unpacked) images -// to the user, which are maintained in the in-memory metadata index. However, it's -// still possible that someone else removes the content or snapshot by-pass the cri plugin, -// how do we detect that and update the in-memory metadata correspondingly? Always -// check whether corresponding snapshot is ready when reporting image status? 
-// -// 4) Is the content important if we cached necessary information in-memory -// after we pull the image? How to manage the disk usage of contents? If some -// contents are missing but snapshots are ready, is the image still "READY"? - -// PullImage pulls an image with authentication config. -func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (_ *runtime.PullImageResponse, err error) { - span := tracing.SpanFromContext(ctx) - defer func() { - // TODO: add domain label for imagePulls metrics, and we may need to provide a mechanism - // for the user to configure the set of registries that they are interested in. - if err != nil { - imagePulls.WithValues("failure").Inc() - } else { - imagePulls.WithValues("success").Inc() - } - }() - - inProgressImagePulls.Inc() - defer inProgressImagePulls.Dec() - startTime := time.Now() - - imageRef := r.GetImage().GetImage() - namedRef, err := distribution.ParseDockerRef(imageRef) - if err != nil { - return nil, fmt.Errorf("failed to parse image reference %q: %w", imageRef, err) - } - ref := namedRef.String() - if ref != imageRef { - log.G(ctx).Debugf("PullImage using normalized image ref: %q", ref) - } - - imagePullProgressTimeout, err := time.ParseDuration(c.config.ImagePullProgressTimeout) - if err != nil { - return nil, fmt.Errorf("failed to parse image_pull_progress_timeout %q: %w", c.config.ImagePullProgressTimeout, err) - } - - var ( - pctx, pcancel = context.WithCancel(ctx) - - pullReporter = newPullProgressReporter(ref, pcancel, imagePullProgressTimeout) - - resolver = docker.NewResolver(docker.ResolverOptions{ - Headers: c.config.Registry.Headers, - Hosts: c.registryHosts(ctx, r.GetAuth(), pullReporter.optionUpdateClient), - }) - isSchema1 bool - imageHandler containerdimages.HandlerFunc = func(_ context.Context, - desc imagespec.Descriptor) ([]imagespec.Descriptor, error) { - if desc.MediaType == containerdimages.MediaTypeDockerSchema1Manifest { - isSchema1 = true - } - return nil, nil - 
} - ) - - defer pcancel() - snapshotter, err := c.snapshotterFromPodSandboxConfig(ctx, ref, r.SandboxConfig) - if err != nil { - return nil, err - } - log.G(ctx).Debugf("PullImage %q with snapshotter %s", ref, snapshotter) - span.SetAttributes( - tracing.Attribute("image.ref", ref), - tracing.Attribute("snapshotter.name", snapshotter), - ) - - labels := c.getLabels(ctx, ref) - - pullOpts := []containerd.RemoteOpt{ - containerd.WithSchema1Conversion, //nolint:staticcheck // Ignore SA1019. Need to keep deprecated package for compatibility. - containerd.WithResolver(resolver), - containerd.WithPullSnapshotter(snapshotter), - containerd.WithPullUnpack, - containerd.WithPullLabels(labels), - containerd.WithMaxConcurrentDownloads(c.config.MaxConcurrentDownloads), - containerd.WithImageHandler(imageHandler), - containerd.WithUnpackOpts([]containerd.UnpackOpt{ - containerd.WithUnpackDuplicationSuppressor(c.unpackDuplicationSuppressor), - }), - } - - pullOpts = append(pullOpts, c.encryptedImagesPullOpts()...) - if !c.config.ContainerdConfig.DisableSnapshotAnnotations { - pullOpts = append(pullOpts, - containerd.WithImageHandlerWrapper(snpkg.AppendInfoHandlerWrapper(ref))) - } - - if c.config.ContainerdConfig.DiscardUnpackedLayers { - // Allows GC to clean layers up from the content store after unpacking - pullOpts = append(pullOpts, - containerd.WithChildLabelMap(containerdimages.ChildGCLabelsFilterLayers)) - } - - pullReporter.start(pctx) - image, err := c.client.Pull(pctx, ref, pullOpts...) 
- pcancel() - if err != nil { - return nil, fmt.Errorf("failed to pull and unpack image %q: %w", ref, err) - } - span.AddEvent("Pull and unpack image complete") - - configDesc, err := image.Config(ctx) - if err != nil { - return nil, fmt.Errorf("get image config descriptor: %w", err) - } - imageID := configDesc.Digest.String() - - repoDigest, repoTag := getRepoDigestAndTag(namedRef, image.Target().Digest, isSchema1) - for _, r := range []string{imageID, repoTag, repoDigest} { - if r == "" { - continue - } - if err := c.createImageReference(ctx, r, image.Target(), labels); err != nil { - return nil, fmt.Errorf("failed to create image reference %q: %w", r, err) - } - // Update image store to reflect the newest state in containerd. - // No need to use `updateImage`, because the image reference must - // have been managed by the cri plugin. - if err := c.imageStore.Update(ctx, r); err != nil { - return nil, fmt.Errorf("failed to update image store %q: %w", r, err) - } - } - - const mbToByte = 1024 * 1024 - size, _ := image.Size(ctx) - imagePullingSpeed := float64(size) / mbToByte / time.Since(startTime).Seconds() - imagePullThroughput.Observe(imagePullingSpeed) - - log.G(ctx).Infof("Pulled image %q with image id %q, repo tag %q, repo digest %q, size %q in %s", imageRef, imageID, - repoTag, repoDigest, strconv.FormatInt(size, 10), time.Since(startTime)) - // NOTE(random-liu): the actual state in containerd is the source of truth, even we maintain - // in-memory image store, it's only for in-memory indexing. The image could be removed - // by someone else anytime, before/during/after we create the metadata. We should always - // check the actual state in containerd before using the image or returning status of the - // image. - return &runtime.PullImageResponse{ImageRef: imageID}, nil -} - -// ParseAuth parses AuthConfig and returns username and password/secret required by containerd. 
-func ParseAuth(auth *runtime.AuthConfig, host string) (string, string, error) { - if auth == nil { - return "", "", nil - } - if auth.ServerAddress != "" { - // Do not return the auth info when server address doesn't match. - u, err := url.Parse(auth.ServerAddress) - if err != nil { - return "", "", fmt.Errorf("parse server address: %w", err) - } - if host != u.Host { - return "", "", nil - } - } - if auth.Username != "" { - return auth.Username, auth.Password, nil - } - if auth.IdentityToken != "" { - return "", auth.IdentityToken, nil - } - if auth.Auth != "" { - decLen := base64.StdEncoding.DecodedLen(len(auth.Auth)) - decoded := make([]byte, decLen) - _, err := base64.StdEncoding.Decode(decoded, []byte(auth.Auth)) - if err != nil { - return "", "", err - } - user, passwd, ok := strings.Cut(string(decoded), ":") - if !ok { - return "", "", fmt.Errorf("invalid decoded auth: %q", decoded) - } - return user, strings.Trim(passwd, "\x00"), nil - } - // TODO(random-liu): Support RegistryToken. - // An empty auth config is valid for anonymous registry - return "", "", nil -} - -// createImageReference creates image reference inside containerd image store. -// Note that because create and update are not finished in one transaction, there could be race. E.g. -// the image reference is deleted by someone else after create returns already exists, but before update -// happens. -func (c *criService) createImageReference(ctx context.Context, name string, desc imagespec.Descriptor, labels map[string]string) error { - img := containerdimages.Image{ - Name: name, - Target: desc, - // Add a label to indicate that the image is managed by the cri plugin. - Labels: labels, - } - // TODO(random-liu): Figure out which is the more performant sequence create then update or - // update then create. 
- oldImg, err := c.client.ImageService().Create(ctx, img) - if err == nil || !errdefs.IsAlreadyExists(err) { - return err - } - if oldImg.Target.Digest == img.Target.Digest && oldImg.Labels[crilabels.ImageLabelKey] == labels[crilabels.ImageLabelKey] { - return nil - } - _, err = c.client.ImageService().Update(ctx, img, "target", "labels."+crilabels.ImageLabelKey) - return err -} - -// getLabels get image labels to be added on CRI image -func (c *criService) getLabels(ctx context.Context, name string) map[string]string { - labels := map[string]string{crilabels.ImageLabelKey: crilabels.ImageLabelValue} - configSandboxImage := c.config.SandboxImage - // parse sandbox image - sandboxNamedRef, err := distribution.ParseDockerRef(configSandboxImage) - if err != nil { - log.G(ctx).Errorf("failed to parse sandbox image from config %s", sandboxNamedRef) - return nil - } - sandboxRef := sandboxNamedRef.String() - // Adding pinned image label to sandbox image - if sandboxRef == name { - labels[crilabels.PinnedImageLabelKey] = crilabels.PinnedImageLabelValue - } - return labels -} - -// updateImage updates image store to reflect the newest state of an image reference -// in containerd. If the reference is not managed by the cri plugin, the function also -// generates necessary metadata for the image and make it managed. -func (c *criService) updateImage(ctx context.Context, r string) error { - img, err := c.client.GetImage(ctx, r) - if err != nil && !errdefs.IsNotFound(err) { - return fmt.Errorf("get image by reference: %w", err) - } - if err == nil && img.Labels()[crilabels.ImageLabelKey] != crilabels.ImageLabelValue { - // Make sure the image has the image id as its unique - // identifier that references the image in its lifetime. 
- configDesc, err := img.Config(ctx) - if err != nil { - return fmt.Errorf("get image id: %w", err) - } - id := configDesc.Digest.String() - labels := c.getLabels(ctx, id) - if err := c.createImageReference(ctx, id, img.Target(), labels); err != nil { - return fmt.Errorf("create image id reference %q: %w", id, err) - } - if err := c.imageStore.Update(ctx, id); err != nil { - return fmt.Errorf("update image store for %q: %w", id, err) - } - // The image id is ready, add the label to mark the image as managed. - if err := c.createImageReference(ctx, r, img.Target(), labels); err != nil { - return fmt.Errorf("create managed label: %w", err) - } - } - // If the image is not found, we should continue updating the cache, - // so that the image can be removed from the cache. - if err := c.imageStore.Update(ctx, r); err != nil { - return fmt.Errorf("update image store for %q: %w", r, err) - } - return nil -} - -func hostDirFromRoots(roots []string) func(string) (string, error) { - rootfn := make([]func(string) (string, error), len(roots)) - for i := range roots { - rootfn[i] = config.HostDirFromRoot(roots[i]) - } - return func(host string) (dir string, err error) { - for _, fn := range rootfn { - dir, err = fn(host) - if (err != nil && !errdefs.IsNotFound(err)) || (dir != "") { - break - } - } - return - } -} - -// registryHosts is the registry hosts to be used by the resolver. 
-func (c *criService) registryHosts(ctx context.Context, auth *runtime.AuthConfig, updateClientFn config.UpdateClientFunc) docker.RegistryHosts { - paths := filepath.SplitList(c.config.Registry.ConfigPath) - if len(paths) > 0 { - hostOptions := config.HostOptions{ - UpdateClient: updateClientFn, - } - hostOptions.Credentials = func(host string) (string, string, error) { - hostauth := auth - if hostauth == nil { - config := c.config.Registry.Configs[host] - if config.Auth != nil { - hostauth = toRuntimeAuthConfig(*config.Auth) - } - } - return ParseAuth(hostauth, host) - } - hostOptions.HostDir = hostDirFromRoots(paths) - - return config.ConfigureHosts(ctx, hostOptions) - } - - return func(host string) ([]docker.RegistryHost, error) { - var registries []docker.RegistryHost - - endpoints, err := c.registryEndpoints(host) - if err != nil { - return nil, fmt.Errorf("get registry endpoints: %w", err) - } - for _, e := range endpoints { - u, err := url.Parse(e) - if err != nil { - return nil, fmt.Errorf("parse registry endpoint %q from mirrors: %w", e, err) - } - - var ( - transport = newTransport() - client = &http.Client{Transport: transport} - config = c.config.Registry.Configs[u.Host] - ) - - if docker.IsLocalhost(host) && u.Scheme == "http" { - // Skipping TLS verification for localhost - transport.TLSClientConfig = &tls.Config{ - InsecureSkipVerify: true, - } - } - - // Make a copy of `auth`, so that different authorizers would not reference - // the same auth variable. 
- auth := auth - if auth == nil && config.Auth != nil { - auth = toRuntimeAuthConfig(*config.Auth) - } - - if updateClientFn != nil { - if err := updateClientFn(client); err != nil { - return nil, fmt.Errorf("failed to update http client: %w", err) - } - } - - authorizer := docker.NewDockerAuthorizer( - docker.WithAuthClient(client), - docker.WithAuthCreds(func(host string) (string, string, error) { - return ParseAuth(auth, host) - })) - - if u.Path == "" { - u.Path = "/v2" - } - - registries = append(registries, docker.RegistryHost{ - Client: client, - Authorizer: authorizer, - Host: u.Host, - Scheme: u.Scheme, - Path: u.Path, - Capabilities: docker.HostCapabilityResolve | docker.HostCapabilityPull, - }) - } - return registries, nil - } -} - -// defaultScheme returns the default scheme for a registry host. -func defaultScheme(host string) string { - if docker.IsLocalhost(host) { - return "http" - } - return "https" -} - -// addDefaultScheme returns the endpoint with default scheme -func addDefaultScheme(endpoint string) (string, error) { - if strings.Contains(endpoint, "://") { - return endpoint, nil - } - ue := "dummy://" + endpoint - u, err := url.Parse(ue) - if err != nil { - return "", err - } - return fmt.Sprintf("%s://%s", defaultScheme(u.Host), endpoint), nil -} - -// registryEndpoints returns endpoints for a given host. -// It adds default registry endpoint if it does not exist in the passed-in endpoint list. -// It also supports wildcard host matching with `*`. 
-func (c *criService) registryEndpoints(host string) ([]string, error) { - var endpoints []string - _, ok := c.config.Registry.Mirrors[host] - if ok { - endpoints = c.config.Registry.Mirrors[host].Endpoints - } else { - endpoints = c.config.Registry.Mirrors["*"].Endpoints - } - defaultHost, err := docker.DefaultHost(host) - if err != nil { - return nil, fmt.Errorf("get default host: %w", err) - } - for i := range endpoints { - en, err := addDefaultScheme(endpoints[i]) - if err != nil { - return nil, fmt.Errorf("parse endpoint url: %w", err) - } - endpoints[i] = en - } - for _, e := range endpoints { - u, err := url.Parse(e) - if err != nil { - return nil, fmt.Errorf("parse endpoint url: %w", err) - } - if u.Host == host { - // Do not add default if the endpoint already exists. - return endpoints, nil - } - } - return append(endpoints, defaultScheme(defaultHost)+"://"+defaultHost), nil -} - -// newTransport returns a new HTTP transport used to pull image. -// TODO(random-liu): Create a library and share this code with `ctr`. -func newTransport() *http.Transport { - return &http.Transport{ - Proxy: http.ProxyFromEnvironment, - DialContext: (&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 30 * time.Second, - FallbackDelay: 300 * time.Millisecond, - }).DialContext, - MaxIdleConns: 10, - IdleConnTimeout: 30 * time.Second, - TLSHandshakeTimeout: 10 * time.Second, - ExpectContinueTimeout: 5 * time.Second, - } -} - -// encryptedImagesPullOpts returns the necessary list of pull options required -// for decryption of encrypted images based on the cri decryption configuration. 
-func (c *criService) encryptedImagesPullOpts() []containerd.RemoteOpt { - if c.config.ImageDecryption.KeyModel == criconfig.KeyModelNode { - ltdd := imgcrypt.Payload{} - decUnpackOpt := encryption.WithUnpackConfigApplyOpts(encryption.WithDecryptedUnpack(<dd)) - opt := containerd.WithUnpackOpts([]containerd.UnpackOpt{decUnpackOpt}) - return []containerd.RemoteOpt{opt} - } - return nil -} - -const ( - // minPullProgressReportInternal is used to prevent the reporter from - // eating more CPU resources - minPullProgressReportInternal = 5 * time.Second - // defaultPullProgressReportInterval represents that how often the - // reporter checks that pull progress. - defaultPullProgressReportInterval = 10 * time.Second -) - -// pullProgressReporter is used to check single PullImage progress. -type pullProgressReporter struct { - ref string - cancel context.CancelFunc - reqReporter pullRequestReporter - timeout time.Duration -} - -func newPullProgressReporter(ref string, cancel context.CancelFunc, timeout time.Duration) *pullProgressReporter { - return &pullProgressReporter{ - ref: ref, - cancel: cancel, - reqReporter: pullRequestReporter{}, - timeout: timeout, - } -} - -func (reporter *pullProgressReporter) optionUpdateClient(client *http.Client) error { - client.Transport = &pullRequestReporterRoundTripper{ - rt: client.Transport, - reqReporter: &reporter.reqReporter, - } - return nil -} - -func (reporter *pullProgressReporter) start(ctx context.Context) { - if reporter.timeout == 0 { - log.G(ctx).Infof("no timeout and will not start pulling image %s reporter", reporter.ref) - return - } - - go func() { - var ( - reportInterval = defaultPullProgressReportInterval - - lastSeenBytesRead = uint64(0) - lastSeenTimestamp = time.Now() - ) - - // check progress more frequently if timeout < default internal - if reporter.timeout < reportInterval { - reportInterval = reporter.timeout / 2 - - if reportInterval < minPullProgressReportInternal { - reportInterval = 
minPullProgressReportInternal - } - } - - var ticker = time.NewTicker(reportInterval) - defer ticker.Stop() - - for { - select { - case <-ticker.C: - activeReqs, bytesRead := reporter.reqReporter.status() - - log.G(ctx).WithField("ref", reporter.ref). - WithField("activeReqs", activeReqs). - WithField("totalBytesRead", bytesRead). - WithField("lastSeenBytesRead", lastSeenBytesRead). - WithField("lastSeenTimestamp", lastSeenTimestamp). - WithField("reportInterval", reportInterval). - Tracef("progress for image pull") - - if activeReqs == 0 || bytesRead > lastSeenBytesRead { - lastSeenBytesRead = bytesRead - lastSeenTimestamp = time.Now() - continue - } - - if time.Since(lastSeenTimestamp) > reporter.timeout { - log.G(ctx).Errorf("cancel pulling image %s because of no progress in %v", reporter.ref, reporter.timeout) - reporter.cancel() - return - } - case <-ctx.Done(): - activeReqs, bytesRead := reporter.reqReporter.status() - log.G(ctx).Infof("stop pulling image %s: active requests=%v, bytes read=%v", reporter.ref, activeReqs, bytesRead) - return - } - } - }() -} - -// countingReadCloser wraps http.Response.Body with pull request reporter, -// which is used by pullRequestReporterRoundTripper. -type countingReadCloser struct { - once sync.Once - - rc io.ReadCloser - reqReporter *pullRequestReporter -} - -// Read reads bytes from original io.ReadCloser and increases bytes in -// pull request reporter. -func (r *countingReadCloser) Read(p []byte) (int, error) { - n, err := r.rc.Read(p) - r.reqReporter.incByteRead(uint64(n)) - return n, err -} - -// Close closes the original io.ReadCloser and only decreases the number of -// active pull requests once. -func (r *countingReadCloser) Close() error { - err := r.rc.Close() - r.once.Do(r.reqReporter.decRequest) - return err -} - -// pullRequestReporter is used to track the progress per each criapi.PullImage. 
-type pullRequestReporter struct { - // activeReqs indicates that current number of active pulling requests, - // including auth requests. - activeReqs int32 - // totalBytesRead indicates that the total bytes has been read from - // remote registry. - totalBytesRead uint64 -} - -func (reporter *pullRequestReporter) incRequest() { - atomic.AddInt32(&reporter.activeReqs, 1) -} - -func (reporter *pullRequestReporter) decRequest() { - atomic.AddInt32(&reporter.activeReqs, -1) -} - -func (reporter *pullRequestReporter) incByteRead(nr uint64) { - atomic.AddUint64(&reporter.totalBytesRead, nr) -} - -func (reporter *pullRequestReporter) status() (currentReqs int32, totalBytesRead uint64) { - currentReqs = atomic.LoadInt32(&reporter.activeReqs) - totalBytesRead = atomic.LoadUint64(&reporter.totalBytesRead) - return currentReqs, totalBytesRead -} - -// pullRequestReporterRoundTripper wraps http.RoundTripper with pull request -// reporter which is used to track the progress of active http request with -// counting readable http.Response.Body. -// -// NOTE: -// -// Although containerd provides ingester manager to track the progress -// of pulling request, for example `ctr image pull` shows the console progress -// bar, it needs more CPU resources to open/read the ingested files with -// acquiring containerd metadata plugin's boltdb lock. -// -// Before sending HTTP request to registry, the containerd.Client.Pull library -// will open writer by containerd ingester manager. Based on this, the -// http.RoundTripper wrapper can track the active progress with lower overhead -// even if the ref has been locked in ingester manager by other Pull request. 
-type pullRequestReporterRoundTripper struct { - rt http.RoundTripper - - reqReporter *pullRequestReporter -} - -func (rt *pullRequestReporterRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { - rt.reqReporter.incRequest() - - resp, err := rt.rt.RoundTrip(req) - if err != nil { - rt.reqReporter.decRequest() - return nil, err - } - - resp.Body = &countingReadCloser{ - rc: resp.Body, - reqReporter: rt.reqReporter, - } - return resp, err -} - -// Given that runtime information is not passed from PullImageRequest, we depend on an experimental annotation -// passed from pod sandbox config to get the runtimeHandler. The annotation key is specified in configuration. -// Once we know the runtime, try to override default snapshotter if it is set for this runtime. -// See https://github.com/containerd/containerd/issues/6657 -func (c *criService) snapshotterFromPodSandboxConfig(ctx context.Context, imageRef string, - s *runtime.PodSandboxConfig) (string, error) { - snapshotter := c.config.ContainerdConfig.Snapshotter - if s == nil || s.Annotations == nil { - return snapshotter, nil - } - - runtimeHandler, ok := s.Annotations[annotations.RuntimeHandler] - if !ok { - return snapshotter, nil - } - - ociRuntime, err := c.getSandboxRuntime(s, runtimeHandler) - if err != nil { - return "", fmt.Errorf("experimental: failed to get sandbox runtime for %s: %w", runtimeHandler, err) - } - - snapshotter = c.runtimeSnapshotter(ctx, ociRuntime) - log.G(ctx).Infof("experimental: PullImage %q for runtime %s, using snapshotter %s", imageRef, runtimeHandler, snapshotter) - return snapshotter, nil -} diff --git a/pkg/cri/server/image_pull_test.go b/pkg/cri/server/image_pull_test.go deleted file mode 100644 index e0b9a3bb6..000000000 --- a/pkg/cri/server/image_pull_test.go +++ /dev/null @@ -1,486 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "context" - "encoding/base64" - "testing" - - "github.com/containerd/containerd/pkg/cri/annotations" - criconfig "github.com/containerd/containerd/pkg/cri/config" - "github.com/containerd/containerd/pkg/cri/labels" - - "github.com/stretchr/testify/assert" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func TestParseAuth(t *testing.T) { - testUser := "username" - testPasswd := "password" - testAuthLen := base64.StdEncoding.EncodedLen(len(testUser + ":" + testPasswd)) - testAuth := make([]byte, testAuthLen) - base64.StdEncoding.Encode(testAuth, []byte(testUser+":"+testPasswd)) - invalidAuth := make([]byte, testAuthLen) - base64.StdEncoding.Encode(invalidAuth, []byte(testUser+"@"+testPasswd)) - for _, test := range []struct { - desc string - auth *runtime.AuthConfig - host string - expectedUser string - expectedSecret string - expectErr bool - }{ - { - desc: "should not return error if auth config is nil", - }, - { - desc: "should not return error if empty auth is provided for access to anonymous registry", - auth: &runtime.AuthConfig{}, - expectErr: false, - }, - { - desc: "should support identity token", - auth: &runtime.AuthConfig{IdentityToken: "abcd"}, - expectedSecret: "abcd", - }, - { - desc: "should support username and password", - auth: &runtime.AuthConfig{ - Username: testUser, - Password: testPasswd, - }, - expectedUser: testUser, - expectedSecret: testPasswd, - }, - { - desc: "should support auth", - auth: &runtime.AuthConfig{Auth: string(testAuth)}, - expectedUser: testUser, - expectedSecret: testPasswd, - }, - { 
- desc: "should return error for invalid auth", - auth: &runtime.AuthConfig{Auth: string(invalidAuth)}, - expectErr: true, - }, - { - desc: "should return empty auth if server address doesn't match", - auth: &runtime.AuthConfig{ - Username: testUser, - Password: testPasswd, - ServerAddress: "https://registry-1.io", - }, - host: "registry-2.io", - expectedUser: "", - expectedSecret: "", - }, - { - desc: "should return auth if server address matches", - auth: &runtime.AuthConfig{ - Username: testUser, - Password: testPasswd, - ServerAddress: "https://registry-1.io", - }, - host: "registry-1.io", - expectedUser: testUser, - expectedSecret: testPasswd, - }, - { - desc: "should return auth if server address is not specified", - auth: &runtime.AuthConfig{ - Username: testUser, - Password: testPasswd, - }, - host: "registry-1.io", - expectedUser: testUser, - expectedSecret: testPasswd, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - u, s, err := ParseAuth(test.auth, test.host) - assert.Equal(t, test.expectErr, err != nil) - assert.Equal(t, test.expectedUser, u) - assert.Equal(t, test.expectedSecret, s) - }) - } -} - -func TestRegistryEndpoints(t *testing.T) { - for _, test := range []struct { - desc string - mirrors map[string]criconfig.Mirror - host string - expected []string - }{ - { - desc: "no mirror configured", - mirrors: map[string]criconfig.Mirror{ - "registry-1.io": { - Endpoints: []string{ - "https://registry-1.io", - "https://registry-2.io", - }, - }, - }, - host: "registry-3.io", - expected: []string{ - "https://registry-3.io", - }, - }, - { - desc: "mirror configured", - mirrors: map[string]criconfig.Mirror{ - "registry-3.io": { - Endpoints: []string{ - "https://registry-1.io", - "https://registry-2.io", - }, - }, - }, - host: "registry-3.io", - expected: []string{ - "https://registry-1.io", - "https://registry-2.io", - "https://registry-3.io", - }, - }, - { - desc: "wildcard mirror configured", - mirrors: map[string]criconfig.Mirror{ - 
"*": { - Endpoints: []string{ - "https://registry-1.io", - "https://registry-2.io", - }, - }, - }, - host: "registry-3.io", - expected: []string{ - "https://registry-1.io", - "https://registry-2.io", - "https://registry-3.io", - }, - }, - { - desc: "host should take precedence if both host and wildcard mirrors are configured", - mirrors: map[string]criconfig.Mirror{ - "*": { - Endpoints: []string{ - "https://registry-1.io", - }, - }, - "registry-3.io": { - Endpoints: []string{ - "https://registry-2.io", - }, - }, - }, - host: "registry-3.io", - expected: []string{ - "https://registry-2.io", - "https://registry-3.io", - }, - }, - { - desc: "default endpoint in list with http", - mirrors: map[string]criconfig.Mirror{ - "registry-3.io": { - Endpoints: []string{ - "https://registry-1.io", - "https://registry-2.io", - "http://registry-3.io", - }, - }, - }, - host: "registry-3.io", - expected: []string{ - "https://registry-1.io", - "https://registry-2.io", - "http://registry-3.io", - }, - }, - { - desc: "default endpoint in list with https", - mirrors: map[string]criconfig.Mirror{ - "registry-3.io": { - Endpoints: []string{ - "https://registry-1.io", - "https://registry-2.io", - "https://registry-3.io", - }, - }, - }, - host: "registry-3.io", - expected: []string{ - "https://registry-1.io", - "https://registry-2.io", - "https://registry-3.io", - }, - }, - { - desc: "default endpoint in list with path", - mirrors: map[string]criconfig.Mirror{ - "registry-3.io": { - Endpoints: []string{ - "https://registry-1.io", - "https://registry-2.io", - "https://registry-3.io/path", - }, - }, - }, - host: "registry-3.io", - expected: []string{ - "https://registry-1.io", - "https://registry-2.io", - "https://registry-3.io/path", - }, - }, - { - desc: "miss scheme endpoint in list with path", - mirrors: map[string]criconfig.Mirror{ - "registry-3.io": { - Endpoints: []string{ - "https://registry-3.io", - "registry-1.io", - "127.0.0.1:1234", - }, - }, - }, - host: "registry-3.io", - 
expected: []string{ - "https://registry-3.io", - "https://registry-1.io", - "http://127.0.0.1:1234", - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c := newTestCRIService() - c.config.Registry.Mirrors = test.mirrors - got, err := c.registryEndpoints(test.host) - assert.NoError(t, err) - assert.Equal(t, test.expected, got) - }) - } -} - -func TestDefaultScheme(t *testing.T) { - for _, test := range []struct { - desc string - host string - expected string - }{ - { - desc: "should use http by default for localhost", - host: "localhost", - expected: "http", - }, - { - desc: "should use http by default for localhost with port", - host: "localhost:8080", - expected: "http", - }, - { - desc: "should use http by default for 127.0.0.1", - host: "127.0.0.1", - expected: "http", - }, - { - desc: "should use http by default for 127.0.0.1 with port", - host: "127.0.0.1:8080", - expected: "http", - }, - { - desc: "should use http by default for ::1", - host: "::1", - expected: "http", - }, - { - desc: "should use http by default for ::1 with port", - host: "[::1]:8080", - expected: "http", - }, - { - desc: "should use https by default for remote host", - host: "remote", - expected: "https", - }, - { - desc: "should use https by default for remote host with port", - host: "remote:8080", - expected: "https", - }, - { - desc: "should use https by default for remote ip", - host: "8.8.8.8", - expected: "https", - }, - { - desc: "should use https by default for remote ip with port", - host: "8.8.8.8:8080", - expected: "https", - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - got := defaultScheme(test.host) - assert.Equal(t, test.expected, got) - }) - } -} - -func TestEncryptedImagePullOpts(t *testing.T) { - for _, test := range []struct { - desc string - keyModel string - expectedOpts int - }{ - { - desc: "node key model should return one unpack opt", - keyModel: criconfig.KeyModelNode, - expectedOpts: 1, - }, - { - desc: "no key model 
selected should default to node key model", - keyModel: "", - expectedOpts: 0, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c := newTestCRIService() - c.config.ImageDecryption.KeyModel = test.keyModel - got := len(c.encryptedImagesPullOpts()) - assert.Equal(t, test.expectedOpts, got) - }) - } -} - -func TestSnapshotterFromPodSandboxConfig(t *testing.T) { - defaultSnashotter := "native" - runtimeSnapshotter := "devmapper" - tests := []struct { - desc string - podSandboxConfig *runtime.PodSandboxConfig - expectSnapshotter string - expectErr bool - }{ - { - desc: "should return default snapshotter for nil podSandboxConfig", - expectSnapshotter: defaultSnashotter, - }, - { - desc: "should return default snapshotter for nil podSandboxConfig.Annotations", - podSandboxConfig: &runtime.PodSandboxConfig{}, - expectSnapshotter: defaultSnashotter, - }, - { - desc: "should return default snapshotter for empty podSandboxConfig.Annotations", - podSandboxConfig: &runtime.PodSandboxConfig{ - Annotations: make(map[string]string), - }, - expectSnapshotter: defaultSnashotter, - }, - { - desc: "should return error for runtime not found", - podSandboxConfig: &runtime.PodSandboxConfig{ - Annotations: map[string]string{ - annotations.RuntimeHandler: "runtime-not-exists", - }, - }, - expectErr: true, - expectSnapshotter: "", - }, - { - desc: "should return snapshotter provided in podSandboxConfig.Annotations", - podSandboxConfig: &runtime.PodSandboxConfig{ - Annotations: map[string]string{ - annotations.RuntimeHandler: "exiting-runtime", - }, - }, - expectSnapshotter: runtimeSnapshotter, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.desc, func(t *testing.T) { - cri := newTestCRIService() - cri.config.ContainerdConfig.Snapshotter = defaultSnashotter - cri.config.ContainerdConfig.Runtimes = make(map[string]criconfig.Runtime) - cri.config.ContainerdConfig.Runtimes["exiting-runtime"] = criconfig.Runtime{ - Snapshotter: runtimeSnapshotter, - } - 
snapshotter, err := cri.snapshotterFromPodSandboxConfig(context.Background(), "test-image", tt.podSandboxConfig) - assert.Equal(t, tt.expectSnapshotter, snapshotter) - if tt.expectErr { - assert.Error(t, err) - } - }) - } -} -func TestImageGetLabels(t *testing.T) { - tests := []struct { - name string - expectedLabel map[string]string - configSandboxImage string - pullImageName string - }{ - { - name: "pinned image labels should get added on sandbox image", - expectedLabel: map[string]string{labels.ImageLabelKey: labels.ImageLabelValue, labels.PinnedImageLabelKey: labels.PinnedImageLabelValue}, - configSandboxImage: "registry.k8s.io/pause:3.9", - pullImageName: "registry.k8s.io/pause:3.9", - }, - { - name: "pinned image labels should get added on sandbox image without tag", - expectedLabel: map[string]string{labels.ImageLabelKey: labels.ImageLabelValue, labels.PinnedImageLabelKey: labels.PinnedImageLabelValue}, - configSandboxImage: "registry.k8s.io/pause", - pullImageName: "registry.k8s.io/pause:latest", - }, - { - name: "pinned image labels should get added on sandbox image specified with tag and digest both", - expectedLabel: map[string]string{labels.ImageLabelKey: labels.ImageLabelValue, labels.PinnedImageLabelKey: labels.PinnedImageLabelValue}, - configSandboxImage: "registry.k8s.io/pause:3.9@sha256:7031c1b283388d2c2e09b57badb803c05ebed362dc88d84b480cc47f72a21097", - pullImageName: "registry.k8s.io/pause@sha256:7031c1b283388d2c2e09b57badb803c05ebed362dc88d84b480cc47f72a21097", - }, - { - name: "pinned image labels should get added on sandbox image specified with digest", - expectedLabel: map[string]string{labels.ImageLabelKey: labels.ImageLabelValue, labels.PinnedImageLabelKey: labels.PinnedImageLabelValue}, - configSandboxImage: "registry.k8s.io/pause@sha256:7031c1b283388d2c2e09b57badb803c05ebed362dc88d84b480cc47f72a21097", - pullImageName: "registry.k8s.io/pause@sha256:7031c1b283388d2c2e09b57badb803c05ebed362dc88d84b480cc47f72a21097", - }, - { - name: "pinned 
image labels should not get added on other image", - expectedLabel: map[string]string{labels.ImageLabelKey: labels.ImageLabelValue}, - configSandboxImage: "registry.k8s.io/pause:3.9", - pullImageName: "registry.k8s.io/random:latest", - }, - } - - svc := newTestCRIService() - for _, tc := range tests { - tc := tc - t.Run(tc.name, func(t *testing.T) { - svc.config.SandboxImage = tc.configSandboxImage - assert.Equal(t, tc.expectedLabel, svc.getLabels(context.Background(), tc.pullImageName)) - }) - } -} diff --git a/pkg/cri/server/image_remove.go b/pkg/cri/server/image_remove.go deleted file mode 100644 index e10062a31..000000000 --- a/pkg/cri/server/image_remove.go +++ /dev/null @@ -1,68 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "context" - "fmt" - - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/images" - "github.com/containerd/containerd/tracing" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// RemoveImage removes the image. -// TODO(random-liu): Update CRI to pass image reference instead of ImageSpec. (See -// kubernetes/kubernetes#46255) -// TODO(random-liu): We should change CRI to distinguish image id and image spec. -// Remove the whole image no matter the it's image id or reference. This is the -// semantic defined in CRI now. 
-func (c *criService) RemoveImage(ctx context.Context, r *runtime.RemoveImageRequest) (*runtime.RemoveImageResponse, error) { - span := tracing.SpanFromContext(ctx) - image, err := c.localResolve(r.GetImage().GetImage()) - if err != nil { - if errdefs.IsNotFound(err) { - span.AddEvent(err.Error()) - // return empty without error when image not found. - return &runtime.RemoveImageResponse{}, nil - } - return nil, fmt.Errorf("can not resolve %q locally: %w", r.GetImage().GetImage(), err) - } - span.SetAttributes(tracing.Attribute("image.id", image.ID)) - // Remove all image references. - for i, ref := range image.References { - var opts []images.DeleteOpt - if i == len(image.References)-1 { - // Delete the last image reference synchronously to trigger garbage collection. - // This is best effort. It is possible that the image reference is deleted by - // someone else before this point. - opts = []images.DeleteOpt{images.SynchronousDelete()} - } - err = c.client.ImageService().Delete(ctx, ref, opts...) - if err == nil || errdefs.IsNotFound(err) { - // Update image store to reflect the newest state in containerd. - if err := c.imageStore.Update(ctx, ref); err != nil { - return nil, fmt.Errorf("failed to update image reference %q for %q: %w", ref, image.ID, err) - } - continue - } - return nil, fmt.Errorf("failed to delete image reference %q for %q: %w", ref, image.ID, err) - } - return &runtime.RemoveImageResponse{}, nil -} diff --git a/pkg/cri/server/image_status.go b/pkg/cri/server/image_status.go deleted file mode 100644 index 646414221..000000000 --- a/pkg/cri/server/image_status.go +++ /dev/null @@ -1,112 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "context" - "encoding/json" - "fmt" - - "github.com/containerd/containerd/errdefs" - imagestore "github.com/containerd/containerd/pkg/cri/store/image" - "github.com/containerd/containerd/tracing" - "github.com/containerd/log" - - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// ImageStatus returns the status of the image, returns nil if the image isn't present. -// TODO(random-liu): We should change CRI to distinguish image id and image spec. (See -// kubernetes/kubernetes#46255) -func (c *criService) ImageStatus(ctx context.Context, r *runtime.ImageStatusRequest) (*runtime.ImageStatusResponse, error) { - span := tracing.SpanFromContext(ctx) - image, err := c.localResolve(r.GetImage().GetImage()) - if err != nil { - if errdefs.IsNotFound(err) { - span.AddEvent(err.Error()) - // return empty without error when image not found. - return &runtime.ImageStatusResponse{}, nil - } - return nil, fmt.Errorf("can not resolve %q locally: %w", r.GetImage().GetImage(), err) - } - span.SetAttributes(tracing.Attribute("image.id", image.ID)) - // TODO(random-liu): [P0] Make sure corresponding snapshot exists. What if snapshot - // doesn't exist? 
- - runtimeImage := toCRIImage(image) - info, err := c.toCRIImageInfo(ctx, &image, r.GetVerbose()) - if err != nil { - return nil, fmt.Errorf("failed to generate image info: %w", err) - } - - return &runtime.ImageStatusResponse{ - Image: runtimeImage, - Info: info, - }, nil -} - -// toCRIImage converts internal image object to CRI runtime.Image. -func toCRIImage(image imagestore.Image) *runtime.Image { - repoTags, repoDigests := parseImageReferences(image.References) - - runtimeImage := &runtime.Image{ - Id: image.ID, - RepoTags: repoTags, - RepoDigests: repoDigests, - Size_: uint64(image.Size), - Pinned: image.Pinned, - } - - uid, username := getUserFromImage(image.ImageSpec.Config.User) - if uid != nil { - runtimeImage.Uid = &runtime.Int64Value{Value: *uid} - } - runtimeImage.Username = username - - return runtimeImage -} - -// TODO (mikebrow): discuss moving this struct and / or constants for info map for some or all of these fields to CRI -type verboseImageInfo struct { - ChainID string `json:"chainID"` - ImageSpec imagespec.Image `json:"imageSpec"` -} - -// toCRIImageInfo converts internal image object information to CRI image status response info map. -func (c *criService) toCRIImageInfo(ctx context.Context, image *imagestore.Image, verbose bool) (map[string]string, error) { - if !verbose { - return nil, nil - } - - info := make(map[string]string) - - imi := &verboseImageInfo{ - ChainID: image.ChainID, - ImageSpec: image.ImageSpec, - } - - m, err := json.Marshal(imi) - if err == nil { - info["info"] = string(m) - } else { - log.G(ctx).WithError(err).Errorf("failed to marshal info %v", imi) - info["info"] = err.Error() - } - - return info, nil -} diff --git a/pkg/cri/server/image_status_test.go b/pkg/cri/server/image_status_test.go deleted file mode 100644 index fdc44ea41..000000000 --- a/pkg/cri/server/image_status_test.go +++ /dev/null @@ -1,74 +0,0 @@ -/* - Copyright The containerd Authors. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "context" - "testing" - - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - imagestore "github.com/containerd/containerd/pkg/cri/store/image" -) - -func TestImageStatus(t *testing.T) { - testID := "sha256:d848ce12891bf78792cda4a23c58984033b0c397a55e93a1556202222ecc5ed4" // #nosec G101 - image := imagestore.Image{ - ID: testID, - ChainID: "test-chain-id", - References: []string{ - "gcr.io/library/busybox:latest", - "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582", - }, - Size: 1234, - ImageSpec: imagespec.Image{ - Config: imagespec.ImageConfig{ - User: "user:group", - }, - }, - } - expected := &runtime.Image{ - Id: testID, - RepoTags: []string{"gcr.io/library/busybox:latest"}, - RepoDigests: []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"}, - Size_: uint64(1234), - Username: "user", - } - - c := newTestCRIService() - t.Logf("should return nil image spec without error for non-exist image") - resp, err := c.ImageStatus(context.Background(), &runtime.ImageStatusRequest{ - Image: &runtime.ImageSpec{Image: testID}, - }) - assert.NoError(t, err) - require.NotNil(t, resp) - assert.Nil(t, resp.GetImage()) - - c.imageStore, err = 
imagestore.NewFakeStore([]imagestore.Image{image}) - assert.NoError(t, err) - - t.Logf("should return correct image status for exist image") - resp, err = c.ImageStatus(context.Background(), &runtime.ImageStatusRequest{ - Image: &runtime.ImageSpec{Image: testID}, - }) - assert.NoError(t, err) - assert.NotNil(t, resp) - assert.Equal(t, expected, resp.GetImage()) -} diff --git a/pkg/cri/server/imagefs_info.go b/pkg/cri/server/imagefs_info.go deleted file mode 100644 index 9b561ad57..000000000 --- a/pkg/cri/server/imagefs_info.go +++ /dev/null @@ -1,51 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "context" - "time" - - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -// ImageFsInfo returns information of the filesystem that is used to store images. -// TODO(windows): Usage for windows is always 0 right now. Support this for windows. -func (c *criService) ImageFsInfo(ctx context.Context, r *runtime.ImageFsInfoRequest) (*runtime.ImageFsInfoResponse, error) { - snapshots := c.snapshotStore.List() - timestamp := time.Now().UnixNano() - var usedBytes, inodesUsed uint64 - for _, sn := range snapshots { - // Use the oldest timestamp as the timestamp of imagefs info. 
- if sn.Timestamp < timestamp { - timestamp = sn.Timestamp - } - usedBytes += sn.Size - inodesUsed += sn.Inodes - } - // TODO(random-liu): Handle content store - return &runtime.ImageFsInfoResponse{ - ImageFilesystems: []*runtime.FilesystemUsage{ - { - Timestamp: timestamp, - FsId: &runtime.FilesystemIdentifier{Mountpoint: c.imageFSPath}, - UsedBytes: &runtime.UInt64Value{Value: usedBytes}, - InodesUsed: &runtime.UInt64Value{Value: inodesUsed}, - }, - }, - }, nil -} diff --git a/pkg/cri/server/imagefs_info_test.go b/pkg/cri/server/imagefs_info_test.go deleted file mode 100644 index 5845c7545..000000000 --- a/pkg/cri/server/imagefs_info_test.go +++ /dev/null @@ -1,70 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package server - -import ( - "context" - "testing" - - snapshot "github.com/containerd/containerd/snapshots" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot" -) - -func TestImageFsInfo(t *testing.T) { - c := newTestCRIService() - snapshots := []snapshotstore.Snapshot{ - { - Key: "key1", - Kind: snapshot.KindActive, - Size: 10, - Inodes: 100, - Timestamp: 234567, - }, - { - Key: "key2", - Kind: snapshot.KindCommitted, - Size: 20, - Inodes: 200, - Timestamp: 123456, - }, - { - Key: "key3", - Kind: snapshot.KindView, - Size: 0, - Inodes: 0, - Timestamp: 345678, - }, - } - expected := &runtime.FilesystemUsage{ - Timestamp: 123456, - FsId: &runtime.FilesystemIdentifier{Mountpoint: testImageFSPath}, - UsedBytes: &runtime.UInt64Value{Value: 30}, - InodesUsed: &runtime.UInt64Value{Value: 300}, - } - for _, sn := range snapshots { - c.snapshotStore.Add(sn) - } - resp, err := c.ImageFsInfo(context.Background(), &runtime.ImageFsInfoRequest{}) - require.NoError(t, err) - stats := resp.GetImageFilesystems() - assert.Len(t, stats, 1) - assert.Equal(t, expected, stats[0]) -} diff --git a/pkg/cri/sbserver/images/image_list.go b/pkg/cri/server/images/image_list.go similarity index 100% rename from pkg/cri/sbserver/images/image_list.go rename to pkg/cri/server/images/image_list.go diff --git a/pkg/cri/sbserver/images/image_list_test.go b/pkg/cri/server/images/image_list_test.go similarity index 100% rename from pkg/cri/sbserver/images/image_list_test.go rename to pkg/cri/server/images/image_list_test.go diff --git a/pkg/cri/sbserver/images/image_pull.go b/pkg/cri/server/images/image_pull.go similarity index 100% rename from pkg/cri/sbserver/images/image_pull.go rename to pkg/cri/server/images/image_pull.go diff --git a/pkg/cri/sbserver/images/image_pull_test.go b/pkg/cri/server/images/image_pull_test.go similarity index 100% 
rename from pkg/cri/sbserver/images/image_pull_test.go rename to pkg/cri/server/images/image_pull_test.go diff --git a/pkg/cri/sbserver/images/image_remove.go b/pkg/cri/server/images/image_remove.go similarity index 100% rename from pkg/cri/sbserver/images/image_remove.go rename to pkg/cri/server/images/image_remove.go diff --git a/pkg/cri/sbserver/images/image_status.go b/pkg/cri/server/images/image_status.go similarity index 100% rename from pkg/cri/sbserver/images/image_status.go rename to pkg/cri/server/images/image_status.go diff --git a/pkg/cri/sbserver/images/image_status_test.go b/pkg/cri/server/images/image_status_test.go similarity index 100% rename from pkg/cri/sbserver/images/image_status_test.go rename to pkg/cri/server/images/image_status_test.go diff --git a/pkg/cri/sbserver/images/imagefs_info.go b/pkg/cri/server/images/imagefs_info.go similarity index 100% rename from pkg/cri/sbserver/images/imagefs_info.go rename to pkg/cri/server/images/imagefs_info.go diff --git a/pkg/cri/sbserver/images/imagefs_info_test.go b/pkg/cri/server/images/imagefs_info_test.go similarity index 100% rename from pkg/cri/sbserver/images/imagefs_info_test.go rename to pkg/cri/server/images/imagefs_info_test.go diff --git a/pkg/cri/sbserver/images/metrics.go b/pkg/cri/server/images/metrics.go similarity index 100% rename from pkg/cri/sbserver/images/metrics.go rename to pkg/cri/server/images/metrics.go diff --git a/pkg/cri/sbserver/images/service.go b/pkg/cri/server/images/service.go similarity index 100% rename from pkg/cri/sbserver/images/service.go rename to pkg/cri/server/images/service.go diff --git a/pkg/cri/sbserver/images/service_test.go b/pkg/cri/server/images/service_test.go similarity index 100% rename from pkg/cri/sbserver/images/service_test.go rename to pkg/cri/server/images/service_test.go diff --git a/pkg/cri/sbserver/images/snapshots.go b/pkg/cri/server/images/snapshots.go similarity index 100% rename from pkg/cri/sbserver/images/snapshots.go rename to 
pkg/cri/server/images/snapshots.go diff --git a/pkg/cri/server/metrics.go b/pkg/cri/server/metrics.go index f27674705..b9d198821 100644 --- a/pkg/cri/server/metrics.go +++ b/pkg/cri/server/metrics.go @@ -17,8 +17,7 @@ package server import ( - metrics "github.com/docker/go-metrics" - prom "github.com/prometheus/client_golang/prometheus" + "github.com/docker/go-metrics" ) var ( @@ -40,21 +39,11 @@ var ( networkPluginOperations metrics.LabeledCounter networkPluginOperationsErrors metrics.LabeledCounter networkPluginOperationsLatency metrics.LabeledTimer - - imagePulls metrics.LabeledCounter - inProgressImagePulls metrics.Gauge - // image size in MB / image pull duration in seconds - imagePullThroughput prom.Histogram ) func init() { - const ( - namespace = "containerd" - subsystem = "cri" - ) - // these CRI metrics record latencies for successful operations around a sandbox and container's lifecycle. - ns := metrics.NewNamespace(namespace, subsystem, nil) + ns := metrics.NewNamespace("containerd", "cri_sandboxed", nil) sandboxListTimer = ns.NewTimer("sandbox_list", "time to list sandboxes") sandboxCreateNetworkTimer = ns.NewTimer("sandbox_create_network", "time to create the network for a sandbox") @@ -75,19 +64,6 @@ func init() { networkPluginOperationsErrors = ns.NewLabeledCounter("network_plugin_operations_errors_total", "cumulative number of network plugin operations by operation type", "operation_type") networkPluginOperationsLatency = ns.NewLabeledTimer("network_plugin_operations_duration_seconds", "latency in seconds of network plugin operations. 
Broken down by operation type", "operation_type") - imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status") - inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total) - imagePullThroughput = prom.NewHistogram( - prom.HistogramOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "image_pulling_throughput", - Help: "image pull throughput", - Buckets: prom.DefBuckets, - }, - ) - - ns.Add(imagePullThroughput) metrics.Register(ns) } diff --git a/pkg/cri/sbserver/podsandbox/container_linux.go b/pkg/cri/server/podsandbox/container_linux.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/container_linux.go rename to pkg/cri/server/podsandbox/container_linux.go diff --git a/pkg/cri/sbserver/podsandbox/controller.go b/pkg/cri/server/podsandbox/controller.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/controller.go rename to pkg/cri/server/podsandbox/controller.go diff --git a/pkg/cri/sbserver/podsandbox/controller_test.go b/pkg/cri/server/podsandbox/controller_test.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/controller_test.go rename to pkg/cri/server/podsandbox/controller_test.go diff --git a/pkg/cri/sbserver/podsandbox/helpers.go b/pkg/cri/server/podsandbox/helpers.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/helpers.go rename to pkg/cri/server/podsandbox/helpers.go diff --git a/pkg/cri/sbserver/podsandbox/helpers_linux.go b/pkg/cri/server/podsandbox/helpers_linux.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/helpers_linux.go rename to pkg/cri/server/podsandbox/helpers_linux.go diff --git a/pkg/cri/sbserver/podsandbox/helpers_linux_test.go b/pkg/cri/server/podsandbox/helpers_linux_test.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/helpers_linux_test.go rename to pkg/cri/server/podsandbox/helpers_linux_test.go diff --git a/pkg/cri/sbserver/podsandbox/helpers_other.go 
b/pkg/cri/server/podsandbox/helpers_other.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/helpers_other.go rename to pkg/cri/server/podsandbox/helpers_other.go diff --git a/pkg/cri/sbserver/podsandbox/helpers_selinux_linux_test.go b/pkg/cri/server/podsandbox/helpers_selinux_linux_test.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/helpers_selinux_linux_test.go rename to pkg/cri/server/podsandbox/helpers_selinux_linux_test.go diff --git a/pkg/cri/sbserver/podsandbox/helpers_test.go b/pkg/cri/server/podsandbox/helpers_test.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/helpers_test.go rename to pkg/cri/server/podsandbox/helpers_test.go diff --git a/pkg/cri/sbserver/podsandbox/helpers_windows.go b/pkg/cri/server/podsandbox/helpers_windows.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/helpers_windows.go rename to pkg/cri/server/podsandbox/helpers_windows.go diff --git a/pkg/cri/sbserver/podsandbox/opts.go b/pkg/cri/server/podsandbox/opts.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/opts.go rename to pkg/cri/server/podsandbox/opts.go diff --git a/pkg/cri/sbserver/podsandbox/recover.go b/pkg/cri/server/podsandbox/recover.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/recover.go rename to pkg/cri/server/podsandbox/recover.go diff --git a/pkg/cri/sbserver/podsandbox/sandbox_delete.go b/pkg/cri/server/podsandbox/sandbox_delete.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/sandbox_delete.go rename to pkg/cri/server/podsandbox/sandbox_delete.go diff --git a/pkg/cri/sbserver/podsandbox/sandbox_run.go b/pkg/cri/server/podsandbox/sandbox_run.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/sandbox_run.go rename to pkg/cri/server/podsandbox/sandbox_run.go diff --git a/pkg/cri/sbserver/podsandbox/sandbox_run_linux.go b/pkg/cri/server/podsandbox/sandbox_run_linux.go similarity index 100% rename from 
pkg/cri/sbserver/podsandbox/sandbox_run_linux.go rename to pkg/cri/server/podsandbox/sandbox_run_linux.go diff --git a/pkg/cri/sbserver/podsandbox/sandbox_run_linux_test.go b/pkg/cri/server/podsandbox/sandbox_run_linux_test.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/sandbox_run_linux_test.go rename to pkg/cri/server/podsandbox/sandbox_run_linux_test.go diff --git a/pkg/cri/sbserver/podsandbox/sandbox_run_other.go b/pkg/cri/server/podsandbox/sandbox_run_other.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/sandbox_run_other.go rename to pkg/cri/server/podsandbox/sandbox_run_other.go diff --git a/pkg/cri/sbserver/podsandbox/sandbox_run_other_test.go b/pkg/cri/server/podsandbox/sandbox_run_other_test.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/sandbox_run_other_test.go rename to pkg/cri/server/podsandbox/sandbox_run_other_test.go diff --git a/pkg/cri/sbserver/podsandbox/sandbox_run_test.go b/pkg/cri/server/podsandbox/sandbox_run_test.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/sandbox_run_test.go rename to pkg/cri/server/podsandbox/sandbox_run_test.go diff --git a/pkg/cri/sbserver/podsandbox/sandbox_run_windows.go b/pkg/cri/server/podsandbox/sandbox_run_windows.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/sandbox_run_windows.go rename to pkg/cri/server/podsandbox/sandbox_run_windows.go diff --git a/pkg/cri/sbserver/podsandbox/sandbox_run_windows_test.go b/pkg/cri/server/podsandbox/sandbox_run_windows_test.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/sandbox_run_windows_test.go rename to pkg/cri/server/podsandbox/sandbox_run_windows_test.go diff --git a/pkg/cri/sbserver/podsandbox/sandbox_stats.go b/pkg/cri/server/podsandbox/sandbox_stats.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/sandbox_stats.go rename to pkg/cri/server/podsandbox/sandbox_stats.go diff --git a/pkg/cri/sbserver/podsandbox/sandbox_status.go 
b/pkg/cri/server/podsandbox/sandbox_status.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/sandbox_status.go rename to pkg/cri/server/podsandbox/sandbox_status.go diff --git a/pkg/cri/sbserver/podsandbox/sandbox_stop.go b/pkg/cri/server/podsandbox/sandbox_stop.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/sandbox_stop.go rename to pkg/cri/server/podsandbox/sandbox_stop.go diff --git a/pkg/cri/sbserver/podsandbox/store.go b/pkg/cri/server/podsandbox/store.go similarity index 100% rename from pkg/cri/sbserver/podsandbox/store.go rename to pkg/cri/server/podsandbox/store.go diff --git a/pkg/cri/server/rdt_linux.go b/pkg/cri/server/rdt.go similarity index 100% rename from pkg/cri/server/rdt_linux.go rename to pkg/cri/server/rdt.go diff --git a/pkg/cri/server/rdt_stub_linux.go b/pkg/cri/server/rdt_stub.go similarity index 100% rename from pkg/cri/server/rdt_stub_linux.go rename to pkg/cri/server/rdt_stub.go diff --git a/pkg/cri/server/restart.go b/pkg/cri/server/restart.go index fa98e54d3..8b7455c4d 100644 --- a/pkg/cri/server/restart.go +++ b/pkg/cri/server/restart.go @@ -28,6 +28,9 @@ import ( containerdio "github.com/containerd/containerd/cio" "github.com/containerd/containerd/errdefs" containerdimages "github.com/containerd/containerd/images" + criconfig "github.com/containerd/containerd/pkg/cri/config" + "github.com/containerd/containerd/pkg/cri/server/podsandbox" + "github.com/containerd/containerd/pkg/netns" "github.com/containerd/containerd/platforms" "github.com/containerd/log" "github.com/containerd/typeurl/v2" @@ -38,7 +41,6 @@ import ( containerstore "github.com/containerd/containerd/pkg/cri/store/container" sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" ctrdutil "github.com/containerd/containerd/pkg/cri/util" - "github.com/containerd/containerd/pkg/netns" ) // NOTE: The recovery logic has following assumption: when the cri plugin is down: @@ -58,13 +60,27 @@ func (c *criService) recover(ctx 
context.Context) error { return fmt.Errorf("failed to list sandbox containers: %w", err) } + podSandboxController, ok := c.sandboxControllers[criconfig.ModePodSandbox] + if !ok { + log.G(ctx).Fatal("unable to restore pod sandboxes, no controller found") + } + + podSandboxLoader, ok := podSandboxController.(podSandboxRecover) + if !ok { + log.G(ctx).Fatal("pod sandbox controller doesn't support recovery") + } + eg, ctx2 := errgroup.WithContext(ctx) for _, sandbox := range sandboxes { sandbox := sandbox eg.Go(func() error { - sb, err := c.loadSandbox(ctx2, sandbox) + sb, err := podSandboxLoader.RecoverContainer(ctx2, sandbox) if err != nil { - log.G(ctx2).WithError(err).Errorf("Failed to load sandbox %q", sandbox.ID()) + log.G(ctx2). + WithError(err). + WithField("sandbox", sandbox.ID()). + Error("Failed to load sandbox") + return nil } log.G(ctx2).Debugf("Loaded sandbox %+v", sb) @@ -81,6 +97,57 @@ func (c *criService) recover(ctx context.Context) error { return err } + // Recover sandboxes in the new SandboxStore + storedSandboxes, err := c.client.SandboxStore().List(ctx) + if err != nil { + return fmt.Errorf("failed to list sandboxes from API: %w", err) + } + for _, sbx := range storedSandboxes { + if _, err := c.sandboxStore.Get(sbx.ID); err == nil { + continue + } + + metadata := sandboxstore.Metadata{} + err := sbx.GetExtension(podsandbox.MetadataKey, &metadata) + if err != nil { + return fmt.Errorf("failed to get metadata for stored sandbox %q: %w", sbx.ID, err) + } + + var ( + state = sandboxstore.StateUnknown + controller = c.sandboxControllers[criconfig.ModeShim] + ) + + status, err := controller.Status(ctx, sbx.ID, false) + if err != nil { + log.G(ctx). + WithError(err). + WithField("sandbox", sbx.ID). 
+ Error("failed to recover sandbox state") + + if errdefs.IsNotFound(err) { + state = sandboxstore.StateNotReady + } + } else { + if code, ok := runtime.PodSandboxState_value[status.State]; ok { + if code == int32(runtime.PodSandboxState_SANDBOX_READY) { + state = sandboxstore.StateReady + } else if code == int32(runtime.PodSandboxState_SANDBOX_NOTREADY) { + state = sandboxstore.StateNotReady + } + } + } + + sb := sandboxstore.NewSandbox(metadata, sandboxstore.Status{State: state}) + + // Load network namespace. + sb.NetNS = getNetNS(&metadata) + + if err := c.sandboxStore.Add(sb); err != nil { + return fmt.Errorf("failed to add stored sandbox %q to store: %w", sbx.ID, err) + } + } + // Recover all containers. containers, err := c.client.Containers(ctx, filterLabel(containerKindLabel, containerKindContainer)) if err != nil { @@ -92,7 +159,11 @@ func (c *criService) recover(ctx context.Context) error { eg.Go(func() error { cntr, err := c.loadContainer(ctx2, container) if err != nil { - log.G(ctx2).WithError(err).Errorf("Failed to load container %q", container.ID()) + log.G(ctx2). + WithError(err). + WithField("container", container.ID()). + Error("Failed to load container") + return nil } log.G(ctx2).Debugf("Loaded container %+v", cntr) @@ -339,103 +410,18 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe return containerstore.NewContainer(*meta, opts...) } -// loadSandbox loads sandbox from containerd. -func (c *criService) loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) { - ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout) - defer cancel() - var sandbox sandboxstore.Sandbox - // Load sandbox metadata. 
- exts, err := cntr.Extensions(ctx) - if err != nil { - return sandbox, fmt.Errorf("failed to get sandbox container extensions: %w", err) - } - ext, ok := exts[sandboxMetadataExtension] - if !ok { - return sandbox, fmt.Errorf("metadata extension %q not found", sandboxMetadataExtension) - } - data, err := typeurl.UnmarshalAny(ext) - if err != nil { - return sandbox, fmt.Errorf("failed to unmarshal metadata extension %q: %w", ext, err) - } - meta := data.(*sandboxstore.Metadata) - - s, err := func() (sandboxstore.Status, error) { - status := unknownSandboxStatus() - // Load sandbox created timestamp. - info, err := cntr.Info(ctx) - if err != nil { - return status, fmt.Errorf("failed to get sandbox container info: %w", err) - } - status.CreatedAt = info.CreatedAt - - // Load sandbox state. - t, err := cntr.Task(ctx, nil) - if err != nil && !errdefs.IsNotFound(err) { - return status, fmt.Errorf("failed to load task: %w", err) - } - var taskStatus containerd.Status - var notFound bool - if errdefs.IsNotFound(err) { - // Task is not found. - notFound = true - } else { - // Task is found. Get task status. - taskStatus, err = t.Status(ctx) - if err != nil { - // It's still possible that task is deleted during this window. - if !errdefs.IsNotFound(err) { - return status, fmt.Errorf("failed to get task status: %w", err) - } - notFound = true - } - } - if notFound { - // Task does not exist, set sandbox state as NOTREADY. - status.State = sandboxstore.StateNotReady - } else { - if taskStatus.Status == containerd.Running { - // Wait for the task for sandbox monitor. - // wait is a long running background request, no timeout needed. - exitCh, err := t.Wait(ctrdutil.NamespacedContext()) - if err != nil { - if !errdefs.IsNotFound(err) { - return status, fmt.Errorf("failed to wait for task: %w", err) - } - status.State = sandboxstore.StateNotReady - } else { - // Task is running, set sandbox state as READY. 
- status.State = sandboxstore.StateReady - status.Pid = t.Pid() - c.eventMonitor.startSandboxExitMonitor(context.Background(), meta.ID, status.Pid, exitCh) - } - } else { - // Task is not running. Delete the task and set sandbox state as NOTREADY. - if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { - return status, fmt.Errorf("failed to delete task: %w", err) - } - status.State = sandboxstore.StateNotReady - } - } - return status, nil - }() - if err != nil { - log.G(ctx).WithError(err).Errorf("Failed to load sandbox status for %q", cntr.ID()) - } - - sandbox = sandboxstore.NewSandbox(*meta, s) - sandbox.Container = cntr +// podSandboxRecover is an additional interface implemented by podsandbox/ controller to handle +// Pod sandbox containers recovery. +type podSandboxRecover interface { + RecoverContainer(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) +} +func getNetNS(meta *sandboxstore.Metadata) *netns.NetNS { // Don't need to load netns for host network sandbox. if hostNetwork(meta.Config) { - return sandbox, nil + return nil } - // Load network namespace. - sandbox.NetNS = netns.LoadNetNS(meta.NetNSPath) - - // It doesn't matter whether task is running or not. If it is running, sandbox - // status will be `READY`; if it is not running, sandbox status will be `NOT_READY`, - // kubelet will stop the sandbox which will properly cleanup everything. - return sandbox, nil + return netns.LoadNetNS(meta.NetNSPath) } // loadImages loads images from containerd. @@ -466,7 +452,7 @@ func (c *criService) loadImages(ctx context.Context, cImages []containerd.Image) log.G(ctx).Warnf("The image %s is not unpacked.", i.Name()) // TODO(random-liu): Consider whether we should try unpack here. 
} - if err := c.updateImage(ctx, i.Name()); err != nil { + if err := c.UpdateImage(ctx, i.Name()); err != nil { log.G(ctx).WithError(err).Warnf("Failed to update reference for image %q", i.Name()) return } diff --git a/pkg/cri/server/sandbox_remove.go b/pkg/cri/server/sandbox_remove.go index c440eb5be..4cccd13dc 100644 --- a/pkg/cri/server/sandbox_remove.go +++ b/pkg/cri/server/sandbox_remove.go @@ -21,7 +21,6 @@ import ( "fmt" "time" - "github.com/containerd/containerd" "github.com/containerd/containerd/errdefs" "github.com/containerd/log" @@ -49,7 +48,7 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS // If the sandbox is still running, not ready, or in an unknown state, forcibly stop it. // Even if it's in a NotReady state, this will close its network namespace, if open. // This can happen if the task process associated with the Pod died or it was killed. - log.L.Infof("Forcibly stopping sandbox %q", id) + log.G(ctx).Infof("Forcibly stopping sandbox %q", id) if err := c.stopPodSandbox(ctx, sandbox); err != nil { return nil, fmt.Errorf("failed to forcibly stop sandbox %q: %w", id, err) } @@ -80,26 +79,19 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS } } - // Cleanup the sandbox root directories. - sandboxRootDir := c.getSandboxRootDir(id) - if err := ensureRemoveAll(ctx, sandboxRootDir); err != nil { - return nil, fmt.Errorf("failed to remove sandbox root directory %q: %w", - sandboxRootDir, err) - } - volatileSandboxRootDir := c.getVolatileSandboxRootDir(id) - if err := ensureRemoveAll(ctx, volatileSandboxRootDir); err != nil { - return nil, fmt.Errorf("failed to remove volatile sandbox root directory %q: %w", - volatileSandboxRootDir, err) + // Use sandbox controller to delete sandbox + controller, err := c.getSandboxController(sandbox.Config, sandbox.RuntimeHandler) + if err != nil { + return nil, fmt.Errorf("failed to get sandbox controller: %w", err) } - // Delete sandbox container. 
- if err := sandbox.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil { - if !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("failed to delete sandbox container %q: %w", id, err) - } - log.G(ctx).Tracef("Remove called for sandbox container %q that does not exist", id) + if err := controller.Shutdown(ctx, id); err != nil && !errdefs.IsNotFound(err) { + return nil, fmt.Errorf("failed to delete sandbox %q: %w", id, err) } + // Send CONTAINER_DELETED event with ContainerId equal to SandboxId. + c.generateAndSendContainerEvent(ctx, id, id, runtime.ContainerEventType_CONTAINER_DELETED_EVENT) + err = c.nri.RemovePodSandbox(ctx, &sandbox) if err != nil { log.G(ctx).WithError(err).Errorf("NRI pod removal notification failed") @@ -112,12 +104,13 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS // 3) On-going operations which have held the reference will not be affected. c.sandboxStore.Delete(id) + if err := c.client.SandboxStore().Delete(ctx, id); err != nil { + return nil, fmt.Errorf("failed to remove sandbox metadata from store: %w", err) + } + // Release the sandbox name reserved for the sandbox. c.sandboxNameIndex.ReleaseByKey(id) - // Send CONTAINER_DELETED event with both ContainerId and SandboxId equal to SandboxId. 
- c.generateAndSendContainerEvent(ctx, id, id, runtime.ContainerEventType_CONTAINER_DELETED_EVENT) - sandboxRemoveTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(start) return &runtime.RemovePodSandboxResponse{}, nil diff --git a/pkg/cri/server/sandbox_run.go b/pkg/cri/server/sandbox_run.go index a9540a1a5..6dbecdc38 100644 --- a/pkg/cri/server/sandbox_run.go +++ b/pkg/cri/server/sandbox_run.go @@ -23,27 +23,22 @@ import ( "fmt" "math" "path/filepath" - goruntime "runtime" "strings" "time" - cni "github.com/containerd/go-cni" + "github.com/containerd/go-cni" "github.com/containerd/typeurl/v2" - "github.com/davecgh/go-spew/spew" - selinux "github.com/opencontainers/selinux/go-selinux" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" "github.com/containerd/containerd" - containerdio "github.com/containerd/containerd/cio" - "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/pkg/cri/annotations" + "github.com/containerd/containerd/pkg/cri/bandwidth" criconfig "github.com/containerd/containerd/pkg/cri/config" - customopts "github.com/containerd/containerd/pkg/cri/opts" - "github.com/containerd/containerd/pkg/cri/server/bandwidth" + "github.com/containerd/containerd/pkg/cri/server/podsandbox" sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" "github.com/containerd/containerd/pkg/cri/util" "github.com/containerd/containerd/pkg/netns" - "github.com/containerd/containerd/snapshots" + sb "github.com/containerd/containerd/sandbox" "github.com/containerd/log" ) @@ -79,13 +74,42 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox return nil, fmt.Errorf("failed to reserve sandbox name %q: %w", name, err) } defer func() { - // Release the name if the function returns with an error and all the resource cleanup is done. + // Release the name if the function returns with an error. // When cleanupErr != nil, the name will be cleaned in sandbox_remove. 
if retErr != nil && cleanupErr == nil { c.sandboxNameIndex.ReleaseByName(name) } }() + var ( + err error + sandboxInfo = sb.Sandbox{ID: id} + ) + + ociRuntime, err := c.getSandboxRuntime(config, r.GetRuntimeHandler()) + if err != nil { + return nil, fmt.Errorf("unable to get OCI runtime for sandbox %q: %w", id, err) + } + + sandboxInfo.Runtime.Name = ociRuntime.Type + + runtimeStart := time.Now() + // Retrieve runtime options + runtimeOpts, err := generateRuntimeOptions(ociRuntime) + if err != nil { + return nil, fmt.Errorf("failed to generate sandbox runtime options: %w", err) + } + + if runtimeOpts != nil { + sandboxInfo.Runtime.Options, err = typeurl.MarshalAny(runtimeOpts) + if err != nil { + return nil, fmt.Errorf("failed to marshal runtime options: %w", err) + } + } + + // Save sandbox name + sandboxInfo.AddLabel("name", name) + // Create initial internal sandbox object. sandbox := sandboxstore.NewSandbox( sandboxstore.Metadata{ @@ -99,89 +123,17 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox }, ) - // Ensure sandbox container image snapshot. - image, err := c.ensureImageExists(ctx, c.config.SandboxImage, config) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox image %q: %w", c.config.SandboxImage, err) + if _, err := c.client.SandboxStore().Create(ctx, sandboxInfo); err != nil { + return nil, fmt.Errorf("failed to save sandbox metadata: %w", err) } - containerdImage, err := c.toContainerdImage(ctx, *image) - if err != nil { - return nil, fmt.Errorf("failed to get image from containerd %q: %w", image.ID, err) - } - - ociRuntime, err := c.getSandboxRuntime(config, r.GetRuntimeHandler()) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox runtime: %w", err) - } - log.G(ctx).WithField("podsandboxid", id).Debugf("use OCI runtime %+v", ociRuntime) - - runtimeStart := time.Now() - // Create sandbox container. - // NOTE: sandboxContainerSpec SHOULD NOT have side - // effect, e.g. 
accessing/creating files, so that we can test - // it safely. - // NOTE: the network namespace path will be created later and update through updateNetNamespacePath function - spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, "", ociRuntime.PodAnnotations) - if err != nil { - return nil, fmt.Errorf("failed to generate sandbox container spec: %w", err) - } - log.G(ctx).WithField("podsandboxid", id).Debugf("sandbox container spec: %#+v", spew.NewFormatter(spec)) - sandbox.ProcessLabel = spec.Process.SelinuxLabel defer func() { - if retErr != nil { - selinux.ReleaseLabel(sandbox.ProcessLabel) + if retErr != nil && cleanupErr == nil { + cleanupErr = c.client.SandboxStore().Delete(ctx, id) } }() - // handle any KVM based runtime - if err := modifyProcessLabel(ociRuntime.Type, spec); err != nil { - return nil, err - } - - if config.GetLinux().GetSecurityContext().GetPrivileged() { - // If privileged don't set selinux label, but we still record the MCS label so that - // the unused label can be freed later. - spec.Process.SelinuxLabel = "" - } - - // Generate spec options that will be applied to the spec later. - specOpts, err := c.sandboxContainerSpecOpts(config, &image.ImageSpec.Config) - if err != nil { - return nil, fmt.Errorf("failed to generate sandbox container spec options: %w", err) - } - - sandboxLabels := buildLabels(config.Labels, image.ImageSpec.Config.Labels, containerKindSandbox) - - runtimeOpts, err := generateRuntimeOptions(ociRuntime) - if err != nil { - return nil, fmt.Errorf("failed to generate runtime options: %w", err) - } - - sOpts := []snapshots.Opt{snapshots.WithLabels(snapshots.FilterInheritedLabels(config.Annotations))} - extraSOpts, err := sandboxSnapshotterOpts(config) - if err != nil { - return nil, err - } - sOpts = append(sOpts, extraSOpts...) 
- - opts := []containerd.NewContainerOpts{ - containerd.WithSnapshotter(c.runtimeSnapshotter(ctx, ociRuntime)), - customopts.WithNewSnapshot(id, containerdImage, sOpts...), - containerd.WithSpec(spec, specOpts...), - containerd.WithContainerLabels(sandboxLabels), - containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata), - containerd.WithRuntime(ociRuntime.Type, runtimeOpts)} - - container, err := c.client.NewContainer(ctx, id, opts...) - if err != nil { - return nil, fmt.Errorf("failed to create containerd container: %w", err) - } - - // Add container into sandbox store in INIT state. - sandbox.Container = container - defer func() { - // Put the sandbox into sandbox store when the some resource fails to be cleaned. + // Put the sandbox into sandbox store when some resources fail to be cleaned. if retErr != nil && cleanupErr != nil { log.G(ctx).WithError(cleanupErr).Errorf("encountered an error cleaning up failed sandbox %q, marking sandbox state as SANDBOX_UNKNOWN", id) if err := c.sandboxStore.Add(sandbox); err != nil { @@ -190,69 +142,16 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox } }() - defer func() { - // Delete container only if all the resource cleanup is done. - if retErr != nil && cleanupErr == nil { - deferCtx, deferCancel := util.DeferContext() - defer deferCancel() - if cleanupErr = container.Delete(deferCtx, containerd.WithSnapshotCleanup); cleanupErr != nil { - log.G(ctx).WithError(cleanupErr).Errorf("Failed to delete containerd container %q", id) - } - } - }() - - // Create sandbox container root directories. - sandboxRootDir := c.getSandboxRootDir(id) - if err := c.os.MkdirAll(sandboxRootDir, 0755); err != nil { - return nil, fmt.Errorf("failed to create sandbox root directory %q: %w", - sandboxRootDir, err) - } - defer func() { - if retErr != nil { - // Cleanup the sandbox root directory. 
- if err := c.os.RemoveAll(sandboxRootDir); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to remove sandbox root directory %q", - sandboxRootDir) - } - } - }() - volatileSandboxRootDir := c.getVolatileSandboxRootDir(id) - if err := c.os.MkdirAll(volatileSandboxRootDir, 0755); err != nil { - return nil, fmt.Errorf("failed to create volatile sandbox root directory %q: %w", - volatileSandboxRootDir, err) - } - defer func() { - if retErr != nil { - // Cleanup the volatile sandbox root directory. - if err := c.os.RemoveAll(volatileSandboxRootDir); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to remove volatile sandbox root directory %q", - volatileSandboxRootDir) - } - } - }() - - // Setup files required for the sandbox. - if err = c.setupSandboxFiles(id, config); err != nil { - return nil, fmt.Errorf("failed to setup sandbox files: %w", err) - } - defer func() { - if retErr != nil { - if err = c.cleanupSandboxFiles(id, config); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to cleanup sandbox files in %q", - sandboxRootDir) - } - } - }() - - // Update sandbox created timestamp. - info, err := container.Info(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox container info: %w", err) - } - + // XXX: What we really want here is to call controller.Platform() and then check + // platform.OS, but that is only populated after controller.Create() and that needs to be + // done later (uses sandbox.NSPath that we will set just _after_ this). + // So, lets check for the Linux section on the config, if that is populated, we assume the + // platform is linux. + // This is a hack, we should improve the controller interface to return the platform + // earlier. But should work fine for this specific use. 
userNsEnabled := false - if goruntime.GOOS != "windows" { - usernsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions() + if linux := config.GetLinux(); linux != nil { + usernsOpts := linux.GetSecurityContext().GetNamespaceOptions().GetUsernsOptions() if usernsOpts != nil && usernsOpts.GetMode() == runtime.NamespaceMode_POD { userNsEnabled = true } @@ -267,12 +166,11 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox // !userNsEnabled case, therefore doing it would defeat the purpose. // // The difference between the cases is the use of netns.NewNetNS() vs - // netns.NewNetNSFromPID() and we verify the task is still running in the other case. + // netns.NewNetNSFromPID(). // // To simplify this, in the future, we should just remove this case (podNetwork && // !userNsEnabled) and just keep the other case (podNetwork && userNsEnabled). netStart := time.Now() - // If it is not in host network namespace then create a namespace and set the sandbox // handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network // namespaces. If the pod is in host network namespace then both are empty and should not @@ -285,10 +183,10 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox if err != nil { return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err) } + // Update network namespace in the store, which is used to generate the container's spec sandbox.NetNSPath = sandbox.NetNS.GetPath() - defer func() { - // Remove the network namespace only if all the resource cleanup is done. 
+ // Remove the network namespace only if all the resource cleanup is done if retErr != nil && cleanupErr == nil { if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil { log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id) @@ -298,22 +196,19 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox } }() - // Update network namespace in the container's spec - c.updateNetNamespacePath(spec, sandbox.NetNSPath) - - if err := container.Update(ctx, - // Update spec of the container - containerd.UpdateContainerOpts(containerd.WithSpec(spec)), - // Update sandbox metadata to include NetNS info - containerd.UpdateContainerOpts(containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata)), - ); err != nil { - return nil, fmt.Errorf("failed to update the network namespace for the sandbox container %q: %w", id, err) + if err := sandboxInfo.AddExtension(podsandbox.MetadataKey, &sandbox.Metadata); err != nil { + return nil, fmt.Errorf("unable to save sandbox %q to store: %w", id, err) + } + // Save sandbox metadata to store + if sandboxInfo, err = c.client.SandboxStore().Update(ctx, sandboxInfo, "extensions"); err != nil { + return nil, fmt.Errorf("unable to update extensions for sandbox %q: %w", id, err) } // Define this defer to teardownPodNetwork prior to the setupPodNetwork function call. - // This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource creation functions. + // This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource + // creation functions. defer func() { - // Teardown the network only if all the resource cleanup is done. + // Remove the network namespace only if all the resource cleanup is done. 
if retErr != nil && cleanupErr == nil { deferCtx, deferCancel := util.DeferContext() defer deferCancel() @@ -321,6 +216,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil { log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id) } + } }() @@ -335,59 +231,59 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox if err := c.setupPodNetwork(ctx, &sandbox); err != nil { return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err) } - - // Update metadata here to save CNI result and pod IPs to disk. - if err := container.Update(ctx, - // Update sandbox metadata to include NetNS info - containerd.UpdateContainerOpts(containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata)), - ); err != nil { - return nil, fmt.Errorf("failed to update the network namespace for the sandbox container %q: %w", id, err) - } - sandboxCreateNetworkTimer.UpdateSince(netStart) } - // Create sandbox task in containerd. - log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).", - id, name) + if err := sandboxInfo.AddExtension(podsandbox.MetadataKey, &sandbox.Metadata); err != nil { + return nil, fmt.Errorf("unable to save sandbox %q to store: %w", id, err) + } - var taskOpts []containerd.NewTaskOpts - if ociRuntime.Path != "" { - taskOpts = append(taskOpts, containerd.WithRuntimePath(ociRuntime.Path)) - } - // We don't need stdio for sandbox container. - task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...) 
+ controller, err := c.getSandboxController(config, r.GetRuntimeHandler()) if err != nil { - return nil, fmt.Errorf("failed to create containerd task: %w", err) + return nil, fmt.Errorf("failed to get sandbox controller: %w", err) } - defer func() { - if retErr != nil { - deferCtx, deferCancel := util.DeferContext() - defer deferCancel() - // Cleanup the sandbox container if an error is returned. - if _, err := task.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { - log.G(ctx).WithError(err).Errorf("Failed to delete sandbox container %q", id) - cleanupErr = err + + // Save sandbox metadata to store + if sandboxInfo, err = c.client.SandboxStore().Update(ctx, sandboxInfo, "extensions"); err != nil { + return nil, fmt.Errorf("unable to update extensions for sandbox %q: %w", id, err) + } + + if err := controller.Create(ctx, id, sb.WithOptions(config), sb.WithNetNSPath(sandbox.NetNSPath)); err != nil { + return nil, fmt.Errorf("failed to create sandbox %q: %w", id, err) + } + + ctrl, err := controller.Start(ctx, id) + if err != nil { + sandbox.Container, _ = c.client.LoadContainer(ctx, id) + var cerr podsandbox.CleanupErr + if errors.As(err, &cerr) { + cleanupErr = fmt.Errorf("failed to cleanup sandbox: %w", cerr) + + // Strip last error as cleanup error to handle separately + if merr, ok := err.(interface{ Unwrap() []error }); ok { + if errs := merr.Unwrap(); len(errs) > 0 { + err = errs[0] + } } } - }() - - // wait is a long running background request, no timeout needed. - exitCh, err := task.Wait(util.NamespacedContext()) - if err != nil { - return nil, fmt.Errorf("failed to wait for sandbox container task: %w", err) + return nil, fmt.Errorf("failed to start sandbox %q: %w", id, err) } if !hostNetwork(config) && userNsEnabled { // If userns is enabled, then the netns was created by the OCI runtime - // when creating "task". The OCI runtime needs to create the netns + // on controller.Start(). 
The OCI runtime needs to create the netns // because, if userns is in use, the netns needs to be owned by the // userns. So, let the OCI runtime just handle this for us. // If the netns is not owned by the userns several problems will happen. // For instance, the container will lack permission (even if // capabilities are present) to modify the netns or, even worse, the OCI // runtime will fail to mount sysfs: - // https://github.com/torvalds/linux/commit/7dc5dbc879bd0779924b5132a48b731a0bc04a1e#diff-4839664cd0c8eab716e064323c7cd71fR1164 + // https://github.com/torvalds/linux/commit/7dc5dbc879bd0779924b5132a48b731a0bc04a1e#diff-4839664cd0c8eab716e064323c7cd71fR1164 + // + // Note we do this after controller.Start(), as before that we + // can't get the PID for the sandbox that we need for the netns. + // Doing a controller.Status() call before that fails (can't + // find the sandbox) so we can't get the PID. netStart := time.Now() // If it is not in host network namespace then create a namespace and set the sandbox @@ -398,19 +294,16 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox if c.config.NetNSMountsUnderStateDir { netnsMountDir = filepath.Join(c.config.StateDir, "netns") } - sandbox.NetNS, err = netns.NewNetNSFromPID(netnsMountDir, task.Pid()) + + sandbox.NetNS, err = netns.NewNetNSFromPID(netnsMountDir, ctrl.Pid) if err != nil { return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err) } - // Verify task is still in created state. - if st, err := task.Status(ctx); err != nil || st.Status != containerd.Created { - return nil, fmt.Errorf("failed to create pod sandbox %q: err is %v - status is %q and is expected %q", id, err, st.Status, containerd.Created) - } + // Update network namespace in the store, which is used to generate the container's spec sandbox.NetNSPath = sandbox.NetNS.GetPath() - defer func() { - // Remove the network namespace only if all the resource cleanup is done. 
+ // Remove the network namespace only if all the resource cleanup is done if retErr != nil && cleanupErr == nil { if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil { log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id) @@ -420,21 +313,19 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox } }() - // Update network namespace in the container's spec - c.updateNetNamespacePath(spec, sandbox.NetNSPath) - - if err := container.Update(ctx, - // Update spec of the container - containerd.UpdateContainerOpts(containerd.WithSpec(spec)), - // Update sandbox metadata to include NetNS info - containerd.UpdateContainerOpts(containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata))); err != nil { - return nil, fmt.Errorf("failed to update the network namespace for the sandbox container %q: %w", id, err) + if err := sandboxInfo.AddExtension(podsandbox.MetadataKey, &sandbox.Metadata); err != nil { + return nil, fmt.Errorf("unable to save sandbox %q to store: %w", id, err) + } + // Save sandbox metadata to store + if sandboxInfo, err = c.client.SandboxStore().Update(ctx, sandboxInfo, "extensions"); err != nil { + return nil, fmt.Errorf("unable to update extensions for sandbox %q: %w", id, err) } // Define this defer to teardownPodNetwork prior to the setupPodNetwork function call. - // This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource creation functions. + // This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource + // creation functions. defer func() { - // Teardown the network only if all the resource cleanup is done. + // Remove the network namespace only if all the resource cleanup is done. 
if retErr != nil && cleanupErr == nil { deferCtx, deferCancel := util.DeferContext() defer deferCancel() @@ -442,6 +333,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil { log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id) } + } }() @@ -456,10 +348,25 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox if err := c.setupPodNetwork(ctx, &sandbox); err != nil { return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err) } - sandboxCreateNetworkTimer.UpdateSince(netStart) } + // TODO: get rid of this. sandbox object should no longer have Container field. + if ociRuntime.SandboxMode == string(criconfig.ModePodSandbox) { + container, err := c.client.LoadContainer(ctx, id) + if err != nil { + return nil, fmt.Errorf("failed to load container %q for sandbox: %w", id, err) + } + sandbox.Container = container + } + + labels := ctrl.Labels + if labels == nil { + labels = map[string]string{} + } + + sandbox.ProcessLabel = labels["selinux_label"] + err = c.nri.RunPodSandbox(ctx, &sandbox) if err != nil { return nil, fmt.Errorf("NRI RunPodSandbox failed: %w", err) @@ -473,20 +380,17 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox } }() - if err := task.Start(ctx); err != nil { - return nil, fmt.Errorf("failed to start sandbox container task %q: %w", id, err) - } - if err := sandbox.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) { // Set the pod sandbox as ready after successfully start sandbox container. - status.Pid = task.Pid() + status.Pid = ctrl.Pid status.State = sandboxstore.StateReady - status.CreatedAt = info.CreatedAt + status.CreatedAt = ctrl.CreatedAt return status, nil }); err != nil { return nil, fmt.Errorf("failed to update sandbox status: %w", err) } + // Add sandbox into sandbox store in INIT state. 
if err := c.sandboxStore.Add(sandbox); err != nil { return nil, fmt.Errorf("failed to add sandbox %+v into store: %w", sandbox, err) } @@ -496,17 +400,33 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox // SandboxStatus from the store and include it in the event. c.generateAndSendContainerEvent(ctx, id, id, runtime.ContainerEventType_CONTAINER_CREATED_EVENT) + // TODO: Use sandbox client instead + exitCh := make(chan containerd.ExitStatus, 1) + go func() { + defer close(exitCh) + + ctx := util.NamespacedContext() + resp, err := controller.Wait(ctx, id) + if err != nil { + log.G(ctx).WithError(err).Error("failed to wait for sandbox exit") + exitCh <- *containerd.NewExitStatus(containerd.UnknownExitStatus, time.Time{}, err) + return + } + + exitCh <- *containerd.NewExitStatus(resp.ExitStatus, resp.ExitedAt, nil) + }() + // start the monitor after adding sandbox into the store, this ensures // that sandbox is in the store, when event monitor receives the TaskExit event. // // TaskOOM from containerd may come before sandbox is added to store, // but we don't care about sandbox TaskOOM right now, so it is fine. - c.eventMonitor.startSandboxExitMonitor(context.Background(), id, task.Pid(), exitCh) + c.eventMonitor.startSandboxExitMonitor(context.Background(), id, ctrl.Pid, exitCh) - // Send CONTAINER_STARTED event with both ContainerId and SandboxId equal to SandboxId. + // Send CONTAINER_STARTED event with ContainerId equal to SandboxId. 
c.generateAndSendContainerEvent(ctx, id, id, runtime.ContainerEventType_CONTAINER_STARTED_EVENT) - sandboxRuntimeCreateTimer.WithValues(ociRuntime.Type).UpdateSince(runtimeStart) + sandboxRuntimeCreateTimer.WithValues(labels["oci_runtime_type"]).UpdateSince(runtimeStart) return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil } @@ -763,6 +683,25 @@ func (c *criService) getSandboxRuntime(config *runtime.PodSandboxConfig, runtime return handler, nil } +// getSandboxController returns the sandbox controller configuration for sandbox. +// If absent in legacy case, it will return the default controller. +func (c *criService) getSandboxController(config *runtime.PodSandboxConfig, runtimeHandler string) (sb.Controller, error) { + ociRuntime, err := c.getSandboxRuntime(config, runtimeHandler) + if err != nil { + return nil, fmt.Errorf("failed to get sandbox runtime: %w", err) + } + // Validate mode + if err = ValidateMode(ociRuntime.SandboxMode); err != nil { + return nil, err + } + // Use sandbox controller to delete sandbox + controller, exist := c.sandboxControllers[criconfig.SandboxControllerMode(ociRuntime.SandboxMode)] + if !exist { + return nil, fmt.Errorf("sandbox controller %s not exist", ociRuntime.SandboxMode) + } + return controller, nil +} + func logDebugCNIResult(ctx context.Context, sandboxID string, result *cni.Result) { if log.GetLevel() < log.DebugLevel { return diff --git a/pkg/cri/server/sandbox_run_linux.go b/pkg/cri/server/sandbox_run_linux.go deleted file mode 100644 index f69413766..000000000 --- a/pkg/cri/server/sandbox_run_linux.go +++ /dev/null @@ -1,361 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "fmt" - "os" - "strconv" - "strings" - - "github.com/containerd/containerd/oci" - "github.com/containerd/containerd/snapshots" - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - selinux "github.com/opencontainers/selinux/go-selinux" - "golang.org/x/sys/unix" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/pkg/cri/annotations" - customopts "github.com/containerd/containerd/pkg/cri/opts" - "github.com/containerd/containerd/pkg/userns" -) - -func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig, - imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (_ *runtimespec.Spec, retErr error) { - // Creates a spec Generator with the default spec. - // TODO(random-liu): [P1] Compare the default settings with docker and containerd default. - specOpts := []oci.SpecOpts{ - oci.WithoutRunMount, - customopts.WithoutDefaultSecuritySettings, - customopts.WithRelativeRoot(relativeRootfsPath), - oci.WithEnv(imageConfig.Env), - oci.WithRootFSReadonly(), - oci.WithHostname(config.GetHostname()), - } - if imageConfig.WorkingDir != "" { - specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) - } - - if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 { - // Pause image must have entrypoint or cmd. 
- return nil, fmt.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig) - } - specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...)) - - // Set cgroups parent. - if c.config.DisableCgroup { - specOpts = append(specOpts, customopts.WithDisabledCgroups) - } else { - if config.GetLinux().GetCgroupParent() != "" { - cgroupsPath := getCgroupsPath(config.GetLinux().GetCgroupParent(), id) - specOpts = append(specOpts, oci.WithCgroup(cgroupsPath)) - } - } - - // When cgroup parent is not set, containerd-shim will create container in a child cgroup - // of the cgroup itself is in. - // TODO(random-liu): [P2] Set default cgroup path if cgroup parent is not specified. - - // Set namespace options. - var ( - securityContext = config.GetLinux().GetSecurityContext() - nsOptions = securityContext.GetNamespaceOptions() - ) - if nsOptions.GetNetwork() == runtime.NamespaceMode_NODE { - specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.NetworkNamespace)) - specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UTSNamespace)) - } else { - specOpts = append(specOpts, oci.WithLinuxNamespace( - runtimespec.LinuxNamespace{ - Type: runtimespec.NetworkNamespace, - Path: nsPath, - })) - } - if nsOptions.GetPid() == runtime.NamespaceMode_NODE { - specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.PIDNamespace)) - } - if nsOptions.GetIpc() == runtime.NamespaceMode_NODE { - specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace)) - } - - usernsOpts := nsOptions.GetUsernsOptions() - uids, gids, err := parseUsernsIDs(usernsOpts) - var usernsEnabled bool - if err != nil { - return nil, fmt.Errorf("user namespace configuration: %w", err) - } - - if usernsOpts != nil { - switch mode := usernsOpts.GetMode(); mode { - case runtime.NamespaceMode_NODE: - specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UserNamespace)) - case 
runtime.NamespaceMode_POD: - specOpts = append(specOpts, oci.WithUserNamespace(uids, gids)) - usernsEnabled = true - default: - return nil, fmt.Errorf("unsupported user namespace mode: %q", mode) - } - } - - // It's fine to generate the spec before the sandbox /dev/shm - // is actually created. - sandboxDevShm := c.getSandboxDevShm(id) - if nsOptions.GetIpc() == runtime.NamespaceMode_NODE { - sandboxDevShm = devShm - } - // Remove the default /dev/shm mount from defaultMounts, it is added in oci/mounts.go. - specOpts = append(specOpts, oci.WithoutMounts(devShm)) - // In future the when user-namespace is enabled, the `nosuid, nodev, noexec` flags are - // required, otherwise the remount will fail with EPERM. Just use them unconditionally, - // they are nice to have anyways. - specOpts = append(specOpts, oci.WithMounts([]runtimespec.Mount{ - { - Source: sandboxDevShm, - Destination: devShm, - Type: "bind", - Options: []string{"rbind", "ro", "nosuid", "nodev", "noexec"}, - }, - // Add resolv.conf for katacontainers to setup the DNS of pod VM properly. 
- { - Source: c.getResolvPath(id), - Destination: resolvConfPath, - Type: "bind", - Options: []string{"rbind", "ro", "nosuid", "nodev", "noexec"}, - }, - })) - - processLabel, mountLabel, err := initLabelsFromOpt(securityContext.GetSelinuxOptions()) - if err != nil { - return nil, fmt.Errorf("failed to init selinux options %+v: %w", securityContext.GetSelinuxOptions(), err) - } - defer func() { - if retErr != nil { - selinux.ReleaseLabel(processLabel) - } - }() - - supplementalGroups := securityContext.GetSupplementalGroups() - specOpts = append(specOpts, - customopts.WithSelinuxLabels(processLabel, mountLabel), - customopts.WithSupplementalGroups(supplementalGroups), - ) - - // Add sysctls - sysctls := config.GetLinux().GetSysctls() - if sysctls == nil { - sysctls = make(map[string]string) - } - _, ipUnprivilegedPortStart := sysctls["net.ipv4.ip_unprivileged_port_start"] - _, pingGroupRange := sysctls["net.ipv4.ping_group_range"] - if nsOptions.GetNetwork() != runtime.NamespaceMode_NODE { - if c.config.EnableUnprivilegedPorts && !ipUnprivilegedPortStart { - sysctls["net.ipv4.ip_unprivileged_port_start"] = "0" - } - if c.config.EnableUnprivilegedICMP && !pingGroupRange && !userns.RunningInUserNS() && !usernsEnabled { - sysctls["net.ipv4.ping_group_range"] = "0 2147483647" - } - } - specOpts = append(specOpts, customopts.WithSysctls(sysctls)) - - // Note: LinuxSandboxSecurityContext does not currently provide an apparmor profile - - if !c.config.DisableCgroup { - specOpts = append(specOpts, customopts.WithDefaultSandboxShares) - } - - if res := config.GetLinux().GetResources(); res != nil { - specOpts = append(specOpts, - customopts.WithAnnotation(annotations.SandboxCPUPeriod, strconv.FormatInt(res.CpuPeriod, 10)), - customopts.WithAnnotation(annotations.SandboxCPUQuota, strconv.FormatInt(res.CpuQuota, 10)), - customopts.WithAnnotation(annotations.SandboxCPUShares, strconv.FormatInt(res.CpuShares, 10)), - customopts.WithAnnotation(annotations.SandboxMem, 
strconv.FormatInt(res.MemoryLimitInBytes, 10))) - } - - specOpts = append(specOpts, customopts.WithPodOOMScoreAdj(int(defaultSandboxOOMAdj), c.config.RestrictOOMScoreAdj)) - - for pKey, pValue := range getPassthroughAnnotations(config.Annotations, - runtimePodAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - specOpts = append(specOpts, annotations.DefaultCRIAnnotations(id, "", "", config, true)...) - - return c.runtimeSpec(id, "", specOpts...) -} - -// sandboxContainerSpecOpts generates OCI spec options for -// the sandbox container. -func (c *criService) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { - var ( - securityContext = config.GetLinux().GetSecurityContext() - specOpts []oci.SpecOpts - err error - ) - ssp := securityContext.GetSeccomp() - if ssp == nil { - ssp, err = generateSeccompSecurityProfile( - securityContext.GetSeccompProfilePath(), //nolint:staticcheck // Deprecated but we don't want to remove yet - c.config.UnsetSeccompProfile) - if err != nil { - return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err) - } - } - seccompSpecOpts, err := c.generateSeccompSpecOpts( - ssp, - securityContext.GetPrivileged(), - c.seccompEnabled()) - if err != nil { - return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err) - } - if seccompSpecOpts != nil { - specOpts = append(specOpts, seccompSpecOpts) - } - - userstr, err := generateUserString( - "", - securityContext.GetRunAsUser(), - securityContext.GetRunAsGroup(), - ) - if err != nil { - return nil, fmt.Errorf("failed to generate user string: %w", err) - } - if userstr == "" { - // Lastly, since no user override was passed via CRI try to set via OCI - // Image - userstr = imageConfig.User - } - if userstr != "" { - specOpts = append(specOpts, oci.WithUser(userstr)) - } - return specOpts, nil -} - -// setupSandboxFiles sets up necessary sandbox files including 
/dev/shm, /etc/hosts, -// /etc/resolv.conf and /etc/hostname. -func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { - sandboxEtcHostname := c.getSandboxHostname(id) - hostname := config.GetHostname() - if hostname == "" { - var err error - hostname, err = c.os.Hostname() - if err != nil { - return fmt.Errorf("failed to get hostname: %w", err) - } - } - if err := c.os.WriteFile(sandboxEtcHostname, []byte(hostname+"\n"), 0644); err != nil { - return fmt.Errorf("failed to write hostname to %q: %w", sandboxEtcHostname, err) - } - - // TODO(random-liu): Consider whether we should maintain /etc/hosts and /etc/resolv.conf in kubelet. - sandboxEtcHosts := c.getSandboxHosts(id) - if err := c.os.CopyFile(etcHosts, sandboxEtcHosts, 0644); err != nil { - return fmt.Errorf("failed to generate sandbox hosts file %q: %w", sandboxEtcHosts, err) - } - - // Set DNS options. Maintain a resolv.conf for the sandbox. - var err error - resolvContent := "" - if dnsConfig := config.GetDnsConfig(); dnsConfig != nil { - resolvContent, err = parseDNSOptions(dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options) - if err != nil { - return fmt.Errorf("failed to parse sandbox DNSConfig %+v: %w", dnsConfig, err) - } - } - resolvPath := c.getResolvPath(id) - if resolvContent == "" { - // copy host's resolv.conf to resolvPath - err = c.os.CopyFile(resolvConfPath, resolvPath, 0644) - if err != nil { - return fmt.Errorf("failed to copy host's resolv.conf to %q: %w", resolvPath, err) - } - } else { - err = c.os.WriteFile(resolvPath, []byte(resolvContent), 0644) - if err != nil { - return fmt.Errorf("failed to write resolv content to %q: %w", resolvPath, err) - } - } - - // Setup sandbox /dev/shm. 
- if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE { - if _, err := c.os.Stat(devShm); err != nil { - return fmt.Errorf("host %q is not available for host ipc: %w", devShm, err) - } - } else { - sandboxDevShm := c.getSandboxDevShm(id) - if err := c.os.MkdirAll(sandboxDevShm, 0700); err != nil { - return fmt.Errorf("failed to create sandbox shm: %w", err) - } - shmproperty := fmt.Sprintf("mode=1777,size=%d", defaultShmSize) - if err := c.os.Mount("shm", sandboxDevShm, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), shmproperty); err != nil { - return fmt.Errorf("failed to mount sandbox shm: %w", err) - } - } - - return nil -} - -// parseDNSOptions parse DNS options into resolv.conf format content, -// if none option is specified, will return empty with no error. -func parseDNSOptions(servers, searches, options []string) (string, error) { - resolvContent := "" - - if len(searches) > 0 { - resolvContent += fmt.Sprintf("search %s\n", strings.Join(searches, " ")) - } - - if len(servers) > 0 { - resolvContent += fmt.Sprintf("nameserver %s\n", strings.Join(servers, "\nnameserver ")) - } - - if len(options) > 0 { - resolvContent += fmt.Sprintf("options %s\n", strings.Join(options, " ")) - } - - return resolvContent, nil -} - -// cleanupSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to -// remove these files. Unmount should *NOT* return error if the mount point is already unmounted. 
-func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { - if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() != runtime.NamespaceMode_NODE { - path, err := c.os.FollowSymlinkInScope(c.getSandboxDevShm(id), "/") - if err != nil { - return fmt.Errorf("failed to follow symlink: %w", err) - } - if err := c.os.Unmount(path); err != nil && !os.IsNotExist(err) { - return fmt.Errorf("failed to unmount %q: %w", path, err) - } - } - return nil -} - -func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) { - for i := range spec.Linux.Namespaces { - if spec.Linux.Namespaces[i].Type == runtimespec.NetworkNamespace { - spec.Linux.Namespaces[i].Path = nsPath - break - } - } -} - -// sandboxSnapshotterOpts generates any platform specific snapshotter options -// for a sandbox container. -func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) { - nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions() - return snapshotterRemapOpts(nsOpts) -} diff --git a/pkg/cri/server/sandbox_run_linux_test.go b/pkg/cri/server/sandbox_run_linux_test.go deleted file mode 100644 index e0abd49ac..000000000 --- a/pkg/cri/server/sandbox_run_linux_test.go +++ /dev/null @@ -1,703 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package server - -import ( - "os" - "path/filepath" - "strconv" - "testing" - - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/selinux/go-selinux" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - v1 "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/pkg/cri/annotations" - "github.com/containerd/containerd/pkg/cri/opts" - ostesting "github.com/containerd/containerd/pkg/os/testing" -) - -func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) { - config := &runtime.PodSandboxConfig{ - Metadata: &runtime.PodSandboxMetadata{ - Name: "test-name", - Uid: "test-uid", - Namespace: "test-ns", - Attempt: 1, - }, - Hostname: "test-hostname", - LogDirectory: "test-log-directory", - Labels: map[string]string{"a": "b"}, - Annotations: map[string]string{"c": "d"}, - Linux: &runtime.LinuxPodSandboxConfig{ - CgroupParent: "/test/cgroup/parent", - }, - } - imageConfig := &imagespec.ImageConfig{ - Env: []string{"a=b", "c=d"}, - Entrypoint: []string{"/pause"}, - Cmd: []string{"forever"}, - WorkingDir: "/workspace", - } - specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) { - assert.Equal(t, "test-hostname", spec.Hostname) - assert.Equal(t, getCgroupsPath("/test/cgroup/parent", id), spec.Linux.CgroupsPath) - assert.Equal(t, relativeRootfsPath, spec.Root.Path) - assert.Equal(t, true, spec.Root.Readonly) - assert.Contains(t, spec.Process.Env, "a=b", "c=d") - assert.Equal(t, []string{"/pause", "forever"}, spec.Process.Args) - assert.Equal(t, "/workspace", spec.Process.Cwd) - assert.EqualValues(t, *spec.Linux.Resources.CPU.Shares, opts.DefaultSandboxCPUshares) - assert.EqualValues(t, *spec.Process.OOMScoreAdj, defaultSandboxOOMAdj) - - t.Logf("Check PodSandbox annotations") - 
assert.Contains(t, spec.Annotations, annotations.SandboxID) - assert.EqualValues(t, spec.Annotations[annotations.SandboxID], id) - - assert.Contains(t, spec.Annotations, annotations.ContainerType) - assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeSandbox) - - assert.Contains(t, spec.Annotations, annotations.SandboxNamespace) - assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-ns") - - assert.Contains(t, spec.Annotations, annotations.SandboxUID) - assert.EqualValues(t, spec.Annotations[annotations.SandboxUID], "test-uid") - - assert.Contains(t, spec.Annotations, annotations.SandboxName) - assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-name") - - assert.Contains(t, spec.Annotations, annotations.SandboxLogDir) - assert.EqualValues(t, spec.Annotations[annotations.SandboxLogDir], "test-log-directory") - - if selinux.GetEnabled() { - assert.NotEqual(t, "", spec.Process.SelinuxLabel) - assert.NotEqual(t, "", spec.Linux.MountLabel) - } - - assert.Contains(t, spec.Mounts, runtimespec.Mount{ - Source: "/test/root/sandboxes/test-id/resolv.conf", - Destination: resolvConfPath, - Type: "bind", - Options: []string{"rbind", "ro", "nosuid", "nodev", "noexec"}, - }) - - } - return config, imageConfig, specCheck -} - -func TestLinuxSandboxContainerSpec(t *testing.T) { - testID := "test-id" - nsPath := "test-cni" - idMap := runtime.IDMapping{ - HostId: 1000, - ContainerId: 1000, - Length: 10, - } - expIDMap := runtimespec.LinuxIDMapping{ - HostID: 1000, - ContainerID: 1000, - Size: 10, - } - - for _, test := range []struct { - desc string - configChange func(*runtime.PodSandboxConfig) - specCheck func(*testing.T, *runtimespec.Spec) - expectErr bool - }{ - { - desc: "spec should reflect original config", - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - // runtime spec should have expected namespaces enabled by default. 
- require.NotNil(t, spec.Linux) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.NetworkNamespace, - Path: nsPath, - }) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.UTSNamespace, - }) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.PIDNamespace, - }) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.IPCNamespace, - }) - assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0") - assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647") - assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.UserNamespace, - }) - }, - }, - { - desc: "spec shouldn't have ping_group_range if userns are in use", - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - UsernsOptions: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: []*runtime.IDMapping{&idMap}, - Gids: []*runtime.IDMapping{&idMap}, - }, - }, - } - }, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - require.NotNil(t, spec.Linux) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.UserNamespace, - }) - assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647") - }, - }, - { - desc: "host namespace", - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - Network: runtime.NamespaceMode_NODE, - Pid: runtime.NamespaceMode_NODE, - Ipc: runtime.NamespaceMode_NODE, - }, - } - }, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - // runtime spec should disable expected namespaces in host mode. 
- require.NotNil(t, spec.Linux) - assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.NetworkNamespace, - }) - assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.UTSNamespace, - }) - assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.PIDNamespace, - }) - assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.IPCNamespace, - }) - assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.UserNamespace, - }) - assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0") - assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647") - }, - }, - { - desc: "user namespace", - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - UsernsOptions: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_POD, - Uids: []*runtime.IDMapping{&idMap}, - Gids: []*runtime.IDMapping{&idMap}, - }, - }, - } - }, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - require.NotNil(t, spec.Linux) - assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ - Type: runtimespec.UserNamespace, - }) - require.Equal(t, spec.Linux.UIDMappings, []runtimespec.LinuxIDMapping{expIDMap}) - require.Equal(t, spec.Linux.GIDMappings, []runtimespec.LinuxIDMapping{expIDMap}) - - }, - }, - { - desc: "user namespace mode node and mappings", - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - UsernsOptions: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_NODE, - Uids: []*runtime.IDMapping{&idMap}, - Gids: []*runtime.IDMapping{&idMap}, - }, - }, - } - }, - expectErr: true, - }, - { - desc: "user namespace with several 
mappings", - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - UsernsOptions: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_NODE, - Uids: []*runtime.IDMapping{&idMap, &idMap}, - Gids: []*runtime.IDMapping{&idMap, &idMap}, - }, - }, - } - }, - expectErr: true, - }, - { - desc: "user namespace with uneven mappings", - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - UsernsOptions: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_NODE, - Uids: []*runtime.IDMapping{&idMap, &idMap}, - Gids: []*runtime.IDMapping{&idMap}, - }, - }, - } - }, - expectErr: true, - }, - { - desc: "user namespace mode container", - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - UsernsOptions: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_CONTAINER, - }, - }, - } - }, - expectErr: true, - }, - { - desc: "user namespace mode target", - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - UsernsOptions: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode_TARGET, - }, - }, - } - }, - expectErr: true, - }, - { - desc: "user namespace unknown mode", - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - UsernsOptions: &runtime.UserNamespace{ - Mode: runtime.NamespaceMode(100), - }, - }, - } - }, - expectErr: true, - }, - { - desc: "should set supplemental groups correctly", - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - SupplementalGroups: 
[]int64{1111, 2222}, - } - }, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - require.NotNil(t, spec.Process) - assert.Contains(t, spec.Process.User.AdditionalGids, uint32(1111)) - assert.Contains(t, spec.Process.User.AdditionalGids, uint32(2222)) - }, - }, - { - desc: "should overwrite default sysctls", - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.Sysctls = map[string]string{ - "net.ipv4.ip_unprivileged_port_start": "500", - "net.ipv4.ping_group_range": "1 1000", - } - }, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - require.NotNil(t, spec.Process) - assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "500") - assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "1 1000") - }, - }, - { - desc: "sandbox sizing annotations should be set if LinuxContainerResources were provided", - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.Resources = &v1.LinuxContainerResources{ - CpuPeriod: 100, - CpuQuota: 200, - CpuShares: 5000, - MemoryLimitInBytes: 1024, - } - }, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - value, ok := spec.Annotations[annotations.SandboxCPUPeriod] - assert.True(t, ok) - assert.EqualValues(t, strconv.FormatInt(100, 10), value) - assert.EqualValues(t, "100", value) - - value, ok = spec.Annotations[annotations.SandboxCPUQuota] - assert.True(t, ok) - assert.EqualValues(t, "200", value) - - value, ok = spec.Annotations[annotations.SandboxCPUShares] - assert.True(t, ok) - assert.EqualValues(t, "5000", value) - - value, ok = spec.Annotations[annotations.SandboxMem] - assert.True(t, ok) - assert.EqualValues(t, "1024", value) - }, - }, - { - desc: "sandbox sizing annotations should not be set if LinuxContainerResources were not provided", - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - _, ok := spec.Annotations[annotations.SandboxCPUPeriod] - assert.False(t, ok) - _, ok = spec.Annotations[annotations.SandboxCPUQuota] - assert.False(t, ok) - _, 
ok = spec.Annotations[annotations.SandboxCPUShares] - assert.False(t, ok) - _, ok = spec.Annotations[annotations.SandboxMem] - assert.False(t, ok) - }, - }, - { - desc: "sandbox sizing annotations are zero if the resources are set to 0", - configChange: func(c *runtime.PodSandboxConfig) { - c.Linux.Resources = &v1.LinuxContainerResources{} - }, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - value, ok := spec.Annotations[annotations.SandboxCPUPeriod] - assert.True(t, ok) - assert.EqualValues(t, "0", value) - value, ok = spec.Annotations[annotations.SandboxCPUQuota] - assert.True(t, ok) - assert.EqualValues(t, "0", value) - value, ok = spec.Annotations[annotations.SandboxCPUShares] - assert.True(t, ok) - assert.EqualValues(t, "0", value) - value, ok = spec.Annotations[annotations.SandboxMem] - assert.True(t, ok) - assert.EqualValues(t, "0", value) - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c := newTestCRIService() - c.config.EnableUnprivilegedICMP = true - c.config.EnableUnprivilegedPorts = true - config, imageConfig, specCheck := getRunPodSandboxTestData() - if test.configChange != nil { - test.configChange(config) - } - spec, err := c.sandboxContainerSpec(testID, config, imageConfig, nsPath, nil) - if test.expectErr { - assert.Error(t, err) - assert.Nil(t, spec) - return - } - assert.NoError(t, err) - assert.NotNil(t, spec) - specCheck(t, testID, spec) - if test.specCheck != nil { - test.specCheck(t, spec) - } - }) - } -} - -func TestSetupSandboxFiles(t *testing.T) { - const ( - testID = "test-id" - realhostname = "test-real-hostname" - ) - for _, test := range []struct { - desc string - dnsConfig *runtime.DNSConfig - hostname string - ipcMode runtime.NamespaceMode - expectedCalls []ostesting.CalledDetail - }{ - { - desc: "should check host /dev/shm existence when ipc mode is NODE", - ipcMode: runtime.NamespaceMode_NODE, - expectedCalls: []ostesting.CalledDetail{ - { - Name: "Hostname", - }, - { - Name: "WriteFile", - 
Arguments: []interface{}{ - filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), - []byte(realhostname + "\n"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/hosts", - filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/resolv.conf", - filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), - os.FileMode(0644), - }, - }, - { - Name: "Stat", - Arguments: []interface{}{"/dev/shm"}, - }, - }, - }, - { - desc: "should create new /etc/resolv.conf if DNSOptions is set", - dnsConfig: &runtime.DNSConfig{ - Servers: []string{"8.8.8.8"}, - Searches: []string{"114.114.114.114"}, - Options: []string{"timeout:1"}, - }, - ipcMode: runtime.NamespaceMode_NODE, - expectedCalls: []ostesting.CalledDetail{ - { - Name: "Hostname", - }, - { - Name: "WriteFile", - Arguments: []interface{}{ - filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), - []byte(realhostname + "\n"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/hosts", - filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), - os.FileMode(0644), - }, - }, - { - Name: "WriteFile", - Arguments: []interface{}{ - filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), - []byte(`search 114.114.114.114 -nameserver 8.8.8.8 -options timeout:1 -`), os.FileMode(0644), - }, - }, - { - Name: "Stat", - Arguments: []interface{}{"/dev/shm"}, - }, - }, - }, - { - desc: "should create sandbox shm when ipc namespace mode is not NODE", - ipcMode: runtime.NamespaceMode_POD, - expectedCalls: []ostesting.CalledDetail{ - { - Name: "Hostname", - }, - { - Name: "WriteFile", - Arguments: []interface{}{ - filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), - []byte(realhostname + "\n"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/hosts", - filepath.Join(testRootDir, 
sandboxesDir, testID, "hosts"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/resolv.conf", - filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), - os.FileMode(0644), - }, - }, - { - Name: "MkdirAll", - Arguments: []interface{}{ - filepath.Join(testStateDir, sandboxesDir, testID, "shm"), - os.FileMode(0700), - }, - }, - { - Name: "Mount", - // Ignore arguments which are too complex to check. - }, - }, - }, - { - desc: "should create /etc/hostname when hostname is set", - hostname: "test-hostname", - ipcMode: runtime.NamespaceMode_NODE, - expectedCalls: []ostesting.CalledDetail{ - { - Name: "WriteFile", - Arguments: []interface{}{ - filepath.Join(testRootDir, sandboxesDir, testID, "hostname"), - []byte("test-hostname\n"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/hosts", - filepath.Join(testRootDir, sandboxesDir, testID, "hosts"), - os.FileMode(0644), - }, - }, - { - Name: "CopyFile", - Arguments: []interface{}{ - "/etc/resolv.conf", - filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"), - os.FileMode(0644), - }, - }, - { - Name: "Stat", - Arguments: []interface{}{"/dev/shm"}, - }, - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c := newTestCRIService() - c.os.(*ostesting.FakeOS).HostnameFn = func() (string, error) { - return realhostname, nil - } - cfg := &runtime.PodSandboxConfig{ - Hostname: test.hostname, - DnsConfig: test.dnsConfig, - Linux: &runtime.LinuxPodSandboxConfig{ - SecurityContext: &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - Ipc: test.ipcMode, - }, - }, - }, - } - c.setupSandboxFiles(testID, cfg) - calls := c.os.(*ostesting.FakeOS).GetCalls() - assert.Len(t, calls, len(test.expectedCalls)) - for i, expected := range test.expectedCalls { - if expected.Arguments == nil { - // Ignore arguments. 
- expected.Arguments = calls[i].Arguments - } - assert.Equal(t, expected, calls[i]) - } - }) - } -} - -func TestParseDNSOption(t *testing.T) { - for _, test := range []struct { - desc string - servers []string - searches []string - options []string - expectedContent string - expectErr bool - }{ - { - desc: "empty dns options should return empty content", - }, - { - desc: "non-empty dns options should return correct content", - servers: []string{"8.8.8.8", "server.google.com"}, - searches: []string{"114.114.114.114"}, - options: []string{"timeout:1"}, - expectedContent: `search 114.114.114.114 -nameserver 8.8.8.8 -nameserver server.google.com -options timeout:1 -`, - }, - { - desc: "expanded dns config should return correct content on modern libc (e.g. glibc 2.26 and above)", - servers: []string{"8.8.8.8", "server.google.com"}, - searches: []string{ - "server0.google.com", - "server1.google.com", - "server2.google.com", - "server3.google.com", - "server4.google.com", - "server5.google.com", - "server6.google.com", - }, - options: []string{"timeout:1"}, - expectedContent: `search server0.google.com server1.google.com server2.google.com server3.google.com server4.google.com server5.google.com server6.google.com -nameserver 8.8.8.8 -nameserver server.google.com -options timeout:1 -`, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - resolvContent, err := parseDNSOptions(test.servers, test.searches, test.options) - if test.expectErr { - assert.Error(t, err) - return - } - assert.NoError(t, err) - assert.Equal(t, resolvContent, test.expectedContent) - }) - } -} - -func TestSandboxDisableCgroup(t *testing.T) { - config, imageConfig, _ := getRunPodSandboxTestData() - c := newTestCRIService() - c.config.DisableCgroup = true - spec, err := c.sandboxContainerSpec("test-id", config, imageConfig, "test-cni", []string{}) - require.NoError(t, err) - - t.Log("resource limit should not be set") - assert.Nil(t, spec.Linux.Resources.Memory) - assert.Nil(t, 
spec.Linux.Resources.CPU) - - t.Log("cgroup path should be empty") - assert.Empty(t, spec.Linux.CgroupsPath) -} - -// TODO(random-liu): [P1] Add unit test for different error cases to make sure -// the function cleans up on error properly. diff --git a/pkg/cri/server/sandbox_run_other.go b/pkg/cri/server/sandbox_run_other.go deleted file mode 100644 index 8fb7a3c9c..000000000 --- a/pkg/cri/server/sandbox_run_other.go +++ /dev/null @@ -1,60 +0,0 @@ -//go:build !windows && !linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "github.com/containerd/containerd/oci" - "github.com/containerd/containerd/pkg/cri/annotations" - "github.com/containerd/containerd/snapshots" - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig, - imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (_ *runtimespec.Spec, retErr error) { - return c.runtimeSpec(id, "", annotations.DefaultCRIAnnotations(id, "", "", config, true)...) -} - -// sandboxContainerSpecOpts generates OCI spec options for -// the sandbox container. 
-func (c *criService) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { - return []oci.SpecOpts{}, nil -} - -// setupSandboxFiles sets up necessary sandbox files including /dev/shm, /etc/hosts, -// /etc/resolv.conf and /etc/hostname. -func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { - return nil -} - -// cleanupSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to -// remove these files. Unmount should *NOT* return error if the mount point is already unmounted. -func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { - return nil -} - -func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) { -} - -// sandboxSnapshotterOpts generates any platform specific snapshotter options -// for a sandbox container. -func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) { - return []snapshots.Opt{}, nil -} diff --git a/pkg/cri/server/sandbox_run_other_test.go b/pkg/cri/server/sandbox_run_other_test.go deleted file mode 100644 index b3a293fa3..000000000 --- a/pkg/cri/server/sandbox_run_other_test.go +++ /dev/null @@ -1,35 +0,0 @@ -//go:build !windows && !linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package server - -import ( - "testing" - - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" -) - -func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) { - config := &runtime.PodSandboxConfig{} - imageConfig := &imagespec.ImageConfig{} - specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) { - } - return config, imageConfig, specCheck -} diff --git a/pkg/cri/server/sandbox_run_test.go b/pkg/cri/server/sandbox_run_test.go index b1bc1ec13..f414f2382 100644 --- a/pkg/cri/server/sandbox_run_test.go +++ b/pkg/cri/server/sandbox_run_test.go @@ -19,161 +19,13 @@ package server import ( "context" "net" - goruntime "runtime" "testing" - cni "github.com/containerd/go-cni" - "github.com/containerd/typeurl/v2" - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/containerd/go-cni" "github.com/stretchr/testify/assert" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" ) -func TestSandboxContainerSpec(t *testing.T) { - switch goruntime.GOOS { - case "darwin": - t.Skip("not implemented on Darwin") - case "freebsd": - t.Skip("not implemented on FreeBSD") - } - testID := "test-id" - nsPath := "test-cni" - for _, test := range []struct { - desc string - configChange func(*runtime.PodSandboxConfig) - podAnnotations []string - imageConfigChange func(*imagespec.ImageConfig) - specCheck func(*testing.T, *runtimespec.Spec) - expectErr bool - }{ - { - desc: "should return error when entrypoint and cmd are empty", - imageConfigChange: func(c *imagespec.ImageConfig) { - c.Entrypoint = nil - c.Cmd = nil - }, - expectErr: true, - }, - { - desc: "a passthrough annotation should be passed as an OCI annotation", - 
podAnnotations: []string{"c"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - assert.Equal(t, spec.Annotations["c"], "d") - }, - }, - { - desc: "a non-passthrough annotation should not be passed as an OCI annotation", - configChange: func(c *runtime.PodSandboxConfig) { - c.Annotations["d"] = "e" - }, - podAnnotations: []string{"c"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - assert.Equal(t, spec.Annotations["c"], "d") - _, ok := spec.Annotations["d"] - assert.False(t, ok) - }, - }, - { - desc: "passthrough annotations should support wildcard match", - configChange: func(c *runtime.PodSandboxConfig) { - c.Annotations["t.f"] = "j" - c.Annotations["z.g"] = "o" - c.Annotations["z"] = "o" - c.Annotations["y.ca"] = "b" - c.Annotations["y"] = "b" - }, - podAnnotations: []string{"t*", "z.*", "y.c*"}, - specCheck: func(t *testing.T, spec *runtimespec.Spec) { - assert.Equal(t, spec.Annotations["t.f"], "j") - assert.Equal(t, spec.Annotations["z.g"], "o") - assert.Equal(t, spec.Annotations["y.ca"], "b") - _, ok := spec.Annotations["y"] - assert.False(t, ok) - _, ok = spec.Annotations["z"] - assert.False(t, ok) - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - c := newTestCRIService() - config, imageConfig, specCheck := getRunPodSandboxTestData() - if test.configChange != nil { - test.configChange(config) - } - - if test.imageConfigChange != nil { - test.imageConfigChange(imageConfig) - } - spec, err := c.sandboxContainerSpec(testID, config, imageConfig, nsPath, - test.podAnnotations) - if test.expectErr { - assert.Error(t, err) - assert.Nil(t, spec) - return - } - assert.NoError(t, err) - assert.NotNil(t, spec) - specCheck(t, testID, spec) - if test.specCheck != nil { - test.specCheck(t, spec) - } - }) - } -} - -func TestTypeurlMarshalUnmarshalSandboxMeta(t *testing.T) { - for _, test := range []struct { - desc string - configChange func(*runtime.PodSandboxConfig) - }{ - { - desc: "should marshal original config", - }, - { - 
desc: "should marshal Linux", - configChange: func(c *runtime.PodSandboxConfig) { - if c.Linux == nil { - c.Linux = &runtime.LinuxPodSandboxConfig{} - } - c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ - NamespaceOptions: &runtime.NamespaceOption{ - Network: runtime.NamespaceMode_NODE, - Pid: runtime.NamespaceMode_NODE, - Ipc: runtime.NamespaceMode_NODE, - }, - SupplementalGroups: []int64{1111, 2222}, - } - }, - }, - } { - test := test - t.Run(test.desc, func(t *testing.T) { - meta := &sandboxstore.Metadata{ - ID: "1", - Name: "sandbox_1", - NetNSPath: "/home/cloud", - } - meta.Config, _, _ = getRunPodSandboxTestData() - if test.configChange != nil { - test.configChange(meta.Config) - } - - md, err := typeurl.MarshalAny(meta) - assert.NoError(t, err) - data, err := typeurl.UnmarshalAny(md) - assert.NoError(t, err) - assert.IsType(t, &sandboxstore.Metadata{}, data) - curMeta, ok := data.(*sandboxstore.Metadata) - assert.True(t, ok) - assert.Equal(t, meta, curMeta) - }) - } -} - func TestToCNIPortMappings(t *testing.T) { for _, test := range []struct { desc string @@ -305,7 +157,6 @@ func TestSelectPodIP(t *testing.T) { expectedAdditionalIPs: []string{"192.168.17.43"}, pref: "cni", }, - { desc: "ipv4 should be picked when there is only ipv4", ips: []string{"192.168.17.43"}, @@ -339,49 +190,3 @@ func TestSelectPodIP(t *testing.T) { }) } } - -func TestHostAccessingSandbox(t *testing.T) { - privilegedContext := &runtime.PodSandboxConfig{ - Linux: &runtime.LinuxPodSandboxConfig{ - SecurityContext: &runtime.LinuxSandboxSecurityContext{ - Privileged: true, - }, - }, - } - nonPrivilegedContext := &runtime.PodSandboxConfig{ - Linux: &runtime.LinuxPodSandboxConfig{ - SecurityContext: &runtime.LinuxSandboxSecurityContext{ - Privileged: false, - }, - }, - } - hostNamespace := &runtime.PodSandboxConfig{ - Linux: &runtime.LinuxPodSandboxConfig{ - SecurityContext: &runtime.LinuxSandboxSecurityContext{ - Privileged: false, - NamespaceOptions: 
&runtime.NamespaceOption{ - Network: runtime.NamespaceMode_NODE, - Pid: runtime.NamespaceMode_NODE, - Ipc: runtime.NamespaceMode_NODE, - }, - }, - }, - } - tests := []struct { - name string - config *runtime.PodSandboxConfig - want bool - }{ - {"Security Context is nil", nil, false}, - {"Security Context is privileged", privilegedContext, false}, - {"Security Context is not privileged", nonPrivilegedContext, false}, - {"Security Context namespace host access", hostNamespace, true}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := hostAccessingSandbox(tt.config); got != tt.want { - t.Errorf("hostAccessingSandbox() = %v, want %v", got, tt.want) - } - }) - } -} diff --git a/pkg/cri/server/sandbox_run_windows.go b/pkg/cri/server/sandbox_run_windows.go deleted file mode 100644 index 4d523e4b6..000000000 --- a/pkg/cri/server/sandbox_run_windows.go +++ /dev/null @@ -1,113 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package server - -import ( - "fmt" - "strconv" - - "github.com/containerd/containerd/oci" - "github.com/containerd/containerd/snapshots" - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/pkg/cri/annotations" - customopts "github.com/containerd/containerd/pkg/cri/opts" -) - -func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig, - imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (*runtimespec.Spec, error) { - // Creates a spec Generator with the default spec. - specOpts := []oci.SpecOpts{ - oci.WithEnv(imageConfig.Env), - oci.WithHostname(config.GetHostname()), - } - if imageConfig.WorkingDir != "" { - specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir)) - } - - if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 { - // Pause image must have entrypoint or cmd. - return nil, fmt.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig) - } - specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...)) - - specOpts = append(specOpts, - // Clear the root location since hcsshim expects it. - // NOTE: readonly rootfs doesn't work on windows. - customopts.WithoutRoot, - oci.WithWindowsNetworkNamespace(nsPath), - ) - - specOpts = append(specOpts, customopts.WithWindowsDefaultSandboxShares) - - // Start with the image config user and override below if RunAsUsername is not "". 
- username := imageConfig.User - - runAsUser := config.GetWindows().GetSecurityContext().GetRunAsUsername() - if runAsUser != "" { - username = runAsUser - } - - cs := config.GetWindows().GetSecurityContext().GetCredentialSpec() - if cs != "" { - specOpts = append(specOpts, customopts.WithWindowsCredentialSpec(cs)) - } - - // There really isn't a good Windows way to verify that the username is available in the - // image as early as here like there is for Linux. Later on in the stack hcsshim - // will handle the behavior of erroring out if the user isn't available in the image - // when trying to run the init process. - specOpts = append(specOpts, oci.WithUser(username)) - - for pKey, pValue := range getPassthroughAnnotations(config.Annotations, - runtimePodAnnotations) { - specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue)) - } - - specOpts = append(specOpts, customopts.WithAnnotation(annotations.WindowsHostProcess, strconv.FormatBool(config.GetWindows().GetSecurityContext().GetHostProcess()))) - specOpts = append(specOpts, - annotations.DefaultCRIAnnotations(id, "", "", config, true)..., - ) - - return c.runtimeSpec(id, "", specOpts...) -} - -// No sandbox container spec options for windows yet. -func (c *criService) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) { - return nil, nil -} - -// No sandbox files needed for windows. -func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { - return nil -} - -// No sandbox files needed for windows. -func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error { - return nil -} - -func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) { - spec.Windows.Network.NetworkNamespace = nsPath -} - -// No sandbox snapshotter options needed for windows. 
-func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) { - return []snapshots.Opt{}, nil -} diff --git a/pkg/cri/server/sandbox_run_windows_test.go b/pkg/cri/server/sandbox_run_windows_test.go deleted file mode 100644 index 358f2eacb..000000000 --- a/pkg/cri/server/sandbox_run_windows_test.go +++ /dev/null @@ -1,111 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "testing" - - imagespec "github.com/opencontainers/image-spec/specs-go/v1" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/stretchr/testify/assert" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/pkg/cri/annotations" - "github.com/containerd/containerd/pkg/cri/opts" -) - -func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) { - config := &runtime.PodSandboxConfig{ - Metadata: &runtime.PodSandboxMetadata{ - Name: "test-name", - Uid: "test-uid", - Namespace: "test-ns", - Attempt: 1, - }, - Hostname: "test-hostname", - LogDirectory: "test-log-directory", - Labels: map[string]string{"a": "b"}, - Annotations: map[string]string{"c": "d"}, - Windows: &runtime.WindowsPodSandboxConfig{ - SecurityContext: &runtime.WindowsSandboxSecurityContext{ - RunAsUsername: "test-user", - CredentialSpec: "{\"test\": \"spec\"}", - HostProcess: false, - }, - }, - } - imageConfig := 
&imagespec.ImageConfig{ - Env: []string{"a=b", "c=d"}, - Entrypoint: []string{"/pause"}, - Cmd: []string{"forever"}, - WorkingDir: "/workspace", - User: "test-image-user", - } - specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) { - assert.Equal(t, "test-hostname", spec.Hostname) - assert.Nil(t, spec.Root) - assert.Contains(t, spec.Process.Env, "a=b", "c=d") - assert.Equal(t, []string{"/pause", "forever"}, spec.Process.Args) - assert.Equal(t, "/workspace", spec.Process.Cwd) - assert.EqualValues(t, *spec.Windows.Resources.CPU.Shares, opts.DefaultSandboxCPUshares) - - // Also checks if override of the image configs user is behaving. - t.Logf("Check username") - assert.Contains(t, spec.Process.User.Username, "test-user") - - t.Logf("Check credential spec") - assert.Contains(t, spec.Windows.CredentialSpec, "{\"test\": \"spec\"}") - - t.Logf("Check PodSandbox annotations") - assert.Contains(t, spec.Annotations, annotations.SandboxID) - assert.EqualValues(t, spec.Annotations[annotations.SandboxID], id) - - assert.Contains(t, spec.Annotations, annotations.ContainerType) - assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeSandbox) - - assert.Contains(t, spec.Annotations, annotations.SandboxNamespace) - assert.EqualValues(t, spec.Annotations[annotations.SandboxNamespace], "test-ns") - - assert.Contains(t, spec.Annotations, annotations.SandboxUID) - assert.EqualValues(t, spec.Annotations[annotations.SandboxUID], "test-uid") - - assert.Contains(t, spec.Annotations, annotations.SandboxName) - assert.EqualValues(t, spec.Annotations[annotations.SandboxName], "test-name") - - assert.Contains(t, spec.Annotations, annotations.SandboxLogDir) - assert.EqualValues(t, spec.Annotations[annotations.SandboxLogDir], "test-log-directory") - - assert.Contains(t, spec.Annotations, annotations.WindowsHostProcess) - assert.EqualValues(t, spec.Annotations[annotations.WindowsHostProcess], "false") - } - return config, imageConfig, 
specCheck -} - -func TestSandboxWindowsNetworkNamespace(t *testing.T) { - testID := "test-id" - nsPath := "test-cni" - c := newTestCRIService() - - config, imageConfig, specCheck := getRunPodSandboxTestData() - spec, err := c.sandboxContainerSpec(testID, config, imageConfig, nsPath, nil) - assert.NoError(t, err) - assert.NotNil(t, spec) - specCheck(t, testID, spec) - assert.NotNil(t, spec.Windows) - assert.NotNil(t, spec.Windows.Network) - assert.Equal(t, nsPath, spec.Windows.Network.NetworkNamespace) -} diff --git a/pkg/cri/server/sandbox_stats_other.go b/pkg/cri/server/sandbox_stats_other.go index 3ecdaa4bf..4f1a53e46 100644 --- a/pkg/cri/server/sandbox_stats_other.go +++ b/pkg/cri/server/sandbox_stats_other.go @@ -30,5 +30,5 @@ import ( func (c *criService) podSandboxStats( ctx context.Context, sandbox sandboxstore.Sandbox) (*runtime.PodSandboxStats, error) { - return nil, fmt.Errorf("metrics for sandbox not implemented: %w", errdefs.ErrNotImplemented) + return nil, fmt.Errorf("pod sandbox stats not implemented: %w", errdefs.ErrNotImplemented) } diff --git a/pkg/cri/server/sandbox_stats_windows.go b/pkg/cri/server/sandbox_stats_windows.go index 729238137..ba4ab2ddd 100644 --- a/pkg/cri/server/sandbox_stats_windows.go +++ b/pkg/cri/server/sandbox_stats_windows.go @@ -159,7 +159,7 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma // If snapshotstore doesn't have cached snapshot information // set WritableLayer usage to zero var usedBytes uint64 - sn, err := c.snapshotStore.Get(cntr.ID) + sn, err := c.GetSnapshot(cntr.ID) if err == nil { usedBytes = sn.Size } diff --git a/pkg/cri/server/sandbox_status.go b/pkg/cri/server/sandbox_status.go index 09b2b5d30..70afaea1e 100644 --- a/pkg/cri/server/sandbox_status.go +++ b/pkg/cri/server/sandbox_status.go @@ -18,16 +18,14 @@ package server import ( "context" - "encoding/json" "fmt" + "time" - "github.com/containerd/containerd" "github.com/containerd/containerd/errdefs" - cni 
"github.com/containerd/go-cni" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + "github.com/containerd/go-cni" runtimespec "github.com/opencontainers/runtime-spec/specs-go" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" ) // PodSandboxStatus returns the status of the PodSandbox. @@ -41,23 +39,45 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox if err != nil { return nil, fmt.Errorf("failed to get sandbox ip: %w", err) } - status := toCRISandboxStatus(sandbox.Metadata, sandbox.Status.Get(), ip, additionalIPs) - if status.GetCreatedAt() == 0 { - // CRI doesn't allow CreatedAt == 0. - info, err := sandbox.Container.Info(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get CreatedAt for sandbox container in %q state: %w", status.State, err) - } - status.CreatedAt = info.CreatedAt.UnixNano() - } - if !r.GetVerbose() { - return &runtime.PodSandboxStatusResponse{Status: status}, nil + + controller, err := c.getSandboxController(sandbox.Config, sandbox.RuntimeHandler) + if err != nil { + return nil, fmt.Errorf("failed to get sandbox controller: %w", err) } - // Generate verbose information. - info, err := toCRISandboxInfo(ctx, sandbox) + var ( + createdAt time.Time + state string + info map[string]string + ) + cstatus, err := controller.Status(ctx, sandbox.ID, r.GetVerbose()) if err != nil { - return nil, fmt.Errorf("failed to get verbose sandbox container info: %w", err) + // If the shim died unexpectedly (segfault etc.) let's set the state as + // NOTREADY and not just error out to make k8s and clients like crictl + // happy. If we get back ErrNotFound from controller.Status above while + // we're using the shim-mode controller, this is a decent indicator it + // exited unexpectedly. 
We can use the fact that we successfully retrieved + // the sandbox object from the store above to tell that this is true, otherwise + // if we followed the normal k8s convention of StopPodSandbox -> RemovePodSandbox, + // we wouldn't have that object in the store anymore. + if !errdefs.IsNotFound(err) { + return nil, fmt.Errorf("failed to query controller status: %w", err) + } + state = runtime.PodSandboxState_SANDBOX_NOTREADY.String() + } else { + state = cstatus.State + createdAt = cstatus.CreatedAt + info = cstatus.Info + } + + status := toCRISandboxStatus(sandbox.Metadata, state, createdAt, ip, additionalIPs) + if status.GetCreatedAt() == 0 { + // CRI doesn't allow CreatedAt == 0. + sandboxInfo, err := c.client.SandboxStore().Get(ctx, sandbox.ID) + if err != nil { + return nil, fmt.Errorf("failed to get sandbox %q from metadata store: %w", sandbox.ID, err) + } + status.CreatedAt = sandboxInfo.CreatedAt.UnixNano() } return &runtime.PodSandboxStatusResponse{ @@ -84,42 +104,6 @@ func (c *criService) getIPs(sandbox sandboxstore.Sandbox) (string, []string, err return sandbox.IP, sandbox.AdditionalIPs, nil } -// toCRISandboxStatus converts sandbox metadata into CRI pod sandbox status. -func toCRISandboxStatus(meta sandboxstore.Metadata, status sandboxstore.Status, ip string, additionalIPs []string) *runtime.PodSandboxStatus { - // Set sandbox state to NOTREADY by default. 
- state := runtime.PodSandboxState_SANDBOX_NOTREADY - if status.State == sandboxstore.StateReady { - state = runtime.PodSandboxState_SANDBOX_READY - } - nsOpts := meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions() - var ips []*runtime.PodIP - for _, additionalIP := range additionalIPs { - ips = append(ips, &runtime.PodIP{Ip: additionalIP}) - } - return &runtime.PodSandboxStatus{ - Id: meta.ID, - Metadata: meta.Config.GetMetadata(), - State: state, - CreatedAt: status.CreatedAt.UnixNano(), - Network: &runtime.PodSandboxNetworkStatus{ - Ip: ip, - AdditionalIps: ips, - }, - Linux: &runtime.LinuxPodSandboxStatus{ - Namespaces: &runtime.Namespace{ - Options: &runtime.NamespaceOption{ - Network: nsOpts.GetNetwork(), - Pid: nsOpts.GetPid(), - Ipc: nsOpts.GetIpc(), - }, - }, - }, - Labels: meta.Config.GetLabels(), - Annotations: meta.Config.GetAnnotations(), - RuntimeHandler: meta.RuntimeHandler, - } -} - // SandboxInfo is extra information for sandbox. // TODO (mikebrow): discuss predefining constants structures for some or all of these field names in CRI type SandboxInfo struct { @@ -140,78 +124,38 @@ type SandboxInfo struct { CNIResult *cni.Result `json:"cniResult"` } -// toCRISandboxInfo converts internal container object information to CRI sandbox status response info map. -func toCRISandboxInfo(ctx context.Context, sandbox sandboxstore.Sandbox) (map[string]string, error) { - container := sandbox.Container - task, err := container.Task(ctx, nil) - if err != nil && !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("failed to get sandbox container task: %w", err) +// toCRISandboxStatus converts sandbox metadata into CRI pod sandbox status. +func toCRISandboxStatus(meta sandboxstore.Metadata, status string, createdAt time.Time, ip string, additionalIPs []string) *runtime.PodSandboxStatus { + // Set sandbox state to NOTREADY by default. 
+ state := runtime.PodSandboxState_SANDBOX_NOTREADY + if value, ok := runtime.PodSandboxState_value[status]; ok { + state = runtime.PodSandboxState(value) } - - var processStatus containerd.ProcessStatus - if task != nil { - if taskStatus, err := task.Status(ctx); err != nil { - if !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("failed to get task status: %w", err) - } - processStatus = containerd.Unknown - } else { - processStatus = taskStatus.Status - } + nsOpts := meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions() + var ips []*runtime.PodIP + for _, additionalIP := range additionalIPs { + ips = append(ips, &runtime.PodIP{Ip: additionalIP}) } - - si := &SandboxInfo{ - Pid: sandbox.Status.Get().Pid, - RuntimeHandler: sandbox.RuntimeHandler, - Status: string(processStatus), - Config: sandbox.Config, - CNIResult: sandbox.CNIResult, + return &runtime.PodSandboxStatus{ + Id: meta.ID, + Metadata: meta.Config.GetMetadata(), + State: state, + CreatedAt: createdAt.UnixNano(), + Network: &runtime.PodSandboxNetworkStatus{ + Ip: ip, + AdditionalIps: ips, + }, + Linux: &runtime.LinuxPodSandboxStatus{ + Namespaces: &runtime.Namespace{ + Options: &runtime.NamespaceOption{ + Network: nsOpts.GetNetwork(), + Pid: nsOpts.GetPid(), + Ipc: nsOpts.GetIpc(), + }, + }, + }, + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + RuntimeHandler: meta.RuntimeHandler, } - - if si.Status == "" { - // If processStatus is empty, it means that the task is deleted. Apply "deleted" - // status which does not exist in containerd. - si.Status = "deleted" - } - - if sandbox.NetNS != nil { - // Add network closed information if sandbox is not using host network. 
- closed, err := sandbox.NetNS.Closed() - if err != nil { - return nil, fmt.Errorf("failed to check network namespace closed: %w", err) - } - si.NetNSClosed = closed - } - - spec, err := container.Spec(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox container runtime spec: %w", err) - } - si.RuntimeSpec = spec - - ctrInfo, err := container.Info(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get sandbox container info: %w", err) - } - // Do not use config.SandboxImage because the configuration might - // be changed during restart. It may not reflect the actual image - // used by the sandbox container. - si.Image = ctrInfo.Image - si.SnapshotKey = ctrInfo.SnapshotKey - si.Snapshotter = ctrInfo.Snapshotter - - runtimeOptions, err := getRuntimeOptions(ctrInfo) - if err != nil { - return nil, fmt.Errorf("failed to get runtime options: %w", err) - } - si.RuntimeType = ctrInfo.Runtime.Name - si.RuntimeOptions = runtimeOptions - - infoBytes, err := json.Marshal(si) - if err != nil { - return nil, fmt.Errorf("failed to marshal info %v: %w", si, err) - } - return map[string]string{ - "info": string(infoBytes), - }, nil } diff --git a/pkg/cri/server/sandbox_status_test.go b/pkg/cri/server/sandbox_status_test.go index 61eaacfeb..48fde0e24 100644 --- a/pkg/cri/server/sandbox_status_test.go +++ b/pkg/cri/server/sandbox_status_test.go @@ -89,33 +89,29 @@ func TestPodSandboxStatus(t *testing.T) { } for _, test := range []struct { desc string - state sandboxstore.State + state string expectedState runtime.PodSandboxState }{ { desc: "sandbox state ready", - state: sandboxstore.StateReady, + state: sandboxstore.StateReady.String(), expectedState: runtime.PodSandboxState_SANDBOX_READY, }, { desc: "sandbox state not ready", - state: sandboxstore.StateNotReady, + state: sandboxstore.StateNotReady.String(), expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY, }, { desc: "sandbox state unknown", - state: sandboxstore.StateUnknown, + state: 
sandboxstore.StateUnknown.String(), expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY, }, } { test := test t.Run(test.desc, func(t *testing.T) { - status := sandboxstore.Status{ - CreatedAt: createdAt, - State: test.state, - } expected.State = test.expectedState - got := toCRISandboxStatus(metadata, status, ip, additionalIPs) + got := toCRISandboxStatus(metadata, test.state, createdAt, ip, additionalIPs) assert.Equal(t, expected, got) }) } diff --git a/pkg/cri/server/sandbox_stop.go b/pkg/cri/server/sandbox_stop.go index 5adca0a56..e957a84ed 100644 --- a/pkg/cri/server/sandbox_stop.go +++ b/pkg/cri/server/sandbox_stop.go @@ -20,17 +20,12 @@ import ( "context" "errors" "fmt" - "syscall" "time" - eventtypes "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/protobuf" "github.com/containerd/log" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - ctrdutil "github.com/containerd/containerd/pkg/cri/util" ) // StopPodSandbox stops the sandbox. If there are any running containers in the @@ -69,17 +64,20 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa } } - if err := c.cleanupSandboxFiles(id, sandbox.Config); err != nil { - return fmt.Errorf("failed to cleanup sandbox files: %w", err) - } - // Only stop sandbox container when it's running or unknown. 
state := sandbox.Status.Get().State if state == sandboxstore.StateReady || state == sandboxstore.StateUnknown { - if err := c.stopSandboxContainer(ctx, sandbox); err != nil { - return fmt.Errorf("failed to stop sandbox container %q in %q state: %w", id, state, err) + // Use sandbox controller to stop sandbox + controller, err := c.getSandboxController(sandbox.Config, sandbox.RuntimeHandler) + if err != nil { + return fmt.Errorf("failed to get sandbox controller: %w", err) + } + + if err := controller.Stop(ctx, id); err != nil { + return fmt.Errorf("failed to stop sandbox %q: %w", id, err) } } + sandboxRuntimeStopTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(stop) err := c.nri.StopPodSandbox(ctx, &sandbox) @@ -111,58 +109,6 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa return nil } -// stopSandboxContainer kills the sandbox container. -// `task.Delete` is not called here because it will be called when -// the event monitor handles the `TaskExit` event. -func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxstore.Sandbox) error { - id := sandbox.ID - container := sandbox.Container - state := sandbox.Status.Get().State - task, err := container.Task(ctx, nil) - if err != nil { - if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to get sandbox container: %w", err) - } - // Don't return for unknown state, some cleanup needs to be done. - if state == sandboxstore.StateUnknown { - return c.cleanupUnknownSandbox(ctx, id, sandbox) - } - return nil - } - - // Handle unknown state. - // The cleanup logic is the same with container unknown state. - if state == sandboxstore.StateUnknown { - // Start an exit handler for containers in unknown state. 
- waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext()) - defer waitCancel() - exitCh, err := task.Wait(waitCtx) - if err != nil { - if !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to wait for task: %w", err) - } - return c.cleanupUnknownSandbox(ctx, id, sandbox) - } - - exitCtx, exitCancel := context.WithCancel(context.Background()) - stopCh := c.eventMonitor.startSandboxExitMonitor(exitCtx, id, task.Pid(), exitCh) - defer func() { - exitCancel() - // This ensures that exit monitor is stopped before - // `Wait` is cancelled, so no exit event is generated - // because of the `Wait` cancellation. - <-stopCh - }() - } - - // Kill the sandbox container. - if err = task.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) { - return fmt.Errorf("failed to kill sandbox container: %w", err) - } - - return c.waitSandboxStop(ctx, sandbox) -} - // waitSandboxStop waits for sandbox to be stopped until context is cancelled or // the context deadline is exceeded. func (c *criService) waitSandboxStop(ctx context.Context, sandbox sandboxstore.Sandbox) error { @@ -201,15 +147,3 @@ func (c *criService) teardownPodNetwork(ctx context.Context, sandbox sandboxstor } return nil } - -// cleanupUnknownSandbox cleanup stopped sandbox in unknown state. -func (c *criService) cleanupUnknownSandbox(ctx context.Context, id string, sandbox sandboxstore.Sandbox) error { - // Reuse handleSandboxExit to do the cleanup. 
- return handleSandboxExit(ctx, &eventtypes.TaskExit{ - ContainerID: id, - ID: id, - Pid: 0, - ExitStatus: unknownExitCode, - ExitedAt: protobuf.ToTimestamp(time.Now()), - }, sandbox, c) -} diff --git a/pkg/cri/server/service.go b/pkg/cri/server/service.go index 5582b573c..4ebc4e36d 100644 --- a/pkg/cri/server/service.go +++ b/pkg/cri/server/service.go @@ -17,6 +17,7 @@ package server import ( + "context" "encoding/json" "fmt" "io" @@ -25,17 +26,19 @@ import ( "path/filepath" "sync" "sync/atomic" - "time" "github.com/containerd/containerd" "github.com/containerd/containerd/oci" "github.com/containerd/containerd/pkg/cri/instrument" "github.com/containerd/containerd/pkg/cri/nri" + "github.com/containerd/containerd/pkg/cri/server/images" + "github.com/containerd/containerd/pkg/cri/server/podsandbox" + imagestore "github.com/containerd/containerd/pkg/cri/store/image" + snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot" "github.com/containerd/containerd/pkg/cri/streaming" - "github.com/containerd/containerd/pkg/kmutex" - "github.com/containerd/containerd/platforms" "github.com/containerd/containerd/plugin" - cni "github.com/containerd/go-cni" + "github.com/containerd/containerd/sandbox" + "github.com/containerd/go-cni" "github.com/containerd/log" "google.golang.org/grpc" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" @@ -44,9 +47,7 @@ import ( criconfig "github.com/containerd/containerd/pkg/cri/config" containerstore "github.com/containerd/containerd/pkg/cri/store/container" - imagestore "github.com/containerd/containerd/pkg/cri/store/image" sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot" ctrdutil "github.com/containerd/containerd/pkg/cri/util" osinterface "github.com/containerd/containerd/pkg/os" "github.com/containerd/containerd/pkg/registrar" @@ -67,8 +68,23 @@ type CRIService interface { Register(*grpc.Server) error } +// imageService specifies 
dependencies to image service. +type imageService interface { + runtime.ImageServiceServer + + RuntimeSnapshotter(ctx context.Context, ociRuntime criconfig.Runtime) string + + UpdateImage(ctx context.Context, r string) error + + GetImage(id string) (imagestore.Image, error) + GetSnapshot(key string) (snapshotstore.Snapshot, error) + + LocalResolve(refOrID string) (imagestore.Image, error) +} + // criService implements CRIService. type criService struct { + imageService // config contains all configurations. config criconfig.Config // imageFSPath is the path to image filesystem. @@ -82,13 +98,12 @@ type criService struct { sandboxNameIndex *registrar.Registrar // containerStore stores all resources associated with containers. containerStore *containerstore.Store + // sandboxControllers contains different sandbox controller type, + // every controller controls sandbox lifecycle (and hides implementation details behind). + sandboxControllers map[criconfig.SandboxControllerMode]sandbox.Controller // containerNameIndex stores all container names and make sure each // name is unique. containerNameIndex *registrar.Registrar - // imageStore stores all resources associated with images. - imageStore *imagestore.Store - // snapshotStore stores information of all snapshots. - snapshotStore *snapshotstore.Store // netPlugin is used to setup and teardown network when run/stop pod sandbox. netPlugin map[string]cni.CNI // client is an instance of the containerd client @@ -108,45 +123,48 @@ type criService struct { // allCaps is the list of the capabilities. // When nil, parsed from CapEff of /proc/self/status. allCaps []string //nolint:nolintlint,unused // Ignore on non-Linux - // unpackDuplicationSuppressor is used to make sure that there is only - // one in-flight fetch request or unpack handler for a given descriptor's - // or chain ID. - unpackDuplicationSuppressor kmutex.KeyedLocker - // nri is used to hook NRI into CRI request processing. 
- nri *nri.API // containerEventsChan is used to capture container events and send them // to the caller of GetContainerEvents. containerEventsChan chan runtime.ContainerEventResponse + // nri is used to hook NRI into CRI request processing. + nri *nri.API } // NewCRIService returns a new instance of CRIService func NewCRIService(config criconfig.Config, client *containerd.Client, nri *nri.API) (CRIService, error) { var err error labels := label.NewStore() + + if client.SnapshotService(config.ContainerdConfig.Snapshotter) == nil { + return nil, fmt.Errorf("failed to find snapshotter %q", config.ContainerdConfig.Snapshotter) + } + + imageFSPath := imageFSPath(config.ContainerdRootDir, config.ContainerdConfig.Snapshotter) + log.L.Infof("Get image filesystem path %q", imageFSPath) + + // TODO: expose this as a separate containerd plugin. + imageService, err := images.NewService(config, imageFSPath, client) + if err != nil { + return nil, fmt.Errorf("unable to create CRI image service: %w", err) + } + c := &criService{ - config: config, - client: client, - os: osinterface.RealOS{}, - sandboxStore: sandboxstore.NewStore(labels), - containerStore: containerstore.NewStore(labels), - imageStore: imagestore.NewStore(client.ImageService(), client.ContentStore(), platforms.Default()), - snapshotStore: snapshotstore.NewStore(), - sandboxNameIndex: registrar.NewRegistrar(), - containerNameIndex: registrar.NewRegistrar(), - netPlugin: make(map[string]cni.CNI), - unpackDuplicationSuppressor: kmutex.New(), + imageService: imageService, + config: config, + client: client, + imageFSPath: imageFSPath, + os: osinterface.RealOS{}, + sandboxStore: sandboxstore.NewStore(labels), + containerStore: containerstore.NewStore(labels), + sandboxNameIndex: registrar.NewRegistrar(), + containerNameIndex: registrar.NewRegistrar(), + netPlugin: make(map[string]cni.CNI), + sandboxControllers: make(map[criconfig.SandboxControllerMode]sandbox.Controller), } // TODO: figure out a proper channel size. 
c.containerEventsChan = make(chan runtime.ContainerEventResponse, 1000) - if client.SnapshotService(c.config.ContainerdConfig.Snapshotter) == nil { - return nil, fmt.Errorf("failed to find snapshotter %q", c.config.ContainerdConfig.Snapshotter) - } - - c.imageFSPath = imageFSPath(config.ContainerdRootDir, config.ContainerdConfig.Snapshotter) - log.L.Infof("Get image filesystem path %q", c.imageFSPath) - if err := c.initPlatform(); err != nil { return nil, fmt.Errorf("initialize platform: %w", err) } @@ -182,11 +200,21 @@ func NewCRIService(config criconfig.Config, client *containerd.Client, nri *nri. return nil, err } + // Load all sandbox controllers(pod sandbox controller and remote shim controller) + c.sandboxControllers[criconfig.ModePodSandbox] = podsandbox.New(config, client, c.sandboxStore, c.os, c, imageService, c.baseOCISpecs) + c.sandboxControllers[criconfig.ModeShim] = client.SandboxController() + c.nri = nri return c, nil } +// BackOffEvent is a temporary workaround to call eventMonitor from controller.Stop. +// TODO: get rid of this. +func (c *criService) BackOffEvent(id string, event interface{}) { + c.eventMonitor.backOff.enBackOff(id, event) +} + // Register registers all required services onto a specific grpc server. // This is used by containerd cri plugin. func (c *criService) Register(s *grpc.Server) error { @@ -216,15 +244,6 @@ func (c *criService) Run(ready func()) error { log.L.Info("Start event monitor") eventMonitorErrCh := c.eventMonitor.start() - // Start snapshot stats syncer, it doesn't need to be stopped. 
- log.L.Info("Start snapshots syncer") - snapshotsSyncer := newSnapshotsSyncer( - c.snapshotStore, - c.client.SnapshotService(c.config.ContainerdConfig.Snapshotter), - time.Duration(c.config.StatsCollectPeriod)*time.Second, - ) - snapshotsSyncer.start() - // Start CNI network conf syncers cniNetConfMonitorErrCh := make(chan error, len(c.cniNetConfMonitor)) var netSyncGroup sync.WaitGroup @@ -297,7 +316,6 @@ func (c *criService) Run(ready func()) error { if cniNetConfMonitorErr != nil { return fmt.Errorf("cni network conf monitor error: %w", cniNetConfMonitorErr) } - return nil } @@ -372,3 +390,16 @@ func loadBaseOCISpecs(config *criconfig.Config) (map[string]*oci.Spec, error) { return specs, nil } + +// ValidateMode validate the given mod value, +// returns err if mod is empty or unknown +func ValidateMode(modeStr string) error { + switch modeStr { + case string(criconfig.ModePodSandbox), string(criconfig.ModeShim): + return nil + case "": + return fmt.Errorf("empty sandbox controller mode") + default: + return fmt.Errorf("unknown sandbox controller mode: %s", modeStr) + } +} diff --git a/pkg/cri/server/service_linux.go b/pkg/cri/server/service_linux.go index 8cbaf5eac..0e3c42e42 100644 --- a/pkg/cri/server/service_linux.go +++ b/pkg/cri/server/service_linux.go @@ -20,11 +20,12 @@ import ( "fmt" "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" + "github.com/opencontainers/selinux/go-selinux" + "github.com/containerd/containerd/pkg/cap" "github.com/containerd/containerd/pkg/userns" - cni "github.com/containerd/go-cni" + "github.com/containerd/go-cni" "github.com/containerd/log" - "github.com/opencontainers/selinux/go-selinux" ) // networkAttachCount is the minimum number of networks the PodSandbox @@ -92,7 +93,7 @@ func (c *criService) initPlatform() (err error) { reg := cdi.GetRegistry() err = reg.Configure(cdi.WithSpecDirs(c.config.CDISpecDirs...)) if err != nil { - return fmt.Errorf("failed to configure CDI registry: %w", err) + 
return fmt.Errorf("failed to configure CDI registry") } } diff --git a/pkg/cri/server/service_other.go b/pkg/cri/server/service_other.go index 40e864a02..70e050f11 100644 --- a/pkg/cri/server/service_other.go +++ b/pkg/cri/server/service_other.go @@ -19,16 +19,17 @@ package server import ( - cni "github.com/containerd/go-cni" + "github.com/containerd/go-cni" ) -// initPlatform handles initialization for the CRI service on non-Windows and non-Linux -// platforms. +// initPlatform handles initialization of the CRI service for non-windows +// and non-linux platforms. func (c *criService) initPlatform() error { return nil } -// cniLoadOptions returns cni load options for non-Windows and non-Linux platforms. +// cniLoadOptions returns cni load options for non-windows and non-linux +// platforms. func (c *criService) cniLoadOptions() []cni.Opt { return []cni.Opt{} } diff --git a/pkg/cri/server/service_test.go b/pkg/cri/server/service_test.go index 1a715092c..0ed4b4de0 100644 --- a/pkg/cri/server/service_test.go +++ b/pkg/cri/server/service_test.go @@ -22,18 +22,15 @@ import ( "testing" "github.com/containerd/containerd/oci" - "github.com/containerd/containerd/platforms" "github.com/containerd/go-cni" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" criconfig "github.com/containerd/containerd/pkg/cri/config" - servertesting "github.com/containerd/containerd/pkg/cri/server/testing" containerstore "github.com/containerd/containerd/pkg/cri/store/container" - imagestore "github.com/containerd/containerd/pkg/cri/store/image" "github.com/containerd/containerd/pkg/cri/store/label" sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" - snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot" + servertesting "github.com/containerd/containerd/pkg/cri/testing" ostesting "github.com/containerd/containerd/pkg/os/testing" "github.com/containerd/containerd/pkg/registrar" ) @@ -42,12 +39,10 @@ import ( func newTestCRIService() 
*criService { labels := label.NewStore() return &criService{ + imageService: &fakeImageService{}, config: testConfig, - imageFSPath: testImageFSPath, os: ostesting.NewFakeOS(), sandboxStore: sandboxstore.NewStore(labels), - imageStore: imagestore.NewStore(nil, nil, platforms.Default()), - snapshotStore: snapshotstore.NewStore(), sandboxNameIndex: registrar.NewRegistrar(), containerStore: containerstore.NewStore(labels), containerNameIndex: registrar.NewRegistrar(), @@ -87,3 +82,17 @@ func TestLoadBaseOCISpec(t *testing.T) { assert.Equal(t, "1.0.2", out.Version) assert.Equal(t, "default", out.Hostname) } + +func TestValidateMode(t *testing.T) { + mode := "" + assert.Error(t, ValidateMode(mode)) + + mode = "podsandbox" + assert.NoError(t, ValidateMode(mode)) + + mode = "shim" + assert.NoError(t, ValidateMode(mode)) + + mode = "nonexistent" + assert.Error(t, ValidateMode(mode)) +} diff --git a/pkg/cri/server/service_windows.go b/pkg/cri/server/service_windows.go index e71373164..47ec67ec4 100644 --- a/pkg/cri/server/service_windows.go +++ b/pkg/cri/server/service_windows.go @@ -19,14 +19,14 @@ package server import ( "fmt" - cni "github.com/containerd/go-cni" + "github.com/containerd/go-cni" ) // windowsNetworkAttachCount is the minimum number of networks the PodSandbox // attaches to const windowsNetworkAttachCount = 1 -// initPlatform handles Windows specific initialization for the CRI service. +// initPlatform handles windows specific initialization for the CRI service. func (c *criService) initPlatform() error { pluginDirs := map[string]string{ defaultNetworkPlugin: c.config.NetworkPluginConfDir, diff --git a/pkg/cri/server/snapshots.go b/pkg/cri/server/snapshots.go deleted file mode 100644 index 3f3bbd102..000000000 --- a/pkg/cri/server/snapshots.go +++ /dev/null @@ -1,119 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package server - -import ( - "context" - "fmt" - "time" - - "github.com/containerd/containerd/errdefs" - snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot" - ctrdutil "github.com/containerd/containerd/pkg/cri/util" - snapshot "github.com/containerd/containerd/snapshots" - "github.com/containerd/log" -) - -// snapshotsSyncer syncs snapshot stats periodically. imagefs info and container stats -// should both use cached result here. -// TODO(random-liu): Benchmark with high workload. We may need a statsSyncer instead if -// benchmark result shows that container cpu/memory stats also need to be cached. -type snapshotsSyncer struct { - store *snapshotstore.Store - snapshotter snapshot.Snapshotter - syncPeriod time.Duration -} - -// newSnapshotsSyncer creates a snapshot syncer. -func newSnapshotsSyncer(store *snapshotstore.Store, snapshotter snapshot.Snapshotter, - period time.Duration) *snapshotsSyncer { - return &snapshotsSyncer{ - store: store, - snapshotter: snapshotter, - syncPeriod: period, - } -} - -// start starts the snapshots syncer. No stop function is needed because -// the syncer doesn't update any persistent states, it's fine to let it -// exit with the process. -func (s *snapshotsSyncer) start() { - tick := time.NewTicker(s.syncPeriod) - go func() { - defer tick.Stop() - // TODO(random-liu): This is expensive. We should do benchmark to - // check the resource usage and optimize this. 
- for { - if err := s.sync(); err != nil { - log.L.WithError(err).Error("Failed to sync snapshot stats") - } - <-tick.C - } - }() -} - -// sync updates all snapshots stats. -func (s *snapshotsSyncer) sync() error { - ctx := ctrdutil.NamespacedContext() - start := time.Now().UnixNano() - var snapshots []snapshot.Info - // Do not call `Usage` directly in collect function, because - // `Usage` takes time, we don't want `Walk` to hold read lock - // of snapshot metadata store for too long time. - // TODO(random-liu): Set timeout for the following 2 contexts. - if err := s.snapshotter.Walk(ctx, func(ctx context.Context, info snapshot.Info) error { - snapshots = append(snapshots, info) - return nil - }); err != nil { - return fmt.Errorf("walk all snapshots failed: %w", err) - } - for _, info := range snapshots { - sn, err := s.store.Get(info.Name) - if err == nil { - // Only update timestamp for non-active snapshot. - if sn.Kind == info.Kind && sn.Kind != snapshot.KindActive { - sn.Timestamp = time.Now().UnixNano() - s.store.Add(sn) - continue - } - } - // Get newest stats if the snapshot is new or active. - sn = snapshotstore.Snapshot{ - Key: info.Name, - Kind: info.Kind, - Timestamp: time.Now().UnixNano(), - } - usage, err := s.snapshotter.Usage(ctx, info.Name) - if err != nil { - if !errdefs.IsNotFound(err) { - log.L.WithError(err).Errorf("Failed to get usage for snapshot %q", info.Name) - } - continue - } - sn.Size = uint64(usage.Size) - sn.Inodes = uint64(usage.Inodes) - s.store.Add(sn) - } - for _, sn := range s.store.List() { - if sn.Timestamp >= start { - continue - } - // Delete the snapshot stats if it's not updated this time. 
- s.store.Delete(sn.Key) - } - return nil -} diff --git a/pkg/cri/server/test_config.go b/pkg/cri/server/test_config.go index a2e663ab7..e7a0fd130 100644 --- a/pkg/cri/server/test_config.go +++ b/pkg/cri/server/test_config.go @@ -25,7 +25,6 @@ const ( // TODO(random-liu): Change this to image name after we have complete image // management unit test framework. testSandboxImage = "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113798" // #nosec G101 - testImageFSPath = "/test/image/fs/path" ) var testConfig = criconfig.Config{ diff --git a/pkg/cri/server/update_runtime_config.go b/pkg/cri/server/update_runtime_config.go index cf2e2abee..e0a44e7b7 100644 --- a/pkg/cri/server/update_runtime_config.go +++ b/pkg/cri/server/update_runtime_config.go @@ -77,7 +77,6 @@ func (c *criService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateR log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) return &runtime.UpdateRuntimeConfigResponse{}, nil } - netStart := time.Now() err = netPlugin.Status() networkPluginOperations.WithValues(networkStatusOp).Inc() diff --git a/pkg/cri/server/update_runtime_config_test.go b/pkg/cri/server/update_runtime_config_test.go index bed07902b..51822272e 100644 --- a/pkg/cri/server/update_runtime_config_test.go +++ b/pkg/cri/server/update_runtime_config_test.go @@ -28,7 +28,7 @@ import ( runtime "k8s.io/cri-api/pkg/apis/runtime/v1" criconfig "github.com/containerd/containerd/pkg/cri/config" - servertesting "github.com/containerd/containerd/pkg/cri/server/testing" + servertesting "github.com/containerd/containerd/pkg/cri/testing" ) func TestUpdateRuntimeConfig(t *testing.T) { diff --git a/pkg/cri/server/testing/fake_cni_plugin.go b/pkg/cri/testing/fake_cni_plugin.go similarity index 100% rename from pkg/cri/server/testing/fake_cni_plugin.go rename to pkg/cri/testing/fake_cni_plugin.go diff --git a/script/test/cri-integration.sh b/script/test/cri-integration.sh index 
fbc8019b2..40d9aa4f1 100755 --- a/script/test/cri-integration.sh +++ b/script/test/cri-integration.sh @@ -44,10 +44,6 @@ test_setup "${REPORT_DIR}" CMD="" if [ -n "${sudo}" ]; then CMD+="${sudo} " - # sudo strips environment variables, so add DISABLE_CRI_SANDBOXES back if present - if [ -n "${DISABLE_CRI_SANDBOXES}" ]; then - CMD+="DISABLE_CRI_SANDBOXES='${DISABLE_CRI_SANDBOXES}' " - fi fi CMD+="${PWD}/bin/cri-integration.test" diff --git a/script/test/utils.sh b/script/test/utils.sh index a516e7031..619d434b9 100755 --- a/script/test/utils.sh +++ b/script/test/utils.sh @@ -222,10 +222,6 @@ run_containerd() { CMD="" if [ -n "${sudo}" ]; then CMD+="${sudo} " - # sudo strips environment variables, so add DISABLE_CRI_SANDBOXES back if present - if [ -n "${DISABLE_CRI_SANDBOXES}" ]; then - CMD+="DISABLE_CRI_SANDBOXES='${DISABLE_CRI_SANDBOXES}' " - fi fi CMD+="${PWD}/bin/containerd"