Move CRI from pkg/ to internal/

Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
This commit is contained in:
Maksym Pavlenko
2024-02-02 09:45:44 -08:00
parent db1e16da34
commit bbac058cf3
215 changed files with 254 additions and 254 deletions

View File

@@ -1,125 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package annotations
import (
customopts "github.com/containerd/containerd/v2/pkg/cri/opts"
"github.com/containerd/containerd/v2/pkg/oci"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// ContainerType values
// The following OCI annotations are currently used by katacontainers.
// We'll switch to the standard secure pod API after it is defined in CRI.
const (
// ContainerTypeSandbox represents a pod sandbox container
ContainerTypeSandbox = "sandbox"
// ContainerTypeContainer represents a container running within a pod
ContainerTypeContainer = "container"
// ContainerType is the container type (sandbox or container) annotation
ContainerType = "io.kubernetes.cri.container-type"
// SandboxID is the sandbox ID annotation
SandboxID = "io.kubernetes.cri.sandbox-id"
// SandboxCPU annotations are based on the initial CPU configuration for the sandbox. This is calculated as the
// sum of container CPU resources, optionally provided by Kubelet (introduced in 1.23) as part of the PodSandboxConfig.
//
// SandboxCPUPeriod is the sandbox CPU period annotation.
SandboxCPUPeriod = "io.kubernetes.cri.sandbox-cpu-period"
// SandboxCPUQuota is the sandbox CPU quota annotation.
SandboxCPUQuota = "io.kubernetes.cri.sandbox-cpu-quota"
// SandboxCPUShares is the sandbox CPU shares annotation.
SandboxCPUShares = "io.kubernetes.cri.sandbox-cpu-shares"
// SandboxMem is the initial amount of memory associated with this sandbox. This is calculated as the sum
// of container memory, optionally provided by Kubelet (introduced in 1.23) as part of the PodSandboxConfig.
SandboxMem = "io.kubernetes.cri.sandbox-memory"
// SandboxLogDir is the pod log directory annotation.
// If the sandbox needs to generate any log, it will put it into this directory.
// Kubelet will be responsible for:
// 1) Monitoring the disk usage of the log, and including it as part of the pod
// ephemeral storage usage.
// 2) Cleaning up the logs when the pod is deleted.
// NOTE: Kubelet is not responsible for rotating the logs.
SandboxLogDir = "io.kubernetes.cri.sandbox-log-directory"
// UntrustedWorkload is the sandbox annotation for untrusted workload. Untrusted
// workload can only run on dedicated runtime for untrusted workload.
UntrustedWorkload = "io.kubernetes.cri.untrusted-workload"
// SandboxNamespace is the name of the namespace of the sandbox (pod)
SandboxNamespace = "io.kubernetes.cri.sandbox-namespace"
// SandboxUID is the uid of the sandbox (pod) passed to CRI via RunPodSandbox.
// This field is useful for linking the uid created by the CRI client (e.g. kubelet)
// to the internal Sandbox.ID created by the containerd sandbox service.
SandboxUID = "io.kubernetes.cri.sandbox-uid"
// SandboxName is the name of the sandbox (pod)
SandboxName = "io.kubernetes.cri.sandbox-name"
// ContainerName is the name of the container in the pod
ContainerName = "io.kubernetes.cri.container-name"
// ImageName is the name of the image used to create the container
ImageName = "io.kubernetes.cri.image-name"
// PodAnnotations are the annotations of the pod
PodAnnotations = "io.kubernetes.cri.pod-annotations"
// RuntimeHandler is an experimental annotation key for getting the runtime handler from pod annotations.
// See https://github.com/containerd/containerd/issues/6657 and https://github.com/containerd/containerd/pull/6899 for details.
// The value of this annotation should be the runtime for sandboxes.
// e.g. for [plugins.cri.containerd.runtimes.runc] runtime config, this value should be runc
// TODO: we should deprecate this annotation as soon as kubelet supports passing RuntimeHandler from PullImageRequest
RuntimeHandler = "io.containerd.cri.runtime-handler"
// WindowsHostProcess is used by hcsshim to identify windows pods that are running HostProcesses
WindowsHostProcess = "microsoft.com/hostprocess-container"
)
// DefaultCRIAnnotations returns the default set of CRI annotations, as OCI
// spec options, to pass to sandboxes and containers.
//
// The sandbox ID, namespace, UID and name are always included. When sandbox
// is true the sandbox log directory is added and the container type is
// ContainerTypeSandbox; otherwise the container name and image name are added
// and the container type is ContainerTypeContainer.
func DefaultCRIAnnotations(
	sandboxID string,
	containerName string,
	imageName string,
	config *runtime.PodSandboxConfig,
	sandbox bool,
) []oci.SpecOpts {
	meta := config.GetMetadata()
	specOpts := []oci.SpecOpts{
		customopts.WithAnnotation(SandboxID, sandboxID),
		customopts.WithAnnotation(SandboxNamespace, meta.GetNamespace()),
		customopts.WithAnnotation(SandboxUID, meta.GetUid()),
		customopts.WithAnnotation(SandboxName, meta.GetName()),
	}

	if sandbox {
		// The sandbox log dir is only passed for sandboxes; the metadata
		// above is always sent.
		return append(
			specOpts,
			customopts.WithAnnotation(SandboxLogDir, config.GetLogDirectory()),
			customopts.WithAnnotation(ContainerType, ContainerTypeSandbox),
		)
	}

	// Image name and container name are only passed for containers.
	return append(
		specOpts,
		customopts.WithAnnotation(ContainerName, containerName),
		customopts.WithAnnotation(ImageName, imageName),
		customopts.WithAnnotation(ContainerType, ContainerTypeContainer),
	)
}

View File

@@ -1,34 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package bandwidth provides utilities for bandwidth shaping
package bandwidth

View File

@@ -1,72 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package bandwidth
import (
"github.com/containerd/errdefs"
"k8s.io/apimachinery/pkg/api/resource"
)
// FakeShaper provides a fake implementation of the bandwidth.Shaper interface.
// Beware: this implementation has no features besides Reset and GetCIDRs.
type FakeShaper struct {
	// CIDRs is the list handed back unchanged by GetCIDRs.
	CIDRs []string
	// ResetCIDRs accumulates every CIDR passed to Reset, in call order.
	ResetCIDRs []string
}

// Limit is not implemented; it always returns errdefs.ErrNotImplemented.
func (s *FakeShaper) Limit(cidr string, egress, ingress *resource.Quantity) error {
	return errdefs.ErrNotImplemented
}

// Reset records cidr by appending it to ResetCIDRs. It never fails.
func (s *FakeShaper) Reset(cidr string) error {
	s.ResetCIDRs = append(s.ResetCIDRs, cidr)
	return nil
}

// ReconcileInterface is not implemented; it always returns errdefs.ErrNotImplemented.
func (s *FakeShaper) ReconcileInterface() error {
	return errdefs.ErrNotImplemented
}

// ReconcileCIDR is not implemented; it always returns errdefs.ErrNotImplemented.
func (s *FakeShaper) ReconcileCIDR(cidr string, egress, ingress *resource.Quantity) error {
	return errdefs.ErrNotImplemented
}

// GetCIDRs returns the CIDRs field as-is, with a nil error.
func (s *FakeShaper) GetCIDRs() ([]string, error) {
	return s.CIDRs, nil
}

View File

@@ -1,56 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package bandwidth
import "k8s.io/apimachinery/pkg/api/resource"
// Shaper is the interface for bandwidth shaping. The shaper structs created
// in this package are designed to satisfy it.
type Shaper interface {
// Limit the bandwidth for a particular CIDR on a particular interface
// * ingress and egress are in bits/second
// * cidr is expected to be a valid network CIDR (e.g. '1.2.3.4/32' or '10.20.0.1/16')
// 'egress' bandwidth limit applies to all packets on the interface whose source matches 'cidr'
// 'ingress' bandwidth limit applies to all packets on the interface whose destination matches 'cidr'
// Limits are aggregate limits for the CIDR, not per IP address. CIDRs must be unique but can overlap; traffic
// that matches multiple CIDRs counts against all limits.
Limit(cidr string, egress, ingress *resource.Quantity) error
// Reset removes a bandwidth limit for a particular CIDR on a particular network interface
Reset(cidr string) error
// ReconcileInterface reconciles the interface managed by this shaper with the state on the ground.
ReconcileInterface() error
// ReconcileCIDR reconciles a CIDR managed by this shaper with the state on the ground
ReconcileCIDR(cidr string, egress, ingress *resource.Quantity) error
// GetCIDRs returns the set of CIDRs that are being managed by this shaper
GetCIDRs() ([]string, error)
}

View File

@@ -1,361 +0,0 @@
//go:build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package bandwidth
import (
"bufio"
"bytes"
"encoding/hex"
"fmt"
"net"
"regexp"
"strings"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/utils/exec"
"k8s.io/klog/v2"
)
var (
// classShowMatcher captures the class ID (of the form "1:<n>") from a
// "class htb ..." line of `tc class show` output.
classShowMatcher = regexp.MustCompile(`class htb (1:\d+)`)
// classAndHandleMatcher captures, from a "filter parent 1: ..." line of
// `tc filter show` output, the filter handle (after "fh") as group 1 and
// the class ID (after "flowid") as group 2.
classAndHandleMatcher = regexp.MustCompile(`filter parent 1:.*fh (\d+::\d+).*flowid (\d+:\d+)`)
)
// tcShaper provides an implementation of the Shaper interface on Linux using the 'tc' tool.
// In general, using this requires that the caller possesses the NET_CAP_ADMIN capability, though if you
// do this within a container, it only requires the NS_CAPABLE capability for manipulations to that
// container's network namespace.
// NOTE(review): the capability names above are kept as originally written; confirm them against
// the Linux capabilities documentation.
// Uses the hierarchical token bucket queuing discipline (htb); this requires Linux 2.4.20 or newer
// or a custom kernel with that queuing discipline backported.
type tcShaper struct {
// e is used to run the external 'tc' commands.
e exec.Interface
// iface is the device name passed to tc as "dev <iface>".
iface string
}
// NewTCShaper returns a Shaper backed by a tcShaper bound to the given
// interface, using a freshly created exec.Interface to run commands.
func NewTCShaper(iface string) Shaper {
	return &tcShaper{
		e:     exec.New(),
		iface: iface,
	}
}
// execAndLog runs cmdStr with args through the shaper's exec interface,
// logging the command line before it runs and its combined output afterwards
// (both at verbosity 6). It returns the error from running the command.
func (t *tcShaper) execAndLog(cmdStr string, args ...string) error {
	klog.V(6).Infof("Running: %s %s", cmdStr, strings.Join(args, " "))
	output, runErr := t.e.Command(cmdStr, args...).CombinedOutput()
	// The output is logged regardless of whether the command succeeded.
	klog.V(6).Infof("Output from tc: %s", string(output))
	return runErr
}
// nextClassID returns the smallest n in [1, 10000) for which class "1:n" is
// not listed by `tc class show dev <iface>`.
//
// It returns -1 and an error if the tc command fails, if any non-empty output
// line does not match classShowMatcher, or if every candidate ID is taken.
func (t *tcShaper) nextClassID() (int, error) {
	output, err := t.e.Command("tc", "class", "show", "dev", t.iface).CombinedOutput()
	if err != nil {
		return -1, err
	}

	inUse := sets.Set[string]{}
	lines := bufio.NewScanner(bytes.NewBuffer(output))
	for lines.Scan() {
		trimmed := strings.TrimSpace(lines.Text())
		if trimmed == "" {
			// skip empty lines
			continue
		}
		// expected tc line:
		// class htb 1:1 root prio 0 rate 1000Kbit ceil 1000Kbit burst 1600b cburst 1600b
		groups := classShowMatcher.FindStringSubmatch(trimmed)
		if len(groups) != 2 {
			return -1, fmt.Errorf("unexpected output from tc: %s (%v)", lines.Text(), groups)
		}
		inUse.Insert(groups[1])
	}

	// The search is bounded so that it cannot loop forever.
	for candidate := 1; candidate < 10000; candidate++ {
		if inUse.Has(fmt.Sprintf("1:%d", candidate)) {
			continue
		}
		return candidate, nil
	}
	// This should really never happen
	return -1, fmt.Errorf("exhausted class space, please try again")
}
// hexCIDR converts a CIDR from text to its hex representation,
// "<hex address>/<hex mask>".
// Any bits of the address outside the mask are cleared first, so
// 1.2.3.4/16 becomes 01020000/ffff0000.
// It returns an error if cidr cannot be parsed by net.ParseCIDR.
func hexCIDR(cidr string) (string, error) {
	addr, network, err := net.ParseCIDR(cidr)
	if err != nil {
		return "", err
	}
	masked := addr.Mask(network.Mask)
	return hex.EncodeToString(masked) + "/" + network.Mask.String(), nil
}
// asciiCIDR converts a CIDR from its hex representation
// ("<hex address>/<hex mask>") back to text ("<address>/<prefix length>");
// it is the inverse of hexCIDR.
// It returns an error if cidr does not contain exactly one "/" or if either
// side is not valid hex.
func asciiCIDR(cidr string) (string, error) {
	ipHex, maskHex, ok := strings.Cut(cidr, "/")
	if !ok || strings.Contains(maskHex, "/") {
		return "", fmt.Errorf("unexpected CIDR format: %s", cidr)
	}
	rawIP, err := hex.DecodeString(ipHex)
	if err != nil {
		return "", err
	}
	rawMask, err := hex.DecodeString(maskHex)
	if err != nil {
		return "", err
	}
	// Only the number of leading ones is needed; the total bit count is dropped.
	prefixLen, _ := net.IPMask(rawMask).Size()
	return fmt.Sprintf("%s/%d", net.IP(rawIP).String(), prefixLen), nil
}
// findCIDRClass scans `tc filter show dev <iface>` for filters matching cidr.
//
// Each entry of classAndHandleList is a two-element slice {classID, handle}
// taken from the most recent "filter ..." line preceding a line that contains
// "match <hex cidr>". found reports whether at least one entry was collected.
// An error is returned if the tc command fails, cidr cannot be converted by
// hexCIDR, or a matching filter line does not fit classAndHandleMatcher.
func (t *tcShaper) findCIDRClass(cidr string) (classAndHandleList [][]string, found bool, err error) {
	output, err := t.e.Command("tc", "filter", "show", "dev", t.iface).CombinedOutput()
	if err != nil {
		return classAndHandleList, false, err
	}
	hexSpec, err := hexCIDR(cidr)
	if err != nil {
		return classAndHandleList, false, err
	}
	needle := fmt.Sprintf("match %s", hexSpec)

	// currentFilter holds the last "filter ..." header line seen; the match
	// lines that follow it belong to that filter.
	currentFilter := ""
	lines := bufio.NewScanner(bytes.NewBuffer(output))
	for lines.Scan() {
		trimmed := strings.TrimSpace(lines.Text())
		switch {
		case len(trimmed) == 0:
			// skip empty lines
		case strings.HasPrefix(trimmed, "filter"):
			currentFilter = trimmed
		case strings.Contains(trimmed, needle):
			// expected tc line:
			// `filter parent 1: protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1` (old version) or
			// `filter parent 1: protocol ip pref 1 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 not_in_hw` (new version)
			groups := classAndHandleMatcher.FindStringSubmatch(currentFilter)
			if len(groups) != 3 {
				return classAndHandleList, false, fmt.Errorf("unexpected output from tc: %s %d (%v)", currentFilter, len(groups), groups)
			}
			// Stored as {flowid class, filter handle}.
			classAndHandleList = append(classAndHandleList, []string{groups[2], groups[1]})
		}
	}
	return classAndHandleList, len(classAndHandleList) > 0, nil
}
// makeKBitString formats rsrc as a rate string with a "kbit" suffix: the
// quantity's integer value divided by 1000 (integer division, so any
// remainder is dropped).
func makeKBitString(rsrc *resource.Quantity) string {
	kbits := rsrc.Value() / 1000
	return fmt.Sprintf("%dkbit", kbits)
}
// makeNewClass picks the next free class ID via nextClassID and adds an htb
// class "1:<id>" with the given rate under parent "1:" on the shaper's
// interface. It returns the new class ID, or -1 and an error if either step
// fails.
func (t *tcShaper) makeNewClass(rate string) (int, error) {
	classID, err := t.nextClassID()
	if err != nil {
		return -1, err
	}
	err = t.execAndLog("tc", "class", "add",
		"dev", t.iface,
		"parent", "1:",
		"classid", fmt.Sprintf("1:%d", classID),
		"htb", "rate", rate)
	if err != nil {
		return -1, err
	}
	return classID, nil
}
// Limit installs bandwidth limits for cidr on the shaper's interface.
//
// For each non-nil rate it creates a new htb class (see makeNewClass) and adds
// a u32 filter that directs matching traffic to that class: download is
// handled first and matches on destination ("dst") cidr, upload second and
// matches on source ("src") cidr. The first failing step's error is returned;
// anything created before the failure is left in place.
func (t *tcShaper) Limit(cidr string, upload, download *resource.Quantity) (err error) {
	directions := []struct {
		rate     *resource.Quantity
		selector string
	}{
		{rate: download, selector: "dst"},
		{rate: upload, selector: "src"},
	}
	for _, dir := range directions {
		if dir.rate == nil {
			continue
		}
		classID, classErr := t.makeNewClass(makeKBitString(dir.rate))
		if classErr != nil {
			return classErr
		}
		filterErr := t.execAndLog("tc", "filter", "add",
			"dev", t.iface,
			"protocol", "ip",
			"parent", "1:0",
			"prio", "1", "u32",
			"match", "ip", dir.selector, cidr,
			"flowid", fmt.Sprintf("1:%d", classID))
		if filterErr != nil {
			return filterErr
		}
	}
	return nil
}
// interfaceExists reports whether `tc qdisc show dev <iface>` lists a qdisc
// for the shaper's interface, and if so returns the trimmed output as the
// status line.
// It returns false, "", nil when the output is empty or its second field is
// "noqueue", and false, "", <err> if the command fails.
func (t *tcShaper) interfaceExists() (bool, string, error) {
	output, err := t.e.Command("tc", "qdisc", "show", "dev", t.iface).CombinedOutput()
	if err != nil {
		return false, "", err
	}
	status := strings.TrimSpace(string(output))
	if status == "" {
		return false, "", nil
	}
	// Newer versions of tc and/or the kernel return the following instead of nothing:
	// qdisc noqueue 0: root refcnt 2
	if words := strings.Fields(status); len(words) > 1 && words[1] == "noqueue" {
		return false, "", nil
	}
	return true, status, nil
}
// ReconcileCIDR ensures a limit exists for cidr: if findCIDRClass finds no
// filter for it, Limit is called with the given rates. When a filter already
// exists nothing is changed; the existing limits are not compared against
// upload/download.
func (t *tcShaper) ReconcileCIDR(cidr string, upload, download *resource.Quantity) error {
	_, exists, err := t.findCIDRClass(cidr)
	if err != nil {
		return err
	}
	if exists {
		// TODO: actually check bandwidth limits here
		return nil
	}
	return t.Limit(cidr, upload, download)
}
// ReconcileInterface makes sure the shaper's interface has the expected root
// htb qdisc with handle "1:".
//
// If interfaceExists reports no qdisc, one is created. Otherwise the status
// line is split on single spaces; unless it has at least 12 fields with
// fields[1] == "htb" and fields[2] == "1:", the existing qdisc (identified by
// fields[2]) is deleted and the interface is re-initialized.
//
// An error is returned if any tc invocation fails, or if the status line has
// fewer than three fields and therefore carries no handle to delete.
func (t *tcShaper) ReconcileInterface() error {
	exists, output, err := t.interfaceExists()
	if err != nil {
		return err
	}
	if !exists {
		klog.V(4).Info("Didn't find bandwidth interface, creating")
		return t.initializeInterface()
	}
	fields := strings.Split(output, " ")
	// fields[2] is read below even when the length check fails, so make sure
	// it exists instead of panicking on unexpectedly short output.
	if len(fields) < 3 {
		return fmt.Errorf("unexpected output from tc: %s", output)
	}
	if len(fields) < 12 || fields[1] != "htb" || fields[2] != "1:" {
		if err := t.deleteInterface(fields[2]); err != nil {
			return err
		}
		return t.initializeInterface()
	}
	return nil
}
// initializeInterface adds a root htb qdisc with handle "1:" and default
// class 30 to the shaper's interface.
func (t *tcShaper) initializeInterface() error {
	return t.execAndLog("tc", "qdisc", "add",
		"dev", t.iface,
		"root",
		"handle", "1:",
		"htb", "default", "30")
}
// Reset removes the limits installed for cidr on the shaper's interface.
//
// For every {classID, handle} pair reported by findCIDRClass it deletes the
// u32 filter identified by the handle and then the class. It returns an error
// if the lookup fails, if no filter matches cidr, or on the first failing tc
// command (remaining pairs are then left untouched).
func (t *tcShaper) Reset(cidr string) error {
	pairs, found, err := t.findCIDRClass(cidr)
	if err != nil {
		return err
	}
	if !found {
		return fmt.Errorf("Failed to find cidr: %s on interface: %s", cidr, t.iface)
	}
	for _, pair := range pairs {
		classID, handle := pair[0], pair[1]
		// The filter is removed before the class it points at.
		if err := t.execAndLog("tc", "filter", "del",
			"dev", t.iface,
			"parent", "1:",
			"proto", "ip",
			"prio", "1",
			"handle", handle, "u32"); err != nil {
			return err
		}
		if err := t.execAndLog("tc", "class", "del",
			"dev", t.iface,
			"parent", "1:",
			"classid", classID); err != nil {
			return err
		}
	}
	return nil
}
// deleteInterface deletes the root qdisc with the given handle from the
// shaper's interface.
func (t *tcShaper) deleteInterface(class string) error {
	return t.execAndLog("tc", "qdisc", "delete",
		"dev", t.iface,
		"root",
		"handle", class)
}
// GetCIDRs returns the CIDRs, in text form, of every "match" line printed by
// `tc filter show dev <iface>`.
//
// The result is an empty, non-nil slice when nothing matches. An error is
// returned if the tc command fails, a match line does not have exactly four
// space-separated parts, or its CIDR cannot be converted by asciiCIDR.
func (t *tcShaper) GetCIDRs() ([]string, error) {
	output, err := t.e.Command("tc", "filter", "show", "dev", t.iface).CombinedOutput()
	if err != nil {
		return nil, err
	}

	cidrs := []string{}
	lines := bufio.NewScanner(bytes.NewBuffer(output))
	for lines.Scan() {
		trimmed := strings.TrimSpace(lines.Text())
		// Empty lines cannot contain "match", so they are skipped here too.
		if !strings.Contains(trimmed, "match") {
			continue
		}
		// expected tc line:
		// match <cidr> at <number>
		parts := strings.Split(trimmed, " ")
		if len(parts) != 4 {
			return nil, fmt.Errorf("unexpected output: %v", parts)
		}
		cidr, err := asciiCIDR(parts[1])
		if err != nil {
			return nil, err
		}
		cidrs = append(cidrs, cidr)
	}
	return cidrs, nil
}

View File

@@ -1,69 +0,0 @@
//go:build !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package bandwidth
import (
"github.com/containerd/errdefs"
"k8s.io/apimachinery/pkg/api/resource"
)
// unsupportedShaper is the Shaper returned on non-Linux builds. It performs
// no traffic shaping.
type unsupportedShaper struct{}

// NewTCShaper makes a new unsupportedShaper; the interface name is ignored.
func NewTCShaper(iface string) Shaper {
	return &unsupportedShaper{}
}

// Limit is not implemented; it always returns errdefs.ErrNotImplemented.
func (u *unsupportedShaper) Limit(cidr string, egress, ingress *resource.Quantity) error {
	return errdefs.ErrNotImplemented
}

// Reset does nothing and always returns nil.
func (u *unsupportedShaper) Reset(cidr string) error {
	return nil
}

// ReconcileInterface is not implemented; it always returns errdefs.ErrNotImplemented.
func (u *unsupportedShaper) ReconcileInterface() error {
	return errdefs.ErrNotImplemented
}

// ReconcileCIDR is not implemented; it always returns errdefs.ErrNotImplemented.
func (u *unsupportedShaper) ReconcileCIDR(cidr string, egress, ingress *resource.Quantity) error {
	return errdefs.ErrNotImplemented
}

// GetCIDRs always returns an empty, non-nil slice and a nil error.
func (u *unsupportedShaper) GetCIDRs() ([]string, error) {
	return []string{}, nil
}

View File

@@ -1,82 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package bandwidth
import (
"fmt"
"k8s.io/apimachinery/pkg/api/resource"
)
// minRsrc and maxRsrc are the inclusive bounds accepted by
// validateBandwidthIsReasonable.
var (
	minRsrc = resource.MustParse("1k")
	maxRsrc = resource.MustParse("1P")
)

// validateBandwidthIsReasonable returns an error if rsrc's value is below
// minRsrc or above maxRsrc, and nil otherwise.
func validateBandwidthIsReasonable(rsrc *resource.Quantity) error {
	switch value := rsrc.Value(); {
	case value < minRsrc.Value():
		return fmt.Errorf("resource is unreasonably small (< 1kbit)")
	case value > maxRsrc.Value():
		return fmt.Errorf("resource is unreasonably large (> 1Pbit)")
	}
	return nil
}
// ExtractPodBandwidthResources extracts the ingress and egress bandwidth
// limits from the given pod annotations.
//
// It reads "kubernetes.io/ingress-bandwidth" and
// "kubernetes.io/egress-bandwidth". A missing annotation yields a nil quantity
// for that direction. If either present value fails to parse or fails
// validateBandwidthIsReasonable, both results are nil and the error is
// returned. A nil map yields nil, nil, nil.
func ExtractPodBandwidthResources(podAnnotations map[string]string) (ingress, egress *resource.Quantity, err error) {
	if podAnnotations == nil {
		return nil, nil, nil
	}

	// parse returns the validated quantity stored under key, or nil if the
	// annotation is absent.
	parse := func(key string) (*resource.Quantity, error) {
		raw, present := podAnnotations[key]
		if !present {
			return nil, nil
		}
		quantity, parseErr := resource.ParseQuantity(raw)
		if parseErr != nil {
			return nil, parseErr
		}
		if validateErr := validateBandwidthIsReasonable(&quantity); validateErr != nil {
			return nil, validateErr
		}
		return &quantity, nil
	}

	if ingress, err = parse("kubernetes.io/ingress-bandwidth"); err != nil {
		return nil, nil, err
	}
	if egress, err = parse("kubernetes.io/egress-bandwidth"); err != nil {
		return nil, nil, err
	}
	return ingress, egress, nil
}

View File

@@ -1,806 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"context"
"errors"
"fmt"
"net/url"
goruntime "runtime"
"strconv"
"time"
introspectionapi "github.com/containerd/containerd/v2/api/services/introspection/v1"
apitypes "github.com/containerd/containerd/v2/api/types"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/log"
"github.com/containerd/typeurl/v2"
"github.com/pelletier/go-toml/v2"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubelet/pkg/cri/streaming"
runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options"
runcoptions "github.com/containerd/containerd/v2/core/runtime/v2/runc/options"
"github.com/containerd/containerd/v2/pkg/cri/annotations"
"github.com/containerd/containerd/v2/pkg/deprecation"
runtimeoptions "github.com/containerd/containerd/v2/pkg/runtimeoptions/v1"
"github.com/containerd/containerd/v2/plugins"
"github.com/opencontainers/image-spec/specs-go"
"github.com/opencontainers/runtime-spec/specs-go/features"
)
func init() {
const prefix = "types.containerd.io"
major := strconv.Itoa(specs.VersionMajor)
typeurl.Register(&features.Features{}, prefix, "opencontainers/runtime-spec", major, "features", "Features")
}
const (
// defaultImagePullProgressTimeoutDuration is the default value of imagePullProgressTimeout.
//
// NOTE:
//
// The ImagePullProgressTimeout feature is ported from kubelet/dockershim's
// --image-pull-progress-deadline, whose original value is 1m0. Unlike the docker
// daemon, containerd doesn't have a global concurrent download limit
// before migrating to the Transfer Service. If kubelet runs with concurrent
// image pulls, the node will run under IO pressure. The ImagePull process
// can be slowed down by its own load if the target image is a large one with a
// lot of layers. In addition, both containers' writable layers and the image storage
// share one disk. The ImagePull process commits blobs to the content store
// with fsync, which might flush unrelated files' dirty pages to
// disk in one transaction [1]. The 1m0 value isn't good enough. Based
// on the #9347 case and the kubernetes community's usage [2], the default value
// is updated to 5m0. If end-users still run into unexpected cancellation,
// they need to configure it based on their environment.
//
// [1]: Fast commits for ext4 - https://lwn.net/Articles/842385/
// [2]: https://github.com/kubernetes/kubernetes/blob/1635c380b26a1d8cc25d36e9feace9797f4bae3c/cluster/gce/util.sh#L882
defaultImagePullProgressTimeoutDuration = 5 * time.Minute
)
// SandboxControllerMode names which sandbox Controller implementation to use.
type SandboxControllerMode string
const (
// ModePodSandbox means use the Controller implementation from the sbserver podsandbox package.
// We take this one as the default mode.
ModePodSandbox SandboxControllerMode = "podsandbox"
// ModeShim means use whatever Controller implementation is provided by the shim.
ModeShim SandboxControllerMode = "shim"
// DefaultSandboxImage is the default image to use for sandboxes when empty or
// for default configurations.
DefaultSandboxImage = "registry.k8s.io/pause:3.9"
)
// Ternary represents a ternary value.
// Ternary is needed because TOML does not accept "null" for boolean values.
// It is a type alias for string.
type Ternary = string
const (
// TernaryEmpty is the empty string; it is treated as an alias for IfPossible.
TernaryEmpty Ternary = "" // alias for IfPossible
// TernaryEnabled is the "Enabled" value.
TernaryEnabled Ternary = "Enabled"
// TernaryIfPossible is the "IfPossible" value.
TernaryIfPossible Ternary = "IfPossible"
// TernaryDisabled is the "Disabled" value.
TernaryDisabled Ternary = "Disabled"
)
// Runtime struct to contain the type(ID), engine, and root variables for a default runtime
// and a runtime for untrusted workload.
type Runtime struct {
// Type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
Type string `toml:"runtime_type" json:"runtimeType"`
// Path is an optional field that can be used to overwrite path to a shim runtime binary.
// When specified, containerd will ignore runtime name field when resolving shim location.
// Path must be absolute.
Path string `toml:"runtime_path" json:"runtimePath"`
// PodAnnotations is a list of pod annotations passed to both pod sandbox as well as
// container OCI annotations.
PodAnnotations []string `toml:"pod_annotations" json:"PodAnnotations"`
// ContainerAnnotations is a list of container annotations passed through to the OCI config of the containers.
// Container annotations in CRI are usually generated by other Kubernetes node components (i.e., not users).
// Currently, only device plugins populate the annotations.
ContainerAnnotations []string `toml:"container_annotations" json:"ContainerAnnotations"`
// Options are config options for the runtime.
Options map[string]interface{} `toml:"options" json:"options"`
// PrivilegedWithoutHostDevices overloads the default behaviour for adding host devices to the
// runtime spec when the container is privileged. Defaults to false.
PrivilegedWithoutHostDevices bool `toml:"privileged_without_host_devices" json:"privileged_without_host_devices"`
// PrivilegedWithoutHostDevicesAllDevicesAllowed overloads the default behaviour for device
// allowlisting in the runtime spec of a privileged container when PrivilegedWithoutHostDevices
// is already enabled. Requires PrivilegedWithoutHostDevices to be enabled. Defaults to false.
PrivilegedWithoutHostDevicesAllDevicesAllowed bool `toml:"privileged_without_host_devices_all_devices_allowed" json:"privileged_without_host_devices_all_devices_allowed"`
// BaseRuntimeSpec is a json file with an OCI spec to use as the base spec that all containers will be created from.
BaseRuntimeSpec string `toml:"base_runtime_spec" json:"baseRuntimeSpec"`
// NetworkPluginConfDir is a directory containing the CNI network information for the runtime class.
NetworkPluginConfDir string `toml:"cni_conf_dir" json:"cniConfDir"`
// NetworkPluginMaxConfNum is the max number of plugin config files that will
// be loaded from the cni config directory by go-cni. Set the value to 0 to
// load all config files (no arbitrary limit). The legacy default value is 1.
NetworkPluginMaxConfNum int `toml:"cni_max_conf_num" json:"cniMaxConfNum"`
// Snapshotter sets the snapshotter at the runtime level instead of making it a global configuration.
// An example use case is to use devmapper or other snapshotters in Kata containers for performance and security
// while using default snapshotters for operational simplicity.
// See https://github.com/containerd/containerd/issues/6657 for details.
Snapshotter string `toml:"snapshotter" json:"snapshotter"`
// Sandboxer defines which sandbox runtime to use when scheduling pods.
// This feature requires the new CRI server implementation (enabled by default in 2.0).
// shim - means use whatever Controller implementation is provided by the shim (e.g. use RemoteController).
// podsandbox - means use the Controller implementation from the sbserver podsandbox package.
Sandboxer string `toml:"sandboxer" json:"sandboxer"`
// TreatRoMountsAsRro ("Enabled"|"IfPossible"|"Disabled")
// treats read-only mounts as recursive read-only mounts.
// An empty string means "IfPossible".
// "Enabled" requires Linux kernel v5.12 or later.
// Introduced in containerd v2.0.
// This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
TreatRoMountsAsRro Ternary `toml:"treat_ro_mount_as_rro" json:"treatRoMountsAsRro"`
// TreatRoMountsAsRroResolved is excluded from both TOML and JSON (tag "-");
// per the inline note it must not be set manually.
TreatRoMountsAsRroResolved bool `toml:"-" json:"-"` // Do not set manually
}
// ContainerdConfig contains the toml config related to containerd, i.e. the
// set of configured runtimes and which of them is used by default.
type ContainerdConfig struct {
// DefaultRuntimeName is the default runtime name to use from the runtimes table.
// ValidateRuntimeConfig rejects an empty value and requires the name to be a
// key of Runtimes. GetSandboxRuntime falls back to it when no runtime handler
// is requested.
DefaultRuntimeName string `toml:"default_runtime_name" json:"defaultRuntimeName"`
// Runtimes is a map from CRI RuntimeHandler strings, which specify types of runtime
// configurations, to the matching configurations.
// ValidateRuntimeConfig allocates the map when it is nil and writes the
// validated (and defaulted) entries back into it.
Runtimes map[string]Runtime `toml:"runtimes" json:"runtimes"`
// IgnoreBlockIONotEnabledErrors is a boolean flag to ignore
// blockio related errors when blockio support has not been
// enabled.
IgnoreBlockIONotEnabledErrors bool `toml:"ignore_blockio_not_enabled_errors" json:"ignoreBlockIONotEnabledErrors"`
// IgnoreRdtNotEnabledErrors is a boolean flag to ignore RDT related errors
// when RDT support has not been enabled.
IgnoreRdtNotEnabledErrors bool `toml:"ignore_rdt_not_enabled_errors" json:"ignoreRdtNotEnabledErrors"`
}
// CniConfig contains the toml config related to CNI (container networking).
type CniConfig struct {
// NetworkPluginBinDir is the directory in which the binaries for the plugin are kept.
NetworkPluginBinDir string `toml:"bin_dir" json:"binDir"`
// NetworkPluginConfDir is the directory in which the admin places a CNI conf.
NetworkPluginConfDir string `toml:"conf_dir" json:"confDir"`
// NetworkPluginMaxConfNum is the max number of plugin config files that will
// be loaded from the cni config directory by go-cni. Set the value to 0 to
// load all config files (no arbitrary limit). The legacy default value is 1.
NetworkPluginMaxConfNum int `toml:"max_conf_num" json:"maxConfNum"`
// NetworkPluginSetupSerially is a boolean flag to specify whether containerd sets up networks serially
// if there are multiple CNI plugin config files existing and NetworkPluginMaxConfNum is larger than 1.
//
// NOTE: On the Linux platform, containerd provides loopback network
// configuration by default. There are at least two network plugins.
// The default value of NetworkPluginSetupSerially is false which means
// the loopback and eth0 are handled in parallel mode. Since the loopback
// device is created as the net namespace is created, it's safe to run
// in parallel mode as the default setting.
NetworkPluginSetupSerially bool `toml:"setup_serially" json:"setupSerially"`
// NetworkPluginConfTemplate is the file path of the Go template used to generate the cni config.
// When it is set, containerd will get cidr(s) from kubelet to replace {{.PodCIDR}},
// {{.PodCIDRRanges}} or {{.Routes}} in the template, and write the config into
// NetworkPluginConfDir.
// Ideally the cni config should be placed by the system admin or a cni daemon like calico,
// weaveworks etc. However, this is useful for cases where there is no cni daemonset to place the cni config.
// This allows for very simple generic networking using the Kubernetes built-in node pod CIDR IPAM, avoiding the
// need to fetch the node object through some external process (which has scalability, auth, complexity issues).
// It is currently heavily used in kubernetes-containerd CI testing.
// NetworkPluginConfTemplate was once deprecated in containerd v1.7.0,
// but its deprecation was cancelled in v1.7.3.
NetworkPluginConfTemplate string `toml:"conf_template" json:"confTemplate"`
// IPPreference specifies the strategy to use when selecting the main IP address for a pod.
//
// Options include:
// * ipv4, "" - (default) select the first ipv4 address
// * ipv6 - select the first ipv6 address
// * cni - use the order returned by the CNI plugins, returning the first IP address from the results
IPPreference string `toml:"ip_pref" json:"ipPref"`
}
// Mirror contains the config related to a registry mirror.
// It is the value type of the deprecated Registry.Mirrors map.
type Mirror struct {
// Endpoints are the endpoints for a namespace. The CRI plugin will try the
// endpoints one by one until a working one is found. Each endpoint must be a
// valid URL with a host specified.
// The scheme, host and path from the endpoint URL will be used.
Endpoints []string `toml:"endpoint" json:"endpoint"`
}
// AuthConfig contains the config related to authentication to a specific registry.
type AuthConfig struct {
// Username is the username used to log in to the registry.
Username string `toml:"username" json:"username"`
// Password is the password used to log in to the registry.
Password string `toml:"password" json:"password"`
// Auth is a base64 encoded string from the concatenation of the username,
// a colon, and the password.
Auth string `toml:"auth" json:"auth"`
// IdentityToken is used to authenticate the user and get
// an access token for the registry.
IdentityToken string `toml:"identitytoken" json:"identitytoken"`
}
// Registry contains the registry settings of the CRI image configuration.
// ValidateImageConfig checks these fields and reports a deprecation warning
// for each of Mirrors, Configs and Auths that is set.
type Registry struct {
// ConfigPath is a path to the root directory containing registry-specific
// configurations.
// If ConfigPath is set, the rest of the registry specific options are ignored.
ConfigPath string `toml:"config_path" json:"configPath"`
// Mirrors are namespace to mirror mapping for all namespaces.
// This option will not be used when ConfigPath is provided;
// ValidateImageConfig returns an error if both are set.
// DEPRECATED: Use ConfigPath instead. Remove in containerd 2.0.
Mirrors map[string]Mirror `toml:"mirrors" json:"mirrors"`
// Configs are configs for each registry.
// The key is the domain name or IP of the registry.
// DEPRECATED: Use ConfigPath instead.
Configs map[string]RegistryConfig `toml:"configs" json:"configs"`
// Auths are registry endpoint to auth config mapping. The registry endpoint must
// be a valid url with host specified.
// ValidateImageConfig copies every entry into Configs, keyed by the endpoint
// host when the endpoint carries a URL scheme and by the raw endpoint otherwise.
// DEPRECATED: Use ConfigPath instead. Remove in containerd 2.0, supported in 1.x releases.
Auths map[string]AuthConfig `toml:"auths" json:"auths"`
// Headers adds additional HTTP headers that get sent to all registries.
Headers map[string][]string `toml:"headers" json:"headers"`
}
// RegistryConfig contains configuration used to communicate with the registry.
// It is the value type of the deprecated Registry.Configs map.
type RegistryConfig struct {
// Auth contains information to authenticate to the registry.
// ValidateImageConfig sets it when migrating entries from Registry.Auths.
Auth *AuthConfig `toml:"auth" json:"auth"`
}
// ImageDecryption contains configuration for handling decryption of encrypted container images.
type ImageDecryption struct {
// KeyModel specifies the trust model of where keys should reside.
// KeyModelNode ("node") is the key model constant declared in this package.
//
// Details of field usage can be found in:
// https://github.com/containerd/containerd/tree/main/docs/cri/config.md
//
// Details of key models can be found in:
// https://github.com/containerd/containerd/tree/main/docs/cri/decryption.md
KeyModel string `toml:"key_model" json:"keyModel"`
}
// ImagePlatform represents the platform to use for an image including the
// snapshotter to use. If snapshotter is not provided, the platform default
// can be assumed. When platform is not provided, the default platform can
// be assumed.
type ImagePlatform struct {
// Platform is the platform to use for the image.
// NOTE(review): presumably a platform specifier string such as "linux/amd64";
// the expected format is not visible in this file — confirm.
Platform string `toml:"platform" json:"platform"`
// Snapshotter sets the snapshotter at runtime level instead of making it a global configuration.
// An example use case is to use devmapper or other snapshotters in Kata containers for performance and security
// while using default snapshotters for operational simplicity.
// See https://github.com/containerd/containerd/issues/6657 for details.
Snapshotter string `toml:"snapshotter" json:"snapshotter"`
}
// ImageConfig contains the image-related configuration of the CRI plugin:
// snapshotter selection, pinned images, registry access and image pull tuning.
// It is validated by ValidateImageConfig.
type ImageConfig struct {
// Snapshotter is the snapshotter used by containerd.
Snapshotter string `toml:"snapshotter" json:"snapshotter"`
// DisableSnapshotAnnotations disables passing additional annotations (image
// related information) to snapshotters. These annotations are required by
// stargz snapshotter (https://github.com/containerd/stargz-snapshotter).
DisableSnapshotAnnotations bool `toml:"disable_snapshot_annotations" json:"disableSnapshotAnnotations"`
// DiscardUnpackedLayers is a boolean flag to specify whether to allow GC to
// remove layers from the content store after successfully unpacking these
// layers to the snapshotter.
DiscardUnpackedLayers bool `toml:"discard_unpacked_layers" json:"discardUnpackedLayers"`
// PinnedImages are images which the CRI plugin uses and should not be
// removed by the CRI client. The images have a key which can be used
// by other plugins to lookup the current image name.
// Image names should be full names including domain and tag.
// Examples:
// "sandbox": "k8s.gcr.io/pause:3.9"
// "base": "docker.io/library/ubuntu:latest"
// Migrated from:
// (PluginConfig).SandboxImage string `toml:"sandbox_image" json:"sandboxImage"`
//
// NOTE(review): unlike its sibling fields, this field declares no toml/json
// struct tags, so encoders fall back to their default field-name mapping —
// confirm this is intended.
PinnedImages map[string]string
// RuntimePlatforms is a map between the runtime and the image platform to
// use for that runtime. When resolving an image for a runtime, this
// mapping will be used to select the image for the platform and the
// snapshotter for unpacking.
RuntimePlatforms map[string]ImagePlatform `toml:"runtime_platforms" json:"runtimePlatforms"`
// Registry contains config related to the registry.
Registry Registry `toml:"registry" json:"registry"`
// ImageDecryption contains config related to handling decryption of encrypted container images.
ImageDecryption `toml:"image_decryption" json:"imageDecryption"`
// MaxConcurrentDownloads restricts the number of concurrent downloads for each image.
// TODO: Migrate to transfer service
MaxConcurrentDownloads int `toml:"max_concurrent_downloads" json:"maxConcurrentDownloads"`
// ImagePullProgressTimeout is the maximum duration for which no image data
// is read from the image registry over the open connection. It is reset
// whenever a new byte has been read. On timeout, the image pull is
// cancelled. A zero value means there is no timeout.
//
// The string is in the golang duration format, see:
// https://golang.org/pkg/time/#ParseDuration
// A non-empty value that fails to parse is rejected by ValidateImageConfig.
ImagePullProgressTimeout string `toml:"image_pull_progress_timeout" json:"imagePullProgressTimeout"`
// ImagePullWithSyncFs is an experimental setting. It forces a filesystem
// sync during unpacking to ensure data integrity.
// TODO: Migrate to transfer service
ImagePullWithSyncFs bool `toml:"image_pull_with_sync_fs" json:"imagePullWithSyncFs"`
// StatsCollectPeriod is the period (in seconds) of snapshots stats collection.
StatsCollectPeriod int `toml:"stats_collect_period" json:"statsCollectPeriod"`
}
// RuntimeConfig contains the toml config related to the CRI plugin;
// it is a subset of Config. It is validated by ValidateRuntimeConfig.
type RuntimeConfig struct {
// ContainerdConfig contains config related to containerd.
ContainerdConfig `toml:"containerd" json:"containerd"`
// CniConfig contains config related to cni.
CniConfig `toml:"cni" json:"cni"`
// EnableSelinux indicates to enable the selinux support.
EnableSelinux bool `toml:"enable_selinux" json:"enableSelinux"`
// SelinuxCategoryRange allows the upper bound on the category range to be set.
// If not specified or set to 0, defaults to 1024 from the selinux package.
SelinuxCategoryRange int `toml:"selinux_category_range" json:"selinuxCategoryRange"`
// MaxContainerLogLineSize is the maximum log line size in bytes for a container.
// Log lines longer than the limit will be split into multiple lines. A non-positive
// value means no limit.
// NOTE(review): the json tag is "maxContainerLogSize" (without "Line"),
// which differs from the field name — confirm whether consumers rely on it.
MaxContainerLogLineSize int `toml:"max_container_log_line_size" json:"maxContainerLogSize"`
// DisableCgroup indicates to disable the cgroup support.
// This is useful when the containerd does not have permission to access cgroup.
DisableCgroup bool `toml:"disable_cgroup" json:"disableCgroup"`
// DisableApparmor indicates to disable the apparmor support.
// This is useful when the containerd does not have permission to access Apparmor.
DisableApparmor bool `toml:"disable_apparmor" json:"disableApparmor"`
// RestrictOOMScoreAdj indicates to limit the lower bound of OOMScoreAdj to the containerd's
// current OOMScoreAdj.
// This is useful when the containerd does not have permission to decrease OOMScoreAdj.
RestrictOOMScoreAdj bool `toml:"restrict_oom_score_adj" json:"restrictOOMScoreAdj"`
// DisableProcMount disables Kubernetes ProcMount support. This MUST be set to `true`
// when using containerd with Kubernetes <=1.11.
DisableProcMount bool `toml:"disable_proc_mount" json:"disableProcMount"`
// UnsetSeccompProfile is the profile containerd/cri will use if the provided seccomp profile is
// unset (`""`) for a container (default is `unconfined`).
UnsetSeccompProfile string `toml:"unset_seccomp_profile" json:"unsetSeccompProfile"`
// TolerateMissingHugetlbController if set to false will error out on create/update
// container requests with huge page limits if the cgroup controller for hugepages is not present.
// This helps with supporting Kubernetes <=1.18 out of the box. (default is `true`)
TolerateMissingHugetlbController bool `toml:"tolerate_missing_hugetlb_controller" json:"tolerateMissingHugetlbController"`
// DisableHugetlbController indicates to silently disable the hugetlb controller, even when it is
// present in /sys/fs/cgroup/cgroup.controllers.
// This helps with running rootless mode + cgroup v2 + systemd but without hugetlb delegation.
DisableHugetlbController bool `toml:"disable_hugetlb_controller" json:"disableHugetlbController"`
// DeviceOwnershipFromSecurityContext changes the default behavior of setting container devices uid/gid
// from CRI's SecurityContext (RunAsUser/RunAsGroup) instead of taking host's uid/gid. Defaults to false.
DeviceOwnershipFromSecurityContext bool `toml:"device_ownership_from_security_context" json:"device_ownership_from_security_context"`
// IgnoreImageDefinedVolumes ignores volumes defined by the image. Useful for better resource
// isolation, security and early detection of issues in the mount configuration when using
// ReadOnlyRootFilesystem since containers won't silently mount a temporary volume.
IgnoreImageDefinedVolumes bool `toml:"ignore_image_defined_volumes" json:"ignoreImageDefinedVolumes"`
// NetNSMountsUnderStateDir places all mounts for network namespaces under StateDir/netns instead
// of being placed under the hardcoded directory /var/run/netns. Changing this setting requires
// that all containers are deleted.
NetNSMountsUnderStateDir bool `toml:"netns_mounts_under_state_dir" json:"netnsMountsUnderStateDir"`
// EnableUnprivilegedPorts configures net.ipv4.ip_unprivileged_port_start=0
// for all containers which are not using host network
// and if it is not overwritten by PodSandboxConfig.
// Note that the default is currently disabled but is intended to change in the future, see:
// https://github.com/kubernetes/kubernetes/issues/102612
// On Linux, ValidateEnableUnprivileged rejects enabling this on kernels older than 4.11.
EnableUnprivilegedPorts bool `toml:"enable_unprivileged_ports" json:"enableUnprivilegedPorts"`
// EnableUnprivilegedICMP configures net.ipv4.ping_group_range="0 2147483647"
// for all containers which are not using host network, are not running in user namespace
// and if it is not overwritten by PodSandboxConfig.
// Note that the default is currently disabled but is intended to change in the future together with EnableUnprivilegedPorts.
// On Linux, ValidateEnableUnprivileged rejects enabling this on kernels older than 4.11.
EnableUnprivilegedICMP bool `toml:"enable_unprivileged_icmp" json:"enableUnprivilegedICMP"`
// EnableCDI indicates to enable injection of the Container Device Interface Specifications
// into the OCI config.
// For more details about CDI and the syntax of CDI Spec files please refer to
// https://tags.cncf.io/container-device-interface.
EnableCDI bool `toml:"enable_cdi" json:"enableCDI"`
// CDISpecDirs is the list of directories to scan for Container Device Interface Specifications.
// For more details about CDI configuration please refer to
// https://tags.cncf.io/container-device-interface#containerd-configuration
CDISpecDirs []string `toml:"cdi_spec_dirs" json:"cdiSpecDirs"`
// DrainExecSyncIOTimeout is the maximum duration to wait for the ExecSync
// API's IO EOF event after the exec init process exits. A zero value means
// there is no timeout.
//
// The string is in the golang duration format, see:
// https://golang.org/pkg/time/#ParseDuration
//
// For example, the value can be '5h', '2h30m', '10s'.
// A non-empty value that fails to parse is rejected by ValidateRuntimeConfig.
DrainExecSyncIOTimeout string `toml:"drain_exec_sync_io_timeout" json:"drainExecSyncIOTimeout"`
}
// X509KeyPairStreaming contains the x509 configuration for streaming.
// It is embedded in ServerConfig; DefaultServerConfig leaves both paths empty.
type X509KeyPairStreaming struct {
// TLSCertFile is the path to a certificate file.
TLSCertFile string `toml:"tls_cert_file" json:"tlsCertFile"`
// TLSKeyFile is the path to a private key file.
TLSKeyFile string `toml:"tls_key_file" json:"tlsKeyFile"`
}
// Config contains all configurations for the CRI runtime plugin.
// It embeds RuntimeConfig and adds directory/endpoint paths; the added fields
// carry json tags only (no toml tags).
type Config struct {
// RuntimeConfig is the config for CRI runtime.
RuntimeConfig
// ContainerdRootDir is the root directory path for containerd.
ContainerdRootDir string `json:"containerdRootDir"`
// ContainerdEndpoint is the containerd endpoint path.
ContainerdEndpoint string `json:"containerdEndpoint"`
// RootDir is the root directory path for managing cri plugin files
// (metadata checkpoint etc.)
RootDir string `json:"rootDir"`
// StateDir is the root directory path for managing volatile pod/container data.
StateDir string `json:"stateDir"`
}
// ServerConfig contains all the configuration for the CRI API server.
// DefaultServerConfig returns the default values and ValidateServerConfig
// validates a populated instance.
type ServerConfig struct {
// DisableTCPService disables serving CRI on the TCP server.
DisableTCPService bool `toml:"disable_tcp_service" json:"disableTCPService"`
// StreamServerAddress is the ip address the streaming server is listening on.
StreamServerAddress string `toml:"stream_server_address" json:"streamServerAddress"`
// StreamServerPort is the port the streaming server is listening on.
StreamServerPort string `toml:"stream_server_port" json:"streamServerPort"`
// StreamIdleTimeout is the maximum time a streaming connection
// can be idle before the connection is automatically closed.
// The string is in the golang duration format, see:
// https://golang.org/pkg/time/#ParseDuration
// A non-empty value that fails to parse is rejected by ValidateServerConfig.
StreamIdleTimeout string `toml:"stream_idle_timeout" json:"streamIdleTimeout"`
// EnableTLSStreaming indicates to enable the TLS streaming support.
EnableTLSStreaming bool `toml:"enable_tls_streaming" json:"enableTLSStreaming"`
// X509KeyPairStreaming is an x509 key pair used for TLS streaming.
X509KeyPairStreaming `toml:"x509_key_pair_streaming" json:"x509KeyPairStreaming"`
}
const (
// RuntimeUntrusted is the runtime handler name that GetSandboxRuntime selects
// for sandboxes carrying the untrusted-workload annotation.
// NOTE(review): this was previously described as the implicit runtime for
// ContainerdConfig.UntrustedWorkloadRuntime, a field that does not exist in
// the ContainerdConfig struct in this file — confirm the comment is current.
RuntimeUntrusted = "untrusted"
// RuntimeDefault is the runtime name "default".
// NOTE(review): this was previously described as the implicit runtime for
// ContainerdConfig.DefaultRuntime, a field that does not exist in the
// ContainerdConfig struct in this file (DefaultRuntimeName is the visible
// equivalent) — confirm remaining uses outside the tests.
RuntimeDefault = "default"
// KeyModelNode is the key model where keys for encrypted images reside
// on the worker nodes.
KeyModelNode = "node"
)
// ValidateImageConfig checks the image configuration c and returns the
// deprecation warnings collected along the way.
//
// It rejects `mirrors` combined with `config_path`, warns about the deprecated
// `mirrors`, `configs` and `auths` registry options, folds every `auths` entry
// into c.Registry.Configs (mutating c), and verifies that a non-empty
// image_pull_progress_timeout parses as a Go duration. On error, the warnings
// gathered so far are returned together with the error.
func ValidateImageConfig(ctx context.Context, c *ImageConfig) ([]deprecation.Warning, error) {
	var warnings []deprecation.Warning
	reg := &c.Registry

	if len(reg.Mirrors) != 0 {
		if reg.ConfigPath != "" {
			return warnings, errors.New("`mirrors` cannot be set when `config_path` is provided")
		}
		warnings = append(warnings, deprecation.CRIRegistryMirrors)
		log.G(ctx).Warning("`mirrors` is deprecated, please use `config_path` instead")
	}

	// Checked before the auths migration below, so configs created by that
	// migration do not trigger the `configs` deprecation warning on their own.
	if len(reg.Configs) > 0 {
		warnings = append(warnings, deprecation.CRIRegistryConfigs)
		log.G(ctx).Warning("`configs` is deprecated, please use `config_path` instead")
	}

	// Validation for deprecated auths options and mapping it to configs.
	if len(reg.Auths) > 0 {
		if reg.Configs == nil {
			reg.Configs = make(map[string]RegistryConfig)
		}
		for rawEndpoint, authConfig := range reg.Auths {
			parsed, err := url.Parse(rawEndpoint)
			if err != nil {
				return warnings, fmt.Errorf("failed to parse registry url %q from `registry.auths`: %w", rawEndpoint, err)
			}

			// Do not include the scheme in the new registry config.
			key := rawEndpoint
			if parsed.Scheme != "" {
				key = parsed.Host
			}

			// Take a per-iteration copy so the stored pointer does not alias
			// the loop variable.
			credentials := authConfig
			entry := reg.Configs[key]
			entry.Auth = &credentials
			reg.Configs[key] = entry
		}
		warnings = append(warnings, deprecation.CRIRegistryAuths)
		log.G(ctx).Warning("`auths` is deprecated, please use `ImagePullSecrets` instead")
	}

	// Validation for image_pull_progress_timeout
	if timeout := c.ImagePullProgressTimeout; timeout != "" {
		if _, err := time.ParseDuration(timeout); err != nil {
			return warnings, fmt.Errorf("invalid image pull progress timeout: %w", err)
		}
	}

	return warnings, nil
}
// introspectRuntimeFeatures asks the introspection service for the features
// advertised by the runtime r.
//
// It sends a PluginInfo request for the "task" runtime plugin whose options
// carry a RuntimeRequest (runtime path plus the runtime options generated from
// r), then decodes the RuntimeInfo found in the response's Extra field and
// returns its Features. r.Path takes precedence over r.Type as the runtime
// path. A nil client (as in unit tests) yields an error.
func introspectRuntimeFeatures(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (*features.Features, error) {
	if introspectionClient == nil { // happens for unit tests
		return nil, errors.New("introspectionClient is nil")
	}

	runtimePath := r.Type
	if r.Path != "" {
		runtimePath = r.Path
	}
	runtimeReq := &apitypes.RuntimeRequest{RuntimePath: runtimePath}

	opts, err := GenerateRuntimeOptions(r)
	if err != nil {
		return nil, err
	}
	if runtimeReq.Options, err = protobuf.MarshalAnyToProto(opts); err != nil {
		return nil, fmt.Errorf("failed to marshal %T: %w", opts, err)
	}

	pluginReq := &introspectionapi.PluginInfoRequest{
		Type: string(plugins.RuntimePluginV2),
		ID:   "task",
	}
	if pluginReq.Options, err = protobuf.MarshalAnyToProto(runtimeReq); err != nil {
		return nil, fmt.Errorf("failed to marshal %T: %w", runtimeReq, err)
	}

	pluginResp, err := introspectionClient.PluginInfo(ctx, pluginReq)
	if err != nil {
		return nil, fmt.Errorf("failed to call PluginInfo: %w", err)
	}

	var runtimeInfo apitypes.RuntimeInfo
	if err := typeurl.UnmarshalTo(pluginResp.Extra, &runtimeInfo); err != nil {
		return nil, fmt.Errorf("failed to get runtime info from plugin info: %w", err)
	}

	decoded, err := typeurl.UnmarshalAny(runtimeInfo.Features)
	if err != nil {
		return nil, fmt.Errorf("failed to unmarshal Features (%T): %w", runtimeInfo.Features, err)
	}

	if runtimeFeatures, ok := decoded.(*features.Features); ok {
		return runtimeFeatures, nil
	}
	return nil, fmt.Errorf("unknown features type %T", decoded)
}
// resolveTreatRoMountsAsRro resolves r.TreatRoMountsAsRro into a boolean that
// states whether read-only mounts are treated as recursive read-only ("rro")
// mounts for the runtime r.
//
// Runtime support is probed through introspectRuntimeFeatures, and only for
// runtimes of type plugins.RuntimeRuncV2: the runtime supports rro when its
// advertised MountOptions contain "rro". A failed probe is logged as a
// warning and treated as "not supported". Kernel support is read from the
// package-level kernelSupportsRro.
//
// Resolution by setting:
//   - TernaryDisabled: false.
//   - TernaryEnabled: true, or an error (alongside true) when the kernel or
//     the runtime lacks rro support.
//   - TernaryEmpty / TernaryIfPossible: true only when the runtime is runc v2
//     and both kernel and runtime support rro; otherwise false with a log
//     message explaining why.
//   - anything else: an error naming the accepted values.
func resolveTreatRoMountsAsRro(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (bool, error) {
	// debugPrefix is prepended to every log message so that messages can be
	// attributed to a runtime type (and binary, when configured).
	debugPrefix := "treat_ro_mounts_as_rro"
	if r.Type != "" {
		debugPrefix += fmt.Sprintf("[%s]", r.Type)
	}
	// r.Options stores interface{} values: a missing key yields a nil
	// interface, which compares unequal to "". Check for nil explicitly so an
	// unset BinaryName does not add "[<nil>]" to the prefix.
	if binaryName := r.Options["BinaryName"]; binaryName != nil && binaryName != "" {
		debugPrefix += fmt.Sprintf("[%v]", binaryName)
	}
	debugPrefix += ": "

	var runtimeSupportsRro bool
	if r.Type == plugins.RuntimeRuncV2 {
		runtimeFeatures, err := introspectRuntimeFeatures(ctx, introspectionClient, r)
		if err != nil {
			// The prefix contains user-controlled text, so it must never be
			// used as (part of) a format string.
			log.G(ctx).WithError(err).Warn(debugPrefix + "failed to introspect runtime features (binary is not compatible with runc v1.1?)")
		} else {
			log.G(ctx).Debugf("%sFeatures: %+v", debugPrefix, runtimeFeatures)
			for _, s := range runtimeFeatures.MountOptions {
				if s == "rro" {
					runtimeSupportsRro = true
					break
				}
			}
		}
	}

	switch r.TreatRoMountsAsRro {
	case TernaryDisabled:
		log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly disabled")
		return false, nil
	case TernaryEnabled:
		log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly enabled")
		if !kernelSupportsRro {
			return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs Linux kernel v5.12 or later", TernaryEnabled)
		}
		if !runtimeSupportsRro {
			return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs a runtime that is compatible with runc v1.1", TernaryEnabled)
		}
		return true, nil
	case TernaryEmpty, TernaryIfPossible:
		if r.Type != plugins.RuntimeRuncV2 {
			log.G(ctx).Debugf("%srro mounts are not supported by runtime %q, disabling rro mounts", debugPrefix, r.Type)
			return false, nil
		}
		if !kernelSupportsRro {
			msg := debugPrefix + "rro mounts are not supported by kernel, disabling rro mounts"
			// Only warn on Linux, where upgrading the kernel is actionable.
			if goruntime.GOOS == "linux" {
				msg += " (Hint: upgrade the kernel to v5.12 or later)"
				log.G(ctx).Warn(msg)
			} else {
				log.G(ctx).Debug(msg)
			}
			return false, nil
		}
		if !runtimeSupportsRro {
			log.G(ctx).Warn(debugPrefix + "rro mounts are not supported by runtime, disabling rro mounts (Hint: use a runtime that is compatible with runc v1.1)")
			return false, nil
		}
		log.G(ctx).Debug(debugPrefix + "rro mounts are implicitly enabled")
		return true, nil
	default:
		return false, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q (must be %q, %q, or %q)",
			r.TreatRoMountsAsRro, TernaryDisabled, TernaryEnabled, TernaryIfPossible)
	}
}
// ValidateRuntimeConfig checks the runtime configuration c and normalizes it
// in place.
//
// It ensures the runtimes map exists, requires `default_runtime_name` to be
// set and to name a configured runtime, and then, for every runtime: enforces
// the privileged_without_host_devices dependency, defaults an empty Sandboxer
// to ModePodSandbox, and stores the resolved TreatRoMountsAsRro value. Finally
// it validates drain_exec_sync_io_timeout and the unprivileged port/ICMP
// settings. The returned warning slice is never appended to by this function.
func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig, introspectionClient introspectionapi.IntrospectionClient) ([]deprecation.Warning, error) {
	var warnings []deprecation.Warning

	containerdCfg := &c.ContainerdConfig
	if containerdCfg.Runtimes == nil {
		containerdCfg.Runtimes = make(map[string]Runtime)
	}

	// Validation for default_runtime_name
	defaultName := containerdCfg.DefaultRuntimeName
	if defaultName == "" {
		return warnings, errors.New("`default_runtime_name` is empty")
	}
	if _, found := containerdCfg.Runtimes[defaultName]; !found {
		return warnings, fmt.Errorf("no corresponding runtime configured in `containerd.runtimes` for `containerd` `default_runtime_name = \"%s\"", containerdCfg.DefaultRuntimeName)
	}

	for name, rt := range containerdCfg.Runtimes {
		if rt.PrivilegedWithoutHostDevicesAllDevicesAllowed && !rt.PrivilegedWithoutHostDevices {
			return warnings, errors.New("`privileged_without_host_devices_all_devices_allowed` requires `privileged_without_host_devices` to be enabled")
		}

		// If empty, use default podSandbox mode
		if rt.Sandboxer == "" {
			rt.Sandboxer = string(ModePodSandbox)
		}

		// Resolve rt.TreatRoMountsAsRro (empty value must not be ignored)
		// into rt.TreatRoMountsAsRroResolved (bool).
		resolved, err := resolveTreatRoMountsAsRro(ctx, introspectionClient, rt)
		if err != nil {
			return warnings, err
		}
		rt.TreatRoMountsAsRroResolved = resolved

		// rt is a copy of the map value; write the updated copy back.
		containerdCfg.Runtimes[name] = rt
	}

	// Validation for drain_exec_sync_io_timeout
	if timeout := c.DrainExecSyncIOTimeout; timeout != "" {
		if _, err := time.ParseDuration(timeout); err != nil {
			return warnings, fmt.Errorf("invalid `drain_exec_sync_io_timeout`: %w", err)
		}
	}

	if err := ValidateEnableUnprivileged(ctx, c); err != nil {
		return warnings, err
	}
	return warnings, nil
}
// ValidateServerConfig checks the server configuration c. The only check is
// that a non-empty stream_idle_timeout parses as a Go duration. The returned
// warning slice is never appended to by this function.
func ValidateServerConfig(ctx context.Context, c *ServerConfig) ([]deprecation.Warning, error) {
	var warnings []deprecation.Warning

	// Validation for stream_idle_timeout
	idleTimeout := c.StreamIdleTimeout
	if idleTimeout == "" {
		return warnings, nil
	}
	if _, parseErr := time.ParseDuration(idleTimeout); parseErr != nil {
		return warnings, fmt.Errorf("invalid stream idle timeout: %w", parseErr)
	}
	return warnings, nil
}
// GetSandboxRuntime returns the Runtime configured for the given sandbox.
//
// A sandbox annotated as an untrusted workload is always mapped to the
// RuntimeUntrusted handler; it is rejected when it names a different explicit
// handler or requests host (node) network, PID or IPC namespaces. Otherwise
// the requested runtimeHandler is used, falling back to DefaultRuntimeName
// when it is empty. An error is returned when no runtime is configured under
// the selected handler name.
func (c *Config) GetSandboxRuntime(podSandboxConfig *runtime.PodSandboxConfig, runtimeHandler string) (Runtime, error) {
	handler := runtimeHandler

	if untrustedWorkload(podSandboxConfig) {
		switch {
		case handler != "" && handler != RuntimeUntrusted:
			// If the untrusted annotation is provided, runtimeHandler MUST be empty.
			return Runtime{}, errors.New("untrusted workload with explicit runtime handler is not allowed")
		case hostAccessingSandbox(podSandboxConfig):
			// If the untrusted workload is requesting access to the host/node, this request will fail.
			//
			// Note: If the workload is marked untrusted but requests privileged, this can be granted, as the
			// runtime may support this. For example, in a virtual-machine isolated runtime, privileged
			// is a supported option, granting the workload to access the entire guest VM instead of host.
			// TODO(windows): Deprecate this so that we don't need to handle it for windows.
			return Runtime{}, errors.New("untrusted workload with host access is not allowed")
		}
		handler = RuntimeUntrusted
	}

	if handler == "" {
		handler = c.DefaultRuntimeName
	}

	if selected, ok := c.Runtimes[handler]; ok {
		return selected, nil
	}
	return Runtime{}, fmt.Errorf("no runtime for %q is configured", handler)
}
// untrustedWorkload reports whether the sandbox config carries the
// untrusted-workload annotation with the exact value "true".
func untrustedWorkload(config *runtime.PodSandboxConfig) bool {
	value := config.GetAnnotations()[annotations.UntrustedWorkload]
	return value == "true"
}
// hostAccessingSandbox reports whether the sandbox configuration requires
// additional host access, i.e. whether its Linux security context requests
// the node's network, PID or IPC namespace.
func hostAccessingSandbox(config *runtime.PodSandboxConfig) bool {
	nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions()

	usesHostNetwork := nsOpts.GetNetwork() == runtime.NamespaceMode_NODE
	usesHostPID := nsOpts.GetPid() == runtime.NamespaceMode_NODE
	usesHostIPC := nsOpts.GetIpc() == runtime.NamespaceMode_NODE

	return usesHostNetwork || usesHostPID || usesHostIPC
}
// GenerateRuntimeOptions generates runtime options from the cri plugin config.
//
// The free-form r.Options map is round-tripped through TOML into the options
// type that matches r.Type (see getRuntimeOptionsType). It returns (nil, nil)
// when r.Options is nil. Both the marshal and the unmarshal failure are
// wrapped with the runtime type so the offending runtime can be identified.
func GenerateRuntimeOptions(r Runtime) (interface{}, error) {
	if r.Options == nil {
		return nil, nil
	}

	b, err := toml.Marshal(r.Options)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal TOML blob for runtime %q: %w", r.Type, err)
	}

	options := getRuntimeOptionsType(r.Type)
	if err := toml.Unmarshal(b, options); err != nil {
		return nil, fmt.Errorf("failed to unmarshal TOML blob for runtime %q: %w", r.Type, err)
	}

	// For generic configuration, if no config path specified (preserving old behavior), pass
	// the whole TOML configuration section to the runtime.
	if runtimeOpts, ok := options.(*runtimeoptions.Options); ok && runtimeOpts.ConfigPath == "" {
		runtimeOpts.ConfigBody = b
	}

	return options, nil
}
// getRuntimeOptionsType returns a pointer to a zero-valued options struct for
// the runtime type name t: runc options for plugins.RuntimeRuncV2, runhcs
// options for plugins.RuntimeRunhcsV1, and the generic runtime options for
// every other type.
func getRuntimeOptionsType(t string) interface{} {
	if t == plugins.RuntimeRuncV2 {
		return &runcoptions.Options{}
	}
	if t == plugins.RuntimeRunhcsV1 {
		return &runhcsoptions.Options{}
	}
	return &runtimeoptions.Options{}
}
// DefaultServerConfig returns the default CRI API server configuration: TCP
// service disabled, streaming server on 127.0.0.1 with port "0", the
// streaming package's default idle timeout, and TLS streaming disabled with
// no key pair configured.
func DefaultServerConfig() ServerConfig {
	var cfg ServerConfig

	cfg.DisableTCPService = true
	cfg.StreamServerAddress = "127.0.0.1"
	cfg.StreamServerPort = "0"
	// Annotated as "4 hour" by the original author; the value itself comes
	// from the streaming package.
	cfg.StreamIdleTimeout = streaming.DefaultConfig.StreamIdleTimeout.String()
	cfg.EnableTLSStreaming = false
	cfg.X509KeyPairStreaming = X509KeyPairStreaming{
		TLSKeyFile:  "",
		TLSCertFile: "",
	}

	return cfg
}

View File

@@ -1,53 +0,0 @@
//go:build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"context"
"errors"
"fmt"
kernel "github.com/containerd/containerd/v2/pkg/kernelversion"
)
// kernelGreaterEqualThan is the kernel version check used by this package.
// It is a package-level variable rather than a direct call so that tests can
// replace it with a stub and restore it afterwards.
var kernelGreaterEqualThan = kernel.GreaterEqualThan
// ValidateEnableUnprivileged verifies that the running kernel is new enough
// for the unprivileged ICMP / unprivileged ports settings. When neither
// setting is enabled it returns nil without checking the kernel; otherwise it
// requires kernel 4.11 or later and returns an error if the version check
// fails or the kernel is older.
func ValidateEnableUnprivileged(ctx context.Context, c *RuntimeConfig) error {
	if !c.EnableUnprivilegedICMP && !c.EnableUnprivilegedPorts {
		return nil
	}

	minVersion := kernel.KernelVersion{Kernel: 4, Major: 11}
	supported, err := kernelGreaterEqualThan(minVersion)
	switch {
	case err != nil:
		return fmt.Errorf("check current system kernel version error: %w", err)
	case !supported:
		return errors.New("unprivileged_icmp and unprivileged_port require kernel version greater than or equal to 4.11")
	}
	return nil
}
// kernelSupportsRro reports whether the running kernel is version 5.12 or
// newer. It is computed once in init.
var kernelSupportsRro bool

// init evaluates the kernel version check for kernelSupportsRro and panics if
// the kernel version cannot be determined.
func init() {
	supported, err := kernelGreaterEqualThan(kernel.KernelVersion{Kernel: 5, Major: 12})
	if err != nil {
		panic(fmt.Errorf("check current system kernel version error: %w", err))
	}
	kernelSupportsRro = supported
}

View File

@@ -1,104 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"context"
"testing"
kernel "github.com/containerd/containerd/v2/pkg/kernelversion"
"github.com/stretchr/testify/assert"
)
// TestValidateEnableUnprivileged exercises ValidateEnableUnprivileged with the
// kernel version check stubbed out, covering the options being disabled, and
// enabled on a kernel that is older / at least as new as the required version.
func TestValidateEnableUnprivileged(t *testing.T) {
	// Restore the real kernel check once all subtests have run.
	origKernelGreaterEqualThan := kernelGreaterEqualThan
	t.Cleanup(func() {
		kernelGreaterEqualThan = origKernelGreaterEqualThan
	})

	tests := []struct {
		name          string
		config        *RuntimeConfig
		kernelGreater bool
		expectedErr   string
	}{
		{
			name: "disable unprivileged_icmp and unprivileged_port",
			config: &RuntimeConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: "default",
						},
					},
				},
				EnableUnprivilegedICMP:  false,
				EnableUnprivilegedPorts: false,
			},
			expectedErr: "",
		},
		{
			name: "enable unprivileged_icmp or unprivileged_port, but kernel version is smaller than 4.11",
			config: &RuntimeConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: "default",
						},
					},
				},
				EnableUnprivilegedICMP:  true,
				EnableUnprivilegedPorts: true,
			},
			kernelGreater: false,
			expectedErr:   "unprivileged_icmp and unprivileged_port require kernel version greater than or equal to 4.11",
		},
		{
			name: "enable unprivileged_icmp or unprivileged_port, but kernel version is greater than or equal 4.11",
			config: &RuntimeConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: "default",
						},
					},
				},
				EnableUnprivilegedICMP:  true,
				EnableUnprivilegedPorts: true,
			},
			kernelGreater: true,
			expectedErr:   "",
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// Stub the kernel check with the outcome this case asks for.
			kernelGreaterEqualThan = func(minVersion kernel.KernelVersion) (bool, error) {
				return test.kernelGreater, nil
			}
			err := ValidateEnableUnprivileged(context.Background(), test.config)
			if test.expectedErr != "" {
				// EqualError reports a failed assertion when err is nil
				// instead of panicking on err.Error(), and takes the
				// expected message in the expected position.
				assert.EqualError(t, err, test.expectedErr)
			} else {
				assert.NoError(t, err)
			}
		})
	}
}

View File

@@ -1,29 +0,0 @@
//go:build !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"context"
)
// ValidateEnableUnprivileged is the variant built when the target is not
// Linux (see the build constraint of this file). It performs no checks and
// always returns nil.
func ValidateEnableUnprivileged(ctx context.Context, c *RuntimeConfig) error {
return nil
}
// kernelSupportsRro is never assigned in this file, so it keeps its zero
// value (false) in builds that use this variant.
var kernelSupportsRro bool

View File

@@ -1,309 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/pkg/deprecation"
)
// TestValidateConfig runs ValidateRuntimeConfig, ValidateImageConfig and
// ValidateServerConfig against a table of configurations. For each case it
// checks either the expected error substring or the (possibly mutated)
// resulting config, and finally that the collected deprecation warnings match.
func TestValidateConfig(t *testing.T) {
	for desc, test := range map[string]struct {
		runtimeConfig      *RuntimeConfig
		runtimeExpectedErr string
		runtimeExpected    *RuntimeConfig
		imageConfig        *ImageConfig
		imageExpectedErr   string
		imageExpected      *ImageConfig
		serverConfig       *ServerConfig
		serverExpectedErr  string
		serverExpected     *ServerConfig
		warnings           []deprecation.Warning
	}{
		"no default_runtime_name": {
			runtimeConfig:      &RuntimeConfig{},
			runtimeExpectedErr: "`default_runtime_name` is empty",
		},
		"no runtime[default_runtime_name]": {
			runtimeConfig: &RuntimeConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
				},
			},
			runtimeExpectedErr: "no corresponding runtime configured in `containerd.runtimes` for `containerd` `default_runtime_name = \"default\"",
		},
		"deprecated auths": {
			runtimeConfig: &RuntimeConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {},
					},
				},
			},
			runtimeExpected: &RuntimeConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Sandboxer: string(ModePodSandbox),
						},
					},
				},
			},
			imageConfig: &ImageConfig{
				Registry: Registry{
					Auths: map[string]AuthConfig{
						"https://gcr.io": {Username: "test"},
					},
				},
			},
			imageExpected: &ImageConfig{
				Registry: Registry{
					Configs: map[string]RegistryConfig{
						"gcr.io": {
							Auth: &AuthConfig{
								Username: "test",
							},
						},
					},
					Auths: map[string]AuthConfig{
						"https://gcr.io": {Username: "test"},
					},
				},
			},
			warnings: []deprecation.Warning{deprecation.CRIRegistryAuths},
		},
		"invalid stream_idle_timeout": {
			serverConfig: &ServerConfig{
				StreamIdleTimeout: "invalid",
			},
			serverExpectedErr: "invalid stream idle timeout",
		},
		"conflicting mirror registry config": {
			imageConfig: &ImageConfig{
				Registry: Registry{
					ConfigPath: "/etc/containerd/conf.d",
					Mirrors: map[string]Mirror{
						"something.io": {},
					},
				},
			},
			imageExpectedErr: "`mirrors` cannot be set when `config_path` is provided",
		},
		"deprecated mirrors": {
			runtimeConfig: &RuntimeConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {},
					},
				},
			},
			imageConfig: &ImageConfig{
				Registry: Registry{
					Mirrors: map[string]Mirror{
						"example.com": {},
					},
				},
			},
			runtimeExpected: &RuntimeConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Sandboxer: string(ModePodSandbox),
						},
					},
				},
			},
			imageExpected: &ImageConfig{
				Registry: Registry{
					Mirrors: map[string]Mirror{
						"example.com": {},
					},
				},
			},
			warnings: []deprecation.Warning{deprecation.CRIRegistryMirrors},
		},
		"deprecated configs": {
			runtimeConfig: &RuntimeConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {},
					},
				},
			},
			imageConfig: &ImageConfig{
				Registry: Registry{
					Configs: map[string]RegistryConfig{
						"gcr.io": {
							Auth: &AuthConfig{
								Username: "test",
							},
						},
					},
				},
			},
			runtimeExpected: &RuntimeConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Sandboxer: string(ModePodSandbox),
						},
					},
				},
			},
			imageExpected: &ImageConfig{
				Registry: Registry{
					Configs: map[string]RegistryConfig{
						"gcr.io": {
							Auth: &AuthConfig{
								Username: "test",
							},
						},
					},
				},
			},
			warnings: []deprecation.Warning{deprecation.CRIRegistryConfigs},
		},
		"privileged_without_host_devices_all_devices_allowed without privileged_without_host_devices": {
			runtimeConfig: &RuntimeConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							PrivilegedWithoutHostDevices:                  false,
							PrivilegedWithoutHostDevicesAllDevicesAllowed: true,
							Type: "default",
						},
					},
				},
			},
			runtimeExpectedErr: "`privileged_without_host_devices_all_devices_allowed` requires `privileged_without_host_devices` to be enabled",
		},
		"invalid drain_exec_sync_io_timeout input": {
			runtimeConfig: &RuntimeConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: "default",
						},
					},
				},
				DrainExecSyncIOTimeout: "10",
			},
			runtimeExpectedErr: "invalid `drain_exec_sync_io_timeout`",
		},
	} {
		t.Run(desc, func(t *testing.T) {
			var warnings []deprecation.Warning
			if test.runtimeConfig != nil {
				w, err := ValidateRuntimeConfig(context.Background(), test.runtimeConfig, nil)
				if test.runtimeExpectedErr != "" {
					// Guard err.Error(): a missing error must fail the
					// assertion, not panic with a nil dereference.
					if assert.Error(t, err) {
						assert.Contains(t, err.Error(), test.runtimeExpectedErr)
					}
				} else {
					assert.NoError(t, err)
					assert.Equal(t, test.runtimeExpected, test.runtimeConfig)
				}
				warnings = append(warnings, w...)
			}
			if test.imageConfig != nil {
				w, err := ValidateImageConfig(context.Background(), test.imageConfig)
				if test.imageExpectedErr != "" {
					if assert.Error(t, err) {
						assert.Contains(t, err.Error(), test.imageExpectedErr)
					}
				} else {
					assert.NoError(t, err)
					assert.Equal(t, test.imageExpected, test.imageConfig)
				}
				warnings = append(warnings, w...)
			}
			if test.serverConfig != nil {
				w, err := ValidateServerConfig(context.Background(), test.serverConfig)
				if test.serverExpectedErr != "" {
					if assert.Error(t, err) {
						assert.Contains(t, err.Error(), test.serverExpectedErr)
					}
				} else {
					assert.NoError(t, err)
					assert.Equal(t, test.serverExpected, test.serverConfig)
				}
				warnings = append(warnings, w...)
			}

			// Warnings from all three validators are compared as a set.
			if len(test.warnings) > 0 {
				assert.ElementsMatch(t, test.warnings, warnings)
			} else {
				assert.Len(t, warnings, 0)
			}
		})
	}
}
// TestHostAccessingSandbox checks hostAccessingSandbox against a nil config,
// privileged and non-privileged security contexts, and a context whose
// network, PID and IPC namespaces are all set to NODE.
func TestHostAccessingSandbox(t *testing.T) {
	// sandboxWith wraps a security context in an otherwise empty sandbox config.
	sandboxWith := func(sc *runtime.LinuxSandboxSecurityContext) *runtime.PodSandboxConfig {
		return &runtime.PodSandboxConfig{
			Linux: &runtime.LinuxPodSandboxConfig{SecurityContext: sc},
		}
	}

	privileged := sandboxWith(&runtime.LinuxSandboxSecurityContext{Privileged: true})
	unprivileged := sandboxWith(&runtime.LinuxSandboxSecurityContext{Privileged: false})
	hostNamespaces := sandboxWith(&runtime.LinuxSandboxSecurityContext{
		Privileged: false,
		NamespaceOptions: &runtime.NamespaceOption{
			Network: runtime.NamespaceMode_NODE,
			Pid:     runtime.NamespaceMode_NODE,
			Ipc:     runtime.NamespaceMode_NODE,
		},
	})

	cases := []struct {
		name   string
		config *runtime.PodSandboxConfig
		want   bool
	}{
		{name: "Security Context is nil", config: nil, want: false},
		{name: "Security Context is privileged", config: privileged, want: false},
		{name: "Security Context is not privileged", config: unprivileged, want: false},
		{name: "Security Context namespace host access", config: hostNamespaces, want: true},
	}
	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			got := hostAccessingSandbox(tc.config)
			if got != tc.want {
				t.Errorf("hostAccessingSandbox() = %v, want %v", got, tc.want)
			}
		})
	}
}

View File

@@ -1,104 +0,0 @@
//go:build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"github.com/containerd/containerd/v2/defaults"
"github.com/pelletier/go-toml/v2"
)
// DefaultImageConfig returns the default image configuration for non-Windows
// builds: the default snapshotter, snapshot annotations disabled, three
// concurrent downloads, node key model for image decryption, the default
// sandbox image pinned under "sandbox", and a stats collect period of 10.
func DefaultImageConfig() ImageConfig {
	var cfg ImageConfig

	cfg.Snapshotter = defaults.DefaultSnapshotter
	cfg.DisableSnapshotAnnotations = true
	cfg.MaxConcurrentDownloads = 3
	cfg.ImageDecryption = ImageDecryption{KeyModel: KeyModelNode}
	cfg.PinnedImages = map[string]string{
		"sandbox": DefaultSandboxImage,
	}
	cfg.ImagePullProgressTimeout = defaultImagePullProgressTimeoutDuration.String()
	cfg.ImagePullWithSyncFs = false
	cfg.StatsCollectPeriod = 10

	return cfg
}
// DefaultRuntimeConfig returns the default runtime configuration of the cri
// plugin for non-Windows builds. The single default runtime is "runc"
// (io.containerd.runc.v2), whose options are decoded from the TOML snippet
// below.
func DefaultRuntimeConfig() RuntimeConfig {
	runcOptsTOML := `
# NoNewKeyring disables new keyring for the container.
NoNewKeyring = false
# ShimCgroup places the shim in a cgroup.
ShimCgroup = ""
# IoUid sets the I/O's pipes uid.
IoUid = 0
# IoGid sets the I/O's pipes gid.
IoGid = 0
# BinaryName is the binary name of the runc binary.
BinaryName = ""
# Root is the runc root directory.
Root = ""
# CriuImagePath is the criu image path
CriuImagePath = ""
# CriuWorkPath is the criu work path.
CriuWorkPath = ""
`
	// The decode error is not inspected here; runcOpts stays nil if decoding fails.
	var runcOpts map[string]interface{}
	toml.Unmarshal([]byte(runcOptsTOML), &runcOpts)

	cni := CniConfig{
		NetworkPluginBinDir:        "/opt/cni/bin",
		NetworkPluginConfDir:       "/etc/cni/net.d",
		NetworkPluginMaxConfNum:    1, // only one CNI plugin config file will be loaded
		NetworkPluginSetupSerially: false,
		NetworkPluginConfTemplate:  "",
	}
	runc := Runtime{
		Type:      "io.containerd.runc.v2",
		Options:   runcOpts,
		Sandboxer: string(ModePodSandbox),
	}

	return RuntimeConfig{
		CniConfig: cni,
		ContainerdConfig: ContainerdConfig{
			DefaultRuntimeName: "runc",
			Runtimes:           map[string]Runtime{"runc": runc},
		},
		EnableSelinux:                    false,
		SelinuxCategoryRange:             1024,
		MaxContainerLogLineSize:          16 * 1024,
		DisableProcMount:                 false,
		TolerateMissingHugetlbController: true,
		DisableHugetlbController:         true,
		IgnoreImageDefinedVolumes:        false,
		EnableCDI:                        true,
		CDISpecDirs:                      []string{"/etc/cdi", "/var/run/cdi"},
		DrainExecSyncIOTimeout:           "0s",
		EnableUnprivilegedPorts:          true,
		EnableUnprivilegedICMP:           true,
	}
}

View File

@@ -1,86 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"os"
"path/filepath"
"github.com/containerd/containerd/v2/defaults"
)
// DefaultImageConfig returns the default image configuration: the default
// snapshotter, a stats collect period of 10, three concurrent downloads, node
// key model for image decryption, and the default sandbox image pinned under
// "sandbox".
func DefaultImageConfig() ImageConfig {
	var cfg ImageConfig

	cfg.Snapshotter = defaults.DefaultSnapshotter
	cfg.StatsCollectPeriod = 10
	cfg.MaxConcurrentDownloads = 3
	cfg.ImageDecryption = ImageDecryption{KeyModel: KeyModelNode}
	cfg.PinnedImages = map[string]string{
		"sandbox": DefaultSandboxImage,
	}
	cfg.ImagePullProgressTimeout = defaultImagePullProgressTimeoutDuration.String()

	return cfg
}
// DefaultRuntimeConfig returns the default runtime configuration of the cri
// plugin. CNI directories are placed under %ProgramFiles%\containerd\cni and
// two runhcs runtimes are registered, with "runhcs-wcow-process" as the
// default.
func DefaultRuntimeConfig() RuntimeConfig {
	programFiles := os.Getenv("ProgramFiles")

	cni := CniConfig{
		NetworkPluginBinDir:        filepath.Join(programFiles, "containerd", "cni", "bin"),
		NetworkPluginConfDir:       filepath.Join(programFiles, "containerd", "cni", "conf"),
		NetworkPluginMaxConfNum:    1,
		NetworkPluginSetupSerially: false,
		NetworkPluginConfTemplate:  "",
	}

	processRuntime := Runtime{
		Type:                 "io.containerd.runhcs.v1",
		ContainerAnnotations: []string{"io.microsoft.container.*"},
	}

	hypervisorRuntime := Runtime{
		Type:                 "io.containerd.runhcs.v1",
		PodAnnotations:       []string{"io.microsoft.virtualmachine.*"},
		ContainerAnnotations: []string{"io.microsoft.container.*"},
		// Full set of Windows shim options:
		// https://pkg.go.dev/github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options#Options
		Options: map[string]interface{}{
			// SandboxIsolation specifies the isolation level of the sandbox.
			// PROCESS (0) and HYPERVISOR (1) are the valid options.
			"SandboxIsolation": 1,
			// ScaleCpuLimitsToSandbox indicates that the containers CPU
			// maximum value (specifies the portion of processor cycles that
			// a container can use as a percentage times 100) should be adjusted
			// to account for the difference in the number of cores between the
			// host and UVM.
			//
			// This should only be turned on if SandboxIsolation is 1.
			"ScaleCpuLimitsToSandbox": true,
		},
	}

	return RuntimeConfig{
		CniConfig: cni,
		ContainerdConfig: ContainerdConfig{
			DefaultRuntimeName: "runhcs-wcow-process",
			Runtimes: map[string]Runtime{
				"runhcs-wcow-process":    processRuntime,
				"runhcs-wcow-hypervisor": hypervisorRuntime,
			},
		},
		MaxContainerLogLineSize:   16 * 1024,
		IgnoreImageDefinedVolumes: false,
		// TODO(windows): Add platform specific config, so that most common defaults can be shared.
		DrainExecSyncIOTimeout: "0s",
	}
}

View File

@@ -1,163 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"crypto/tls"
"errors"
"fmt"
"net"
"os"
"time"
k8snet "k8s.io/apimachinery/pkg/util/net"
k8scert "k8s.io/client-go/util/cert"
"k8s.io/kubelet/pkg/cri/streaming"
)
// streamListenerMode identifies how the stream server listener is secured.
// It is computed by getStreamListenerMode and consumed by StreamingConfig.
type streamListenerMode int
const (
// x509KeyPairTLS: TLS streaming is enabled and both a certificate file and
// a key file are configured.
x509KeyPairTLS streamListenerMode = iota
// selfSignTLS: TLS streaming is enabled without a configured key pair; a
// self-signed certificate is generated.
selfSignTLS
// withoutTLS: TLS streaming is not enabled.
withoutTLS
)
// getStreamListenerMode derives the stream listener mode from config.
//
// With EnableTLSStreaming set, both key pair files select x509KeyPairTLS, no
// files select selfSignTLS, and exactly one file is an error. Without it, any
// configured key pair file is an error and the result is otherwise
// withoutTLS. On error the returned mode is -1.
func getStreamListenerMode(config *ServerConfig) (streamListenerMode, error) {
	var (
		tlsEnabled = config.EnableTLSStreaming
		hasCert    = config.X509KeyPairStreaming.TLSCertFile != ""
		hasKey     = config.X509KeyPairStreaming.TLSKeyFile != ""
	)

	switch {
	case tlsEnabled && hasCert && hasKey:
		return x509KeyPairTLS, nil
	case tlsEnabled && hasCert:
		return -1, errors.New("must set X509KeyPairStreaming.TLSKeyFile")
	case tlsEnabled && hasKey:
		return -1, errors.New("must set X509KeyPairStreaming.TLSCertFile")
	case tlsEnabled:
		return selfSignTLS, nil
	case hasCert:
		return -1, errors.New("X509KeyPairStreaming.TLSCertFile is set but EnableTLSStreaming is not set")
	case hasKey:
		return -1, errors.New("X509KeyPairStreaming.TLSKeyFile is set but EnableTLSStreaming is not set")
	default:
		return withoutTLS, nil
	}
}
// StreamingConfig builds the streaming.Config for the stream server from c.
//
// It starts from streaming.DefaultConfig, resolves a bind address when
// StreamServerAddress is empty, overrides the idle timeout when
// StreamIdleTimeout is set, and attaches a TLS configuration according to the
// mode reported by getStreamListenerMode. On any failure it returns an empty
// streaming.Config together with the error.
func (c *ServerConfig) StreamingConfig() (streaming.Config, error) {
	host := c.StreamServerAddress
	if host == "" {
		// No address configured: fall back to the resolved bind address.
		bindAddr, err := k8snet.ResolveBindAddress(nil)
		if err != nil {
			return streaming.Config{}, fmt.Errorf("failed to get stream server address: %w", err)
		}
		host = bindAddr.String()
	}

	cfg := streaming.DefaultConfig
	if c.StreamIdleTimeout != "" {
		idle, err := time.ParseDuration(c.StreamIdleTimeout)
		if err != nil {
			return streaming.Config{}, fmt.Errorf("invalid stream idle timeout: %w", err)
		}
		cfg.StreamIdleTimeout = idle
	}
	cfg.Addr = net.JoinHostPort(host, c.StreamServerPort)

	mode, err := getStreamListenerMode(c)
	if err != nil {
		return streaming.Config{}, fmt.Errorf("invalid stream server configuration: %w", err)
	}

	var cert tls.Certificate
	switch mode {
	case withoutTLS:
		// Plain listener: no TLS configuration to attach.
		return cfg, nil
	case x509KeyPairTLS:
		cert, err = tls.LoadX509KeyPair(c.X509KeyPairStreaming.TLSCertFile, c.X509KeyPairStreaming.TLSKeyFile)
		if err != nil {
			return streaming.Config{}, fmt.Errorf("failed to load x509 key pair for stream server: %w", err)
		}
	case selfSignTLS:
		cert, err = newTLSCert()
		if err != nil {
			return streaming.Config{}, fmt.Errorf("failed to generate tls certificate for stream server: %w", err)
		}
	default:
		return streaming.Config{}, errors.New("invalid configuration for the stream listener")
	}

	cfg.TLSConfig = &tls.Config{
		Certificates: []tls.Certificate{cert},
	}
	return cfg, nil
}
// newTLSCert returns a self CA signed tls.certificate for the local host name,
// listing every IP address found on the host's interfaces as an alternate IP
// and, in string form, as an alternate DNS name.
// TODO (mikebrow): replace / rewrite this function to support using CA
// signing of the certificate. Requires a security plan for kubernetes regarding
// CRI connections / streaming, etc. For example, kubernetes could configure or
// require a CA service and pass a configuration down through CRI.
func newTLSCert() (tls.Certificate, error) {
	host, err := os.Hostname()
	if err != nil {
		return tls.Certificate{}, fmt.Errorf("failed to get hostname: %w", err)
	}

	ifaceAddrs, err := net.InterfaceAddrs()
	if err != nil {
		return tls.Certificate{}, fmt.Errorf("failed to get host IP addresses: %w", err)
	}

	var (
		altIPs []net.IP
		altDNS []string
	)
	for _, a := range ifaceAddrs {
		var ip net.IP
		if ipNet, ok := a.(*net.IPNet); ok {
			ip = ipNet.IP
		} else if ipAddr, ok := a.(*net.IPAddr); ok {
			ip = ipAddr.IP
		} else {
			// Address kinds that carry no IP are ignored.
			continue
		}
		altIPs = append(altIPs, ip)
		altDNS = append(altDNS, ip.String())
	}

	// Generate a self signed certificate key (CA is self)
	certPEM, keyPEM, err := k8scert.GenerateSelfSignedCertKey(host, altIPs, altDNS)
	if err != nil {
		return tls.Certificate{}, fmt.Errorf("certificate key could not be created: %w", err)
	}

	// Load the tls certificate
	cert, err := tls.X509KeyPair(certPEM, keyPEM)
	if err != nil {
		return tls.Certificate{}, fmt.Errorf("certificate could not be loaded: %w", err)
	}
	return cert, nil
}

View File

@@ -1,130 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestValidateStreamServer verifies the mode (or error) reported by
// getStreamListenerMode for each combination of EnableTLSStreaming and the
// X509 key pair files.
func TestValidateStreamServer(t *testing.T) {
	// tlsConfig builds a ServerConfig that only sets the TLS related fields.
	tlsConfig := func(enableTLS bool, keyFile, certFile string) ServerConfig {
		return ServerConfig{
			EnableTLSStreaming: enableTLS,
			X509KeyPairStreaming: X509KeyPairStreaming{
				TLSKeyFile:  keyFile,
				TLSCertFile: certFile,
			},
		}
	}

	cases := []struct {
		desc      string
		config    ServerConfig
		tlsMode   streamListenerMode
		expectErr bool
	}{
		{
			desc:      "should pass with default withoutTLS",
			config:    DefaultServerConfig(),
			tlsMode:   withoutTLS,
			expectErr: false,
		},
		{
			desc:      "should pass with x509KeyPairTLS",
			config:    tlsConfig(true, "non-empty", "non-empty"),
			tlsMode:   x509KeyPairTLS,
			expectErr: false,
		},
		{
			desc:      "should pass with selfSign",
			config:    ServerConfig{EnableTLSStreaming: true},
			tlsMode:   selfSignTLS,
			expectErr: false,
		},
		{
			desc:      "should return error with X509 keypair but not EnableTLSStreaming",
			config:    tlsConfig(false, "non-empty", "non-empty"),
			tlsMode:   -1,
			expectErr: true,
		},
		{
			desc:      "should return error with X509 TLSCertFile empty",
			config:    tlsConfig(true, "non-empty", ""),
			tlsMode:   -1,
			expectErr: true,
		},
		{
			desc:      "should return error with X509 TLSKeyFile empty",
			config:    tlsConfig(true, "", "non-empty"),
			tlsMode:   -1,
			expectErr: true,
		},
		{
			desc:      "should return error without EnableTLSStreaming and only TLSCertFile set",
			config:    tlsConfig(false, "", "non-empty"),
			tlsMode:   -1,
			expectErr: true,
		},
		{
			desc:      "should return error without EnableTLSStreaming and only TLSKeyFile set",
			config:    tlsConfig(false, "non-empty", ""),
			tlsMode:   -1,
			expectErr: true,
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			mode, err := getStreamListenerMode(&tc.config)
			if !tc.expectErr {
				assert.NoError(t, err)
				assert.Equal(t, tc.tlsMode, mode)
				return
			}
			// The returned mode is not checked when an error is expected.
			assert.Error(t, err)
		})
	}
}

View File

@@ -1,24 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package constants
const (
// K8sContainerdNamespace is the containerd namespace used when connecting
// to containerd.
K8sContainerdNamespace = "k8s.io"
// CRIVersion is the latest CRI version supported by the CRI plugin.
CRIVersion = "v1"
)

View File

@@ -1,638 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package instrument
import (
"context"
"github.com/containerd/containerd/v2/pkg/tracing"
"github.com/containerd/errdefs"
"github.com/containerd/log"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
ctrdutil "github.com/containerd/containerd/v2/pkg/cri/util"
)
const (
// criSpanPrefix is the prefix for CRI server specific spans.
criSpanPrefix = "pkg.cri.server"
)
// criService is a CRI server dependency to be wrapped with instrumentation.
// Besides the gRPC services it exposes IsInitialized, which checkInitialized
// consults before a request is handled.
type criService interface {
GRPCServices
IsInitialized() bool
}
// GRPCServices are all the grpc services provided by cri containerd: the CRI
// runtime service and the CRI image service.
type GRPCServices interface {
runtime.RuntimeServiceServer
runtime.ImageServiceServer
}
// instrumentedService wraps service with containerd namespace and logs.
type instrumentedService struct {
// c is the wrapped CRI service that every call is delegated to.
c criService
}
// NewService returns GRPCServices that delegate to c through an
// instrumentedService wrapper.
func NewService(c criService) GRPCServices {
return &instrumentedService{c: c}
}
// checkInitialized reports an "unavailable" gRPC error while the wrapped
// server has not finished initializing, and nil once it has.
// GRPC service request handlers should return error before server is fully
// initialized.
// NOTE(random-liu): All following functions MUST check initialized at the beginning.
func (in *instrumentedService) checkInitialized() error {
	if !in.c.IsInitialized() {
		return errdefs.ToGRPCf(errdefs.ErrUnavailable, "server is not initialized yet")
	}
	return nil
}
// RunPodSandbox rejects the request while the service is uninitialized,
// otherwise forwards it to the wrapped service using the context returned by
// ctrdutil.WithNamespace. The request and its outcome are logged at info
// level (error level on failure) and the error is passed through
// errdefs.ToGRPC.
func (in *instrumentedService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandboxRequest) (res *runtime.RunPodSandboxResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Infof("RunPodSandbox for %+v", r.GetConfig().GetMetadata())
	// Log the outcome using the named results once the call has returned.
	defer func() {
		if err == nil {
			log.G(ctx).Infof("RunPodSandbox for %+v returns sandbox id %q", r.GetConfig().GetMetadata(), res.GetPodSandboxId())
			return
		}
		log.G(ctx).WithError(err).Errorf("RunPodSandbox for %+v failed, error", r.GetConfig().GetMetadata())
	}()
	res, err = in.c.RunPodSandbox(ctrdutil.WithNamespace(ctx), r)
	return res, errdefs.ToGRPC(err)
}
// ListPodSandbox rejects the request while the service is uninitialized,
// otherwise forwards it to the wrapped service using the context returned by
// ctrdutil.WithNamespace. The filter and the resulting items are logged at
// trace level, failures at error level, and the error is passed through
// errdefs.ToGRPC.
func (in *instrumentedService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (res *runtime.ListPodSandboxResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Tracef("ListPodSandbox with filter %+v", r.GetFilter())
	// Log the outcome using the named results once the call has returned.
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("ListPodSandbox returns pod sandboxes %+v", res.GetItems())
			return
		}
		log.G(ctx).WithError(err).Error("ListPodSandbox failed")
	}()
	res, err = in.c.ListPodSandbox(ctrdutil.WithNamespace(ctx), r)
	return res, errdefs.ToGRPC(err)
}
// PodSandboxStatus rejects the request while the service is uninitialized,
// otherwise forwards it to the wrapped service using the context returned by
// ctrdutil.WithNamespace. The sandbox id and returned status are logged at
// trace level, failures at error level, and the error is passed through
// errdefs.ToGRPC.
func (in *instrumentedService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandboxStatusRequest) (res *runtime.PodSandboxStatusResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Tracef("PodSandboxStatus for %q", r.GetPodSandboxId())
	// Log the outcome using the named results once the call has returned.
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("PodSandboxStatus for %q returns status %+v", r.GetPodSandboxId(), res.GetStatus())
			return
		}
		log.G(ctx).WithError(err).Errorf("PodSandboxStatus for %q failed", r.GetPodSandboxId())
	}()
	res, err = in.c.PodSandboxStatus(ctrdutil.WithNamespace(ctx), r)
	return res, errdefs.ToGRPC(err)
}
// StopPodSandbox rejects the request while the service is uninitialized,
// otherwise forwards it to the wrapped service using the context returned by
// ctrdutil.WithNamespace. The request and its outcome are logged at info
// level (error level on failure) and the error is passed through
// errdefs.ToGRPC.
func (in *instrumentedService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandboxRequest) (_ *runtime.StopPodSandboxResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Infof("StopPodSandbox for %q", r.GetPodSandboxId())
	// Log the outcome using the named error result once the call has returned.
	defer func() {
		if err == nil {
			log.G(ctx).Infof("StopPodSandbox for %q returns successfully", r.GetPodSandboxId())
			return
		}
		log.G(ctx).WithError(err).Errorf("StopPodSandbox for %q failed", r.GetPodSandboxId())
	}()
	var resp *runtime.StopPodSandboxResponse
	resp, err = in.c.StopPodSandbox(ctrdutil.WithNamespace(ctx), r)
	return resp, errdefs.ToGRPC(err)
}
// RemovePodSandbox rejects the request while the service is uninitialized,
// otherwise forwards it to the wrapped service using the context returned by
// ctrdutil.WithNamespace. The request and its outcome are logged at info
// level (error level on failure) and the error is passed through
// errdefs.ToGRPC.
func (in *instrumentedService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodSandboxRequest) (_ *runtime.RemovePodSandboxResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Infof("RemovePodSandbox for %q", r.GetPodSandboxId())
	// Log the outcome using the named error result once the call has returned.
	defer func() {
		if err == nil {
			log.G(ctx).Infof("RemovePodSandbox %q returns successfully", r.GetPodSandboxId())
			return
		}
		log.G(ctx).WithError(err).Errorf("RemovePodSandbox for %q failed", r.GetPodSandboxId())
	}()
	var resp *runtime.RemovePodSandboxResponse
	resp, err = in.c.RemovePodSandbox(ctrdutil.WithNamespace(ctx), r)
	return resp, errdefs.ToGRPC(err)
}
// PortForward rejects the request while the service is uninitialized,
// otherwise forwards it to the wrapped service using the context returned by
// ctrdutil.WithNamespace. The sandbox id, port and returned URL are logged at
// info level, failures at error level, and the error is passed through
// errdefs.ToGRPC.
func (in *instrumentedService) PortForward(ctx context.Context, r *runtime.PortForwardRequest) (res *runtime.PortForwardResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Infof("Portforward for %q port %v", r.GetPodSandboxId(), r.GetPort())
	// Log the outcome using the named results once the call has returned.
	defer func() {
		if err == nil {
			log.G(ctx).Infof("Portforward for %q returns URL %q", r.GetPodSandboxId(), res.GetUrl())
			return
		}
		log.G(ctx).WithError(err).Errorf("Portforward for %q failed", r.GetPodSandboxId())
	}()
	res, err = in.c.PortForward(ctrdutil.WithNamespace(ctx), r)
	return res, errdefs.ToGRPC(err)
}
// CreateContainer rejects the request while the service is uninitialized,
// otherwise forwards it to the wrapped service using the context returned by
// ctrdutil.WithNamespace. The sandbox id, container metadata and resulting
// container id are logged at info level, failures at error level, and the
// error is passed through errdefs.ToGRPC.
func (in *instrumentedService) CreateContainer(ctx context.Context, r *runtime.CreateContainerRequest) (res *runtime.CreateContainerResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Infof("CreateContainer within sandbox %q for container %+v",
		r.GetPodSandboxId(), r.GetConfig().GetMetadata())
	// Log the outcome using the named results once the call has returned.
	defer func() {
		if err == nil {
			log.G(ctx).Infof("CreateContainer within sandbox %q for %+v returns container id %q",
				r.GetPodSandboxId(), r.GetConfig().GetMetadata(), res.GetContainerId())
			return
		}
		log.G(ctx).WithError(err).Errorf("CreateContainer within sandbox %q for %+v failed",
			r.GetPodSandboxId(), r.GetConfig().GetMetadata())
	}()
	res, err = in.c.CreateContainer(ctrdutil.WithNamespace(ctx), r)
	return res, errdefs.ToGRPC(err)
}
// StartContainer rejects the request while the service is uninitialized,
// otherwise forwards it to the wrapped service using the context returned by
// ctrdutil.WithNamespace. The request and its outcome are logged at info
// level (error level on failure) and the error is passed through
// errdefs.ToGRPC.
func (in *instrumentedService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (_ *runtime.StartContainerResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Infof("StartContainer for %q", r.GetContainerId())
	// Log the outcome using the named error result once the call has returned.
	defer func() {
		if err == nil {
			log.G(ctx).Infof("StartContainer for %q returns successfully", r.GetContainerId())
			return
		}
		log.G(ctx).WithError(err).Errorf("StartContainer for %q failed", r.GetContainerId())
	}()
	var resp *runtime.StartContainerResponse
	resp, err = in.c.StartContainer(ctrdutil.WithNamespace(ctx), r)
	return resp, errdefs.ToGRPC(err)
}
// ListContainers rejects the request while the service is uninitialized,
// otherwise forwards it to the wrapped service using the context returned by
// ctrdutil.WithNamespace. The filter and the resulting containers are logged
// at trace level, failures at error level, and the error is passed through
// errdefs.ToGRPC.
func (in *instrumentedService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (res *runtime.ListContainersResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Tracef("ListContainers with filter %+v", r.GetFilter())
	// Log the outcome using the named results once the call has returned.
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("ListContainers with filter %+v returns containers %+v",
				r.GetFilter(), res.GetContainers())
			return
		}
		log.G(ctx).WithError(err).Errorf("ListContainers with filter %+v failed", r.GetFilter())
	}()
	res, err = in.c.ListContainers(ctrdutil.WithNamespace(ctx), r)
	return res, errdefs.ToGRPC(err)
}
// ContainerStatus is the logging wrapper around the underlying service's
// ContainerStatus. The container id and the returned status are logged at
// trace level, failures at error level. The service error is passed through
// errdefs.ToGRPC; an error from checkInitialized is returned unchanged.
func (in *instrumentedService) ContainerStatus(ctx context.Context, r *runtime.ContainerStatusRequest) (res *runtime.ContainerStatusResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Tracef("ContainerStatus for %q", r.GetContainerId())
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("ContainerStatus for %q returns status %+v", r.GetContainerId(), res.GetStatus())
			return
		}
		log.G(ctx).WithError(err).Errorf("ContainerStatus for %q failed", r.GetContainerId())
	}()
	res, err = in.c.ContainerStatus(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// StopContainer is the logging wrapper around the underlying service's
// StopContainer. It logs the container id and requested timeout at info level
// and then logs success or failure. The service error is passed through
// errdefs.ToGRPC; an error from checkInitialized is returned unchanged.
func (in *instrumentedService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (res *runtime.StopContainerResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Infof("StopContainer for %q with timeout %d (s)", r.GetContainerId(), r.GetTimeout())
	defer func() {
		if err == nil {
			log.G(ctx).Infof("StopContainer for %q returns successfully", r.GetContainerId())
			return
		}
		log.G(ctx).WithError(err).Errorf("StopContainer for %q failed", r.GetContainerId())
	}()
	res, err = in.c.StopContainer(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// RemoveContainer is the logging wrapper around the underlying service's
// RemoveContainer. It logs the container id at info level before the call and
// logs success or failure afterwards. The service error is passed through
// errdefs.ToGRPC; an error from checkInitialized is returned unchanged.
func (in *instrumentedService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (res *runtime.RemoveContainerResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Infof("RemoveContainer for %q", r.GetContainerId())
	defer func() {
		if err == nil {
			log.G(ctx).Infof("RemoveContainer for %q returns successfully", r.GetContainerId())
			return
		}
		log.G(ctx).WithError(err).Errorf("RemoveContainer for %q failed", r.GetContainerId())
	}()
	res, err = in.c.RemoveContainer(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// ExecSync is the logging wrapper around the underlying service's ExecSync.
// The command and timeout are logged at debug level, the exit code at trace
// level, and failures at error level. The service error is passed through
// errdefs.ToGRPC; an error from checkInitialized is returned unchanged.
func (in *instrumentedService) ExecSync(ctx context.Context, r *runtime.ExecSyncRequest) (res *runtime.ExecSyncResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Debugf("ExecSync for %q with command %+v and timeout %d (s)", r.GetContainerId(), r.GetCmd(), r.GetTimeout())
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("ExecSync for %q returns with exit code %d", r.GetContainerId(), res.GetExitCode())
			return
		}
		log.G(ctx).WithError(err).Errorf("ExecSync for %q failed", r.GetContainerId())
	}()
	res, err = in.c.ExecSync(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// Exec is the logging wrapper around the underlying service's Exec. The
// command, tty and stdin settings are logged at debug level before the call;
// afterwards the returned URL (debug) or the failure (error) is logged. The
// service error is passed through errdefs.ToGRPC; an error from
// checkInitialized is returned unchanged.
func (in *instrumentedService) Exec(ctx context.Context, r *runtime.ExecRequest) (res *runtime.ExecResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Debugf("Exec for %q with command %+v, tty %v and stdin %v",
		r.GetContainerId(), r.GetCmd(), r.GetTty(), r.GetStdin())
	defer func() {
		if err == nil {
			log.G(ctx).Debugf("Exec for %q returns URL %q", r.GetContainerId(), res.GetUrl())
			return
		}
		log.G(ctx).WithError(err).Errorf("Exec for %q failed", r.GetContainerId())
	}()
	res, err = in.c.Exec(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// Attach is the logging wrapper around the underlying service's Attach. The
// tty and stdin settings are logged at debug level before the call; afterwards
// the returned URL (debug) or the failure (error) is logged. The service error
// is passed through errdefs.ToGRPC; an error from checkInitialized is returned
// unchanged.
func (in *instrumentedService) Attach(ctx context.Context, r *runtime.AttachRequest) (res *runtime.AttachResponse, err error) {
	if err := in.checkInitialized(); err != nil {
		return nil, err
	}
	log.G(ctx).Debugf("Attach for %q with tty %v and stdin %v", r.GetContainerId(), r.GetTty(), r.GetStdin())
	defer func() {
		if err != nil {
			log.G(ctx).WithError(err).Errorf("Attach for %q failed", r.GetContainerId())
		} else {
			// Use the nil-safe getter, as Exec does, so that a nil response
			// paired with a nil error cannot panic inside the log statement.
			log.G(ctx).Debugf("Attach for %q returns URL %q", r.GetContainerId(), res.GetUrl())
		}
	}()
	res, err = in.c.Attach(ctrdutil.WithNamespace(ctx), r)
	return res, errdefs.ToGRPC(err)
}
// UpdateContainerResources is the logging wrapper around the underlying
// service's UpdateContainerResources. The Linux and Windows resource sections
// of the request are logged at info level before the call, followed by a
// success or failure line. The service error is passed through errdefs.ToGRPC;
// an error from checkInitialized is returned unchanged.
func (in *instrumentedService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (res *runtime.UpdateContainerResourcesResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Infof("UpdateContainerResources for %q with Linux: %+v / Windows: %+v", r.GetContainerId(), r.GetLinux(), r.GetWindows())
	defer func() {
		if err == nil {
			log.G(ctx).Infof("UpdateContainerResources for %q returns successfully", r.GetContainerId())
			return
		}
		log.G(ctx).WithError(err).Errorf("UpdateContainerResources for %q failed", r.GetContainerId())
	}()
	res, err = in.c.UpdateContainerResources(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// PullImage is the logging and tracing wrapper around the underlying service's
// PullImage. A span named from criSpanPrefix and "PullImage" covers the call;
// its status is set from the final error before the span ends. The image name
// and resulting image reference are logged at info level, failures at error
// level. The service error is passed through errdefs.ToGRPC; an error from
// checkInitialized is returned unchanged.
func (in *instrumentedService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (res *runtime.PullImageResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	ctx, span := tracing.StartSpan(ctx, tracing.Name(criSpanPrefix, "PullImage"))
	defer span.End()
	log.G(ctx).Infof("PullImage %q", r.GetImage().GetImage())
	defer func() {
		if err == nil {
			log.G(ctx).Infof("PullImage %q returns image reference %q",
				r.GetImage().GetImage(), res.GetImageRef())
		} else {
			log.G(ctx).WithError(err).Errorf("PullImage %q failed", r.GetImage().GetImage())
		}
		// Runs before the deferred span.End above (defers are LIFO).
		span.SetStatus(err)
	}()
	res, err = in.c.PullImage(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// ListImages is the logging and tracing wrapper around the underlying
// service's ListImages. A span named from criSpanPrefix and "ListImages"
// covers the call and receives the final error as its status. The filter and
// the returned image list are logged at trace level, failures at error level.
// The service error is passed through errdefs.ToGRPC; an error from
// checkInitialized is returned unchanged.
func (in *instrumentedService) ListImages(ctx context.Context, r *runtime.ListImagesRequest) (res *runtime.ListImagesResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	ctx, span := tracing.StartSpan(ctx, tracing.Name(criSpanPrefix, "ListImages"))
	defer span.End()
	log.G(ctx).Tracef("ListImages with filter %+v", r.GetFilter())
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("ListImages with filter %+v returns image list %+v",
				r.GetFilter(), res.GetImages())
		} else {
			log.G(ctx).WithError(err).Errorf("ListImages with filter %+v failed", r.GetFilter())
		}
		// Runs before the deferred span.End above (defers are LIFO).
		span.SetStatus(err)
	}()
	res, err = in.c.ListImages(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// ImageStatus is the logging and tracing wrapper around the underlying
// service's ImageStatus. A span named from criSpanPrefix and "ImageStatus"
// covers the call and receives the final error as its status. The image name
// and returned image status are logged at trace level, failures at error
// level. The service error is passed through errdefs.ToGRPC; an error from
// checkInitialized is returned unchanged.
func (in *instrumentedService) ImageStatus(ctx context.Context, r *runtime.ImageStatusRequest) (res *runtime.ImageStatusResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	ctx, span := tracing.StartSpan(ctx, tracing.Name(criSpanPrefix, "ImageStatus"))
	defer span.End()
	log.G(ctx).Tracef("ImageStatus for %q", r.GetImage().GetImage())
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("ImageStatus for %q returns image status %+v",
				r.GetImage().GetImage(), res.GetImage())
		} else {
			log.G(ctx).WithError(err).Errorf("ImageStatus for %q failed", r.GetImage().GetImage())
		}
		// Runs before the deferred span.End above (defers are LIFO).
		span.SetStatus(err)
	}()
	res, err = in.c.ImageStatus(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// RemoveImage is the logging and tracing wrapper around the underlying
// service's RemoveImage. A span named from criSpanPrefix and "RemoveImage"
// covers the call and receives the final error as its status. The image name
// is logged at info level before the call, followed by a success or failure
// line. The service error is passed through errdefs.ToGRPC; an error from
// checkInitialized is returned unchanged.
func (in *instrumentedService) RemoveImage(ctx context.Context, r *runtime.RemoveImageRequest) (_ *runtime.RemoveImageResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	ctx, span := tracing.StartSpan(ctx, tracing.Name(criSpanPrefix, "RemoveImage"))
	defer span.End()
	log.G(ctx).Infof("RemoveImage %q", r.GetImage().GetImage())
	defer func() {
		if err == nil {
			log.G(ctx).Infof("RemoveImage %q returns successfully", r.GetImage().GetImage())
		} else {
			log.G(ctx).WithError(err).Errorf("RemoveImage %q failed", r.GetImage().GetImage())
		}
		// Runs before the deferred span.End above (defers are LIFO).
		span.SetStatus(err)
	}()
	res, callErr := in.c.RemoveImage(ctrdutil.WithNamespace(ctx), r)
	return res, errdefs.ToGRPC(callErr)
}
// ImageFsInfo is the logging and tracing wrapper around the underlying
// service's ImageFsInfo. A span named from criSpanPrefix and "ImageFsInfo"
// covers the call and receives the final error as its status. The returned
// filesystem info is logged at trace level, failures at error level. The
// service error is passed through errdefs.ToGRPC; an error from
// checkInitialized is returned unchanged.
func (in *instrumentedService) ImageFsInfo(ctx context.Context, r *runtime.ImageFsInfoRequest) (res *runtime.ImageFsInfoResponse, err error) {
	if err := in.checkInitialized(); err != nil {
		return nil, err
	}
	ctx, span := tracing.StartSpan(ctx, tracing.Name(criSpanPrefix, "ImageFsInfo"))
	defer span.End()
	log.G(ctx).Tracef("ImageFsInfo")
	defer func() {
		if err != nil {
			log.G(ctx).WithError(err).Error("ImageFsInfo failed")
		} else {
			// Use the nil-safe getter, like the sibling wrappers, so that a nil
			// response paired with a nil error cannot panic while logging.
			log.G(ctx).Tracef("ImageFsInfo returns filesystem info %+v", res.GetImageFilesystems())
		}
		span.SetStatus(err)
	}()
	res, err = in.c.ImageFsInfo(ctrdutil.WithNamespace(ctx), r)
	return res, errdefs.ToGRPC(err)
}
// PodSandboxStats is the logging wrapper around the underlying service's
// PodSandboxStats. The sandbox id and the returned stats are logged at trace
// level, failures at error level. The service error is passed through
// errdefs.ToGRPC; an error from checkInitialized is returned unchanged.
func (in *instrumentedService) PodSandboxStats(ctx context.Context, r *runtime.PodSandboxStatsRequest) (res *runtime.PodSandboxStatsResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Tracef("PodSandboxStats for %q", r.GetPodSandboxId())
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("PodSandboxStats for %q returns stats %+v", r.GetPodSandboxId(), res.GetStats())
			return
		}
		log.G(ctx).WithError(err).Errorf("PodSandboxStats for %q failed", r.GetPodSandboxId())
	}()
	res, err = in.c.PodSandboxStats(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// ContainerStats is the logging wrapper around the underlying service's
// ContainerStats. The container id and the returned stats are logged at trace
// level, failures at error level. The service error is passed through
// errdefs.ToGRPC; an error from checkInitialized is returned unchanged.
func (in *instrumentedService) ContainerStats(ctx context.Context, r *runtime.ContainerStatsRequest) (res *runtime.ContainerStatsResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Tracef("ContainerStats for %q", r.GetContainerId())
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("ContainerStats for %q returns stats %+v", r.GetContainerId(), res.GetStats())
			return
		}
		log.G(ctx).WithError(err).Errorf("ContainerStats for %q failed", r.GetContainerId())
	}()
	res, err = in.c.ContainerStats(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// ListPodSandboxStats is the logging wrapper around the underlying service's
// ListPodSandboxStats. The filter and the returned stats are logged at trace
// level, failures at error level. The service error is passed through
// errdefs.ToGRPC; an error from checkInitialized is returned unchanged.
func (in *instrumentedService) ListPodSandboxStats(ctx context.Context, r *runtime.ListPodSandboxStatsRequest) (res *runtime.ListPodSandboxStatsResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Tracef("ListPodSandboxStats with filter %+v", r.GetFilter())
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("ListPodSandboxStats returns stats %+v", res.GetStats())
			return
		}
		log.G(ctx).WithError(err).Error("ListPodSandboxStats failed")
	}()
	res, err = in.c.ListPodSandboxStats(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// ListContainerStats is the logging wrapper around the underlying service's
// ListContainerStats. The filter and the returned stats are logged at trace
// level, failures at error level. The service error is passed through
// errdefs.ToGRPC; an error from checkInitialized is returned unchanged.
func (in *instrumentedService) ListContainerStats(ctx context.Context, r *runtime.ListContainerStatsRequest) (res *runtime.ListContainerStatsResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Tracef("ListContainerStats with filter %+v", r.GetFilter())
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("ListContainerStats returns stats %+v", res.GetStats())
			return
		}
		log.G(ctx).WithError(err).Error("ListContainerStats failed")
	}()
	res, err = in.c.ListContainerStats(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// Status is the logging wrapper around the underlying service's Status. The
// call and the returned status are logged at trace level, failures at error
// level. The service error is passed through errdefs.ToGRPC; an error from
// checkInitialized is returned unchanged.
func (in *instrumentedService) Status(ctx context.Context, r *runtime.StatusRequest) (res *runtime.StatusResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Tracef("Status")
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("Status returns status %+v", res.GetStatus())
			return
		}
		log.G(ctx).WithError(err).Error("Status failed")
	}()
	res, err = in.c.Status(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// Version is the logging wrapper around the underlying service's Version. The
// client-side version from the request and the full response are logged at
// trace level, failures at error level. The service error is passed through
// errdefs.ToGRPC; an error from checkInitialized is returned unchanged.
func (in *instrumentedService) Version(ctx context.Context, r *runtime.VersionRequest) (res *runtime.VersionResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Tracef("Version with client side version %q", r.GetVersion())
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("Version returns %+v", res)
			return
		}
		log.G(ctx).WithError(err).Error("Version failed")
	}()
	res, err = in.c.Version(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// UpdateRuntimeConfig is the logging wrapper around the underlying service's
// UpdateRuntimeConfig. The requested runtime config is logged at debug level
// before the call, followed by a success (debug) or failure (error) line. The
// service error is passed through errdefs.ToGRPC; an error from
// checkInitialized is returned unchanged.
func (in *instrumentedService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateRuntimeConfigRequest) (res *runtime.UpdateRuntimeConfigResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Debugf("UpdateRuntimeConfig with config %+v", r.GetRuntimeConfig())
	defer func() {
		if err == nil {
			log.G(ctx).Debug("UpdateRuntimeConfig returns successfully")
			return
		}
		log.G(ctx).WithError(err).Error("UpdateRuntimeConfig failed")
	}()
	res, err = in.c.UpdateRuntimeConfig(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// ReopenContainerLog is the logging wrapper around the underlying service's
// ReopenContainerLog. The container id is logged at debug level before the
// call, followed by a success (debug) or failure (error) line. The service
// error is passed through errdefs.ToGRPC; an error from checkInitialized is
// returned unchanged.
func (in *instrumentedService) ReopenContainerLog(ctx context.Context, r *runtime.ReopenContainerLogRequest) (res *runtime.ReopenContainerLogResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Debugf("ReopenContainerLog for %q", r.GetContainerId())
	defer func() {
		if err == nil {
			log.G(ctx).Debugf("ReopenContainerLog for %q returns successfully", r.GetContainerId())
			return
		}
		log.G(ctx).WithError(err).Errorf("ReopenContainerLog for %q failed", r.GetContainerId())
	}()
	res, err = in.c.ReopenContainerLog(ctrdutil.WithNamespace(ctx), r)
	err = errdefs.ToGRPC(err)
	return res, err
}
// CheckpointContainer is the logging wrapper around the underlying service's
// CheckpointContainer. Nothing is logged before the call; afterwards a success
// (debug) or failure (error) line is emitted. The service error is passed
// through errdefs.ToGRPC; an error from checkInitialized is returned
// unchanged. Unlike most sibling wrappers, ctx is forwarded as-is rather than
// through ctrdutil.WithNamespace.
func (in *instrumentedService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (res *runtime.CheckpointContainerResponse, err error) {
	if err := in.checkInitialized(); err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			// The error itself is attached by WithError, so the message needs
			// neither a format call nor a trailing ", error" placeholder.
			log.G(ctx).WithError(err).Error("CheckpointContainer failed")
		} else {
			log.G(ctx).Debug("CheckpointContainer returns successfully")
		}
	}()
	res, err = in.c.CheckpointContainer(ctx, r)
	return res, errdefs.ToGRPC(err)
}
// GetContainerEvents is the logging wrapper around the underlying service's
// streaming GetContainerEvents. The logging context is taken from the stream.
// When the call returns, a success (debug) or failure (error) line is emitted.
// The service error is passed through errdefs.ToGRPC; an error from
// checkInitialized is returned unchanged.
func (in *instrumentedService) GetContainerEvents(r *runtime.GetEventsRequest, s runtime.RuntimeService_GetContainerEventsServer) (err error) {
	if err := in.checkInitialized(); err != nil {
		return err
	}
	ctx := s.Context()
	defer func() {
		if err != nil {
			// The error itself is attached by WithError, so the message needs
			// neither a format call nor a trailing ", error" placeholder.
			log.G(ctx).WithError(err).Error("GetContainerEvents failed")
		} else {
			log.G(ctx).Debug("GetContainerEvents returns successfully")
		}
	}()
	err = in.c.GetContainerEvents(r, s)
	return errdefs.ToGRPC(err)
}
// ListMetricDescriptors is the logging wrapper around the underlying service's
// ListMetricDescriptors. Nothing is logged before the call; afterwards a
// success (trace) or failure (error) line is emitted. The service error is
// passed through errdefs.ToGRPC; an error from checkInitialized is returned
// unchanged. ctx is forwarded as-is rather than through
// ctrdutil.WithNamespace.
func (in *instrumentedService) ListMetricDescriptors(ctx context.Context, r *runtime.ListMetricDescriptorsRequest) (res *runtime.ListMetricDescriptorsResponse, err error) {
	if err := in.checkInitialized(); err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			// The error itself is attached by WithError, so the message needs
			// neither a format call nor a trailing ", error" placeholder.
			log.G(ctx).WithError(err).Error("ListMetricDescriptors failed")
		} else {
			log.G(ctx).Trace("ListMetricDescriptors returns successfully")
		}
	}()
	res, err = in.c.ListMetricDescriptors(ctx, r)
	return res, errdefs.ToGRPC(err)
}
// ListPodSandboxMetrics is the logging wrapper around the underlying service's
// ListPodSandboxMetrics. Nothing is logged before the call; afterwards a
// success (trace) or failure (error) line is emitted. The service error is
// passed through errdefs.ToGRPC; an error from checkInitialized is returned
// unchanged. ctx is forwarded as-is rather than through
// ctrdutil.WithNamespace.
func (in *instrumentedService) ListPodSandboxMetrics(ctx context.Context, r *runtime.ListPodSandboxMetricsRequest) (res *runtime.ListPodSandboxMetricsResponse, err error) {
	if err := in.checkInitialized(); err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			// The error itself is attached by WithError, so the message needs
			// neither a format call nor a trailing ", error" placeholder.
			log.G(ctx).WithError(err).Error("ListPodSandboxMetrics failed")
		} else {
			log.G(ctx).Trace("ListPodSandboxMetrics returns successfully")
		}
	}()
	res, err = in.c.ListPodSandboxMetrics(ctx, r)
	return res, errdefs.ToGRPC(err)
}
// RuntimeConfig is the logging wrapper around the underlying service's
// RuntimeConfig. The call and the returned config are logged at trace level,
// failures at error level. The service error is passed through
// errdefs.ToGRPC; an error from checkInitialized is returned unchanged. ctx is
// forwarded as-is rather than through ctrdutil.WithNamespace.
func (in *instrumentedService) RuntimeConfig(ctx context.Context, r *runtime.RuntimeConfigRequest) (res *runtime.RuntimeConfigResponse, err error) {
	if initErr := in.checkInitialized(); initErr != nil {
		return nil, initErr
	}
	log.G(ctx).Tracef("RuntimeConfig")
	defer func() {
		if err == nil {
			log.G(ctx).Tracef("RuntimeConfig returns config %+v", res)
			return
		}
		log.G(ctx).WithError(err).Error("RuntimeConfig failed")
	}()
	res, err = in.c.RuntimeConfig(ctx, r)
	err = errdefs.ToGRPC(err)
	return res, err
}

View File

@@ -1,236 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package io
import (
"errors"
"io"
"strings"
"sync"
"github.com/containerd/containerd/v2/pkg/cio"
"github.com/containerd/log"
"github.com/containerd/containerd/v2/pkg/cri/util"
cioutil "github.com/containerd/containerd/v2/pkg/ioutil"
)
// streamKey builds the identifier under which a stream is registered: the
// container id, the caller-chosen name and the stream type joined with "-".
func streamKey(id, name string, stream StreamType) string {
	parts := []string{id, name, string(stream)}
	return strings.Join(parts, "-")
}
// ContainerIO holds the container io.
type ContainerIO struct {
	// id is the container id; it is used in stream keys and log messages.
	id string

	// fifos is the FIFO set backing the stdio pipes. NewContainerIO refuses
	// to proceed when no option has set it.
	fifos *cio.FIFOSet
	// stdioPipes provides the stdin/stdout/stderr fields opened from fifos.
	*stdioPipes

	// stdoutGroup and stderrGroup are the writer groups that Pipe copies the
	// container's stdout and stderr into.
	stdoutGroup *cioutil.WriterGroup
	stderrGroup *cioutil.WriterGroup

	// closer is returned by newStdioPipes together with the pipes; Cancel,
	// Wait and Close delegate to it.
	closer *wgCloser
}

// Compile-time check that *ContainerIO satisfies cio.IO.
var _ cio.IO = &ContainerIO{}
// ContainerIOOpts is a functional option applied by NewContainerIO to the
// ContainerIO under construction. A non-nil error aborts construction.
type ContainerIOOpts func(*ContainerIO) error
// WithFIFOs returns an option that makes the container io use the given,
// already existing FIFO set. The option itself never fails.
func WithFIFOs(fifos *cio.FIFOSet) ContainerIOOpts {
	return func(target *ContainerIO) error {
		target.fifos = fifos
		return nil
	}
}
// WithNewFIFOs returns an option that creates a fresh FIFO set for the
// container (via newFifos, keyed by the container id) and installs it on the
// container io. The option returns the error from newFifos, if any.
func WithNewFIFOs(root string, tty, stdin bool) ContainerIOOpts {
	return func(c *ContainerIO) error {
		created, err := newFifos(root, c.id, tty, stdin)
		if err != nil {
			return err
		}
		c.fifos = created
		return nil
	}
}
// NewContainerIO builds the io for container id. Options are applied in order
// and the first failing option aborts construction. After the options have
// run a FIFO set must be present, otherwise an error is returned. The stdio
// pipes are then opened from that set with newStdioPipes.
func NewContainerIO(id string, opts ...ContainerIOOpts) (_ *ContainerIO, err error) {
	cntrIO := &ContainerIO{
		id:          id,
		stdoutGroup: cioutil.NewWriterGroup(),
		stderrGroup: cioutil.NewWriterGroup(),
	}
	for i := range opts {
		if optErr := opts[i](cntrIO); optErr != nil {
			return nil, optErr
		}
	}
	if cntrIO.fifos == nil {
		return nil, errors.New("fifos are not set")
	}

	// Open the actual pipes described by the FIFO set.
	pipes, closer, err := newStdioPipes(cntrIO.fifos)
	if err != nil {
		return nil, err
	}
	cntrIO.stdioPipes, cntrIO.closer = pipes, closer
	return cntrIO, nil
}
// Config returns the cio.Config stored in the container's FIFO set.
func (c *ContainerIO) Config() cio.Config {
	return c.fifos.Config
}
// Pipe starts copying container output into the output writer groups and
// returns immediately. One goroutine copies stdout into stdoutGroup when
// stdout is set. A second one copies stderr into stderrGroup when stderr is
// set and the FIFO set is not marked as a terminal. Each goroutine is
// registered on the closer's WaitGroup, so Wait blocks until copying is done.
func (c *ContainerIO) Pipe() {
	wg := c.closer.wg
	if c.stdout != nil {
		wg.Add(1)
		go func() {
			if _, err := io.Copy(c.stdoutGroup, c.stdout); err != nil {
				log.L.WithError(err).Errorf("Failed to pipe stdout of container %q", c.id)
			}
			// Copying has ended (EOF or error): release both ends before
			// signalling the WaitGroup.
			c.stdout.Close()
			c.stdoutGroup.Close()
			wg.Done()
			log.L.Debugf("Finish piping stdout of container %q", c.id)
		}()
	}

	// With a terminal, stderr is not piped separately.
	if !c.fifos.Terminal && c.stderr != nil {
		wg.Add(1)
		go func() {
			if _, err := io.Copy(c.stderrGroup, c.stderr); err != nil {
				log.L.WithError(err).Errorf("Failed to pipe stderr of container %q", c.id)
			}
			c.stderr.Close()
			c.stderrGroup.Close()
			wg.Done()
			log.L.Debugf("Finish piping stderr of container %q", c.id)
		}()
	}
}
// Attach attaches container stdio.
//
// The streams in opts are wired to the container: opts.Stdin is copied into
// the container's stdin, and opts.Stdout / opts.Stderr are registered in the
// output writer groups under keys unique to this attach session. Attach blocks
// until every goroutine it started has finished.
// TODO(random-liu): Use pools.Copy in docker to reduce memory usage?
func (c *ContainerIO) Attach(opts AttachOptions) {
	var wg sync.WaitGroup
	// A fresh id keeps the stream keys of concurrent attach sessions apart.
	key := util.GenerateID()
	stdinKey := streamKey(c.id, "attach-"+key, Stdin)
	stdoutKey := streamKey(c.id, "attach-"+key, Stdout)
	stderrKey := streamKey(c.id, "attach-"+key, Stderr)

	var stdinStreamRC io.ReadCloser
	if c.stdin != nil && opts.Stdin != nil {
		// Create a wrapper of stdin which could be closed. Note that the
		// wrapper doesn't close the actual stdin, it only stops io.Copy.
		// The actual stdin will be closed by stream server.
		stdinStreamRC = cioutil.NewWrapReadCloser(opts.Stdin)
		wg.Add(1)
		go func() {
			if _, err := io.Copy(c.stdin, stdinStreamRC); err != nil {
				log.L.WithError(err).Errorf("Failed to pipe stdin for container attach %q", c.id)
			}
			log.L.Infof("Attach stream %q closed", stdinKey)
			if opts.StdinOnce && !opts.Tty {
				// Due to kubectl requirements and current docker behavior, when
				// (opts.StdinOnce && !opts.Tty) we have to close container stdin
				// and keep stdout and stderr open until container stops.
				// NOTE(review): this comment previously read "opts.StdinOnce &&
				// opts.Tty", which contradicted the condition above; confirm
				// that the code, not the old comment, reflects the intent.
				c.stdin.Close()
				// Also closes the containerd side.
				if err := opts.CloseStdin(); err != nil {
					log.L.WithError(err).Errorf("Failed to close stdin for container %q", c.id)
				}
			} else {
				// Otherwise drop this session's writers from the output groups.
				// Presumably Remove also closes them, which would unblock
				// attachStream below; confirm against cioutil.WriterGroup.
				if opts.Stdout != nil {
					c.stdoutGroup.Remove(stdoutKey)
				}
				if opts.Stderr != nil {
					c.stderrGroup.Remove(stderrKey)
				}
			}
			wg.Done()
		}()
	}

	// attachStream waits for the close notification of one output stream and
	// then stops the stdin copy, if there is one.
	attachStream := func(key string, close <-chan struct{}) {
		<-close
		log.L.Infof("Attach stream %q closed", key)
		// Make sure stdin gets closed.
		if stdinStreamRC != nil {
			stdinStreamRC.Close()
		}
		wg.Done()
	}

	if opts.Stdout != nil {
		wg.Add(1)
		wc, close := cioutil.NewWriteCloseInformer(opts.Stdout)
		c.stdoutGroup.Add(stdoutKey, wc)
		go attachStream(stdoutKey, close)
	}
	// With a tty, stderr is not attached separately.
	if !opts.Tty && opts.Stderr != nil {
		wg.Add(1)
		wc, close := cioutil.NewWriteCloseInformer(opts.Stderr)
		c.stderrGroup.Add(stderrKey, wc)
		go attachStream(stderrKey, close)
	}
	wg.Wait()
}
// AddOutput registers stdout and stderr under the given name in the
// container's output writer groups. For each non-nil writer it returns
// whatever was registered under the same key before (as reported by the
// group's Get); for a nil writer the corresponding result is nil and the
// group is left untouched.
func (c *ContainerIO) AddOutput(name string, stdout, stderr io.WriteCloser) (io.WriteCloser, io.WriteCloser) {
	// register swaps w into group under the stream's key and reports the
	// previous entry.
	register := func(group *cioutil.WriterGroup, stream StreamType, w io.WriteCloser) io.WriteCloser {
		if w == nil {
			return nil
		}
		key := streamKey(c.id, name, stream)
		previous := group.Get(key)
		group.Add(key, w)
		return previous
	}
	prevStdout := register(c.stdoutGroup, Stdout, stdout)
	prevStderr := register(c.stderrGroup, Stderr, stderr)
	return prevStdout, prevStderr
}
// Cancel cancels container io by delegating to the closer's Cancel.
func (c *ContainerIO) Cancel() {
	c.closer.Cancel()
}
// Wait blocks until the container io has finished, i.e. until the closer's
// WaitGroup (which Pipe registers its goroutines on) is done.
func (c *ContainerIO) Wait() {
	c.closer.Wait()
}
// Close closes the opened stdio pipes and then the FIFO set, returning the
// error from closing the FIFO set. Either part is skipped when it was never
// set, so Close is safe on a ContainerIO that was not fully constructed; this
// mirrors ExecIO.Close.
func (c *ContainerIO) Close() error {
	if c.closer != nil {
		c.closer.Close()
	}
	if c.fifos != nil {
		return c.fifos.Close()
	}
	return nil
}

View File

@@ -1,145 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package io
import (
"io"
"sync"
"github.com/containerd/containerd/v2/pkg/cio"
cioutil "github.com/containerd/containerd/v2/pkg/ioutil"
"github.com/containerd/log"
)
// ExecIO holds the exec io.
type ExecIO struct {
	// id is the exec id passed to NewExecIO; it is used in log messages.
	id string
	// fifos is the FIFO set created for this exec by NewExecIO.
	fifos *cio.FIFOSet
	// stdioPipes provides the stdin/stdout/stderr fields opened from fifos.
	*stdioPipes
	// closer is returned by newStdioPipes together with the pipes; Cancel,
	// Wait and Close delegate to it.
	closer *wgCloser
}

// Compile-time check that *ExecIO satisfies cio.IO.
var _ cio.IO = &ExecIO{}
// NewExecIO creates exec io. It creates a new FIFO set for id under root (see
// newFifos) and opens the stdio pipes described by it. If opening the pipes
// fails, the FIFO set created here is closed again before the error is
// returned, so a failed call does not leave it behind.
func NewExecIO(id, root string, tty, stdin bool) (*ExecIO, error) {
	fifos, err := newFifos(root, id, tty, stdin)
	if err != nil {
		return nil, err
	}
	stdio, closer, err := newStdioPipes(fifos)
	if err != nil {
		// The FIFO set is owned by this function and nobody else holds a
		// reference yet. The original error is the one worth returning; a
		// cleanup failure is only logged.
		if closeErr := fifos.Close(); closeErr != nil {
			log.L.WithError(closeErr).Errorf("Failed to clean up fifos for container exec %q", id)
		}
		return nil, err
	}
	return &ExecIO{
		id:         id,
		fifos:      fifos,
		stdioPipes: stdio,
		closer:     closer,
	}, nil
}
// Config returns the cio.Config stored in the exec's FIFO set.
func (e *ExecIO) Config() cio.Config {
	return e.fifos.Config
}
// Attach attaches exec stdio. The logic is similar with container io attach.
//
// opts.Stdin is copied into the exec's stdin, and the exec's stdout/stderr are
// copied into opts.Stdout/opts.Stderr. Unlike ContainerIO.Attach this does not
// block: it returns a channel that is closed once every goroutine started here
// has finished.
func (e *ExecIO) Attach(opts AttachOptions) <-chan struct{} {
	// wg tracks only the goroutines of this call; e.closer.wg additionally
	// tracks the output copiers so that the closer's Wait covers them.
	var wg sync.WaitGroup
	var stdinStreamRC io.ReadCloser
	if e.stdin != nil && opts.Stdin != nil {
		// Wrap the caller's stdin so the copy below can be stopped by closing
		// the wrapper (the output copiers do so when they finish).
		stdinStreamRC = cioutil.NewWrapReadCloser(opts.Stdin)
		wg.Add(1)
		go func() {
			if _, err := io.Copy(e.stdin, stdinStreamRC); err != nil {
				log.L.WithError(err).Errorf("Failed to redirect stdin for container exec %q", e.id)
			}
			log.L.Infof("Container exec %q stdin closed", e.id)
			if opts.StdinOnce && !opts.Tty {
				// Single-use stdin without a tty: close the exec's stdin and
				// invoke the caller-provided CloseStdin; output stays open.
				e.stdin.Close()
				if err := opts.CloseStdin(); err != nil {
					log.L.WithError(err).Errorf("Failed to close stdin for container exec %q", e.id)
				}
			} else {
				// Otherwise close the exec's output pipes, which ends the
				// output copiers below.
				if e.stdout != nil {
					e.stdout.Close()
				}
				if e.stderr != nil {
					e.stderr.Close()
				}
			}
			wg.Done()
		}()
	}

	// attachOutput copies one exec output pipe into the caller's stream, then
	// closes both ends and stops the stdin copy, if there is one.
	attachOutput := func(t StreamType, stream io.WriteCloser, out io.ReadCloser) {
		if _, err := io.Copy(stream, out); err != nil {
			log.L.WithError(err).Errorf("Failed to pipe %q for container exec %q", t, e.id)
		}
		out.Close()
		stream.Close()
		if stdinStreamRC != nil {
			stdinStreamRC.Close()
		}
		e.closer.wg.Done()
		wg.Done()
		log.L.Debugf("Finish piping %q of container exec %q", t, e.id)
	}

	if opts.Stdout != nil {
		wg.Add(1)
		// Closer should wait for this routine to be over.
		e.closer.wg.Add(1)
		go attachOutput(Stdout, opts.Stdout, e.stdout)
	}

	// With a tty, stderr is not attached separately.
	if !opts.Tty && opts.Stderr != nil {
		wg.Add(1)
		// Closer should wait for this routine to be over.
		e.closer.wg.Add(1)
		go attachOutput(Stderr, opts.Stderr, e.stderr)
	}

	// Turn completion of wg into a channel close for the caller.
	done := make(chan struct{})
	go func() {
		wg.Wait()
		close(done)
	}()
	return done
}
// Cancel cancels exec io by delegating to the closer's Cancel.
func (e *ExecIO) Cancel() {
	e.closer.Cancel()
}
// Wait blocks until the exec io has finished, i.e. until the closer's
// WaitGroup (which Attach registers its output copiers on) is done.
func (e *ExecIO) Wait() {
	e.closer.Wait()
}
// Close closes the opened stdio pipes, if any, and then the FIFO set, if any.
// The returned error is the one from closing the FIFO set; it is nil when no
// FIFO set is present.
func (e *ExecIO) Close() error {
	if closer := e.closer; closer != nil {
		closer.Close()
	}
	if e.fifos == nil {
		return nil
	}
	return e.fifos.Close()
}

View File

@@ -1,144 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package io
import (
"context"
"io"
"os"
"path/filepath"
"sync"
"syscall"
"github.com/containerd/containerd/v2/pkg/cio"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// AttachOptions specifies how to attach to a container.
type AttachOptions struct {
	// Stdin, Stdout and Stderr are the caller-side streams. The Attach
	// implementations skip any stream that is nil.
	Stdin  io.Reader
	Stdout io.WriteCloser
	Stderr io.WriteCloser
	// Tty, when set, makes the Attach implementations skip the separate
	// stderr stream.
	Tty bool
	// StdinOnce, together with Tty being false, makes the Attach
	// implementations close the container's stdin once the copy from Stdin
	// has ended.
	StdinOnce bool
	// CloseStdin is the function to close container stdin.
	CloseStdin func() error
}
// StreamType is the type of the stream: stdin, stdout or stderr.
type StreamType string

const (
	// Stdin stream type.
	Stdin StreamType = "stdin"
	// Stdout stream type; its value is taken from the CRI API's runtime.Stdout.
	Stdout = StreamType(runtime.Stdout)
	// Stderr stream type; its value is taken from the CRI API's runtime.Stderr.
	Stderr = StreamType(runtime.Stderr)
)
// wgCloser groups what is needed to shut a set of pipes down: the closers
// themselves, a WaitGroup that users of the pipes register on, and the
// context/cancel pair the pipes were opened with.
type wgCloser struct {
	ctx    context.Context
	wg     *sync.WaitGroup
	set    []io.Closer
	cancel context.CancelFunc
}

// Wait blocks until the WaitGroup is done.
func (g *wgCloser) Wait() {
	g.wg.Wait()
}

// Close closes every closer in the set, in order. Errors returned by the
// individual Close calls are discarded.
func (g *wgCloser) Close() {
	for i := range g.set {
		g.set[i].Close()
	}
}

// Cancel invokes the stored cancel function.
func (g *wgCloser) Cancel() {
	g.cancel()
}
// newFifos makes sure the "io" directory below root exists (mode 0700) and
// creates a FIFO set for id inside it. When stdin is false the set's Stdin
// path is cleared, so that no stdin pipe is opened for it later.
func newFifos(root, id string, tty, stdin bool) (*cio.FIFOSet, error) {
	ioDir := filepath.Join(root, "io")
	if err := os.MkdirAll(ioDir, 0700); err != nil {
		return nil, err
	}
	set, err := cio.NewFIFOSetInDir(ioDir, id, tty)
	if err != nil {
		return nil, err
	}
	if stdin {
		return set, nil
	}
	set.Stdin = ""
	return set, nil
}
// stdioPipes holds the opened ends of the stdio pipes. A field is left nil
// when the FIFO set has no path for that stream (see newStdioPipes).
type stdioPipes struct {
	// stdin is the write end feeding the process's stdin.
	stdin io.WriteCloser
	// stdout and stderr are the read ends of the process's output.
	stdout io.ReadCloser
	stderr io.ReadCloser
}
// newStdioPipes opens a pipe for every stream that has a path in fifos: stdin
// write-only, stdout and stderr read-only, each with O_CREAT|O_NONBLOCK and
// mode 0700. All pipes share one cancellable context. On success the pipes
// are returned together with a wgCloser holding them, a fresh WaitGroup and
// the context's cancel function. On failure every pipe opened so far is
// closed and the context is cancelled.
func newStdioPipes(fifos *cio.FIFOSet) (_ *stdioPipes, _ *wgCloser, err error) {
	ctx, cancel := context.WithCancel(context.Background())
	var (
		opened []io.Closer
		pipes  = &stdioPipes{}
	)
	defer func() {
		// err is the named result, so this sees whatever is being returned.
		if err == nil {
			return
		}
		for _, c := range opened {
			c.Close()
		}
		cancel()
	}()

	// open opens one pipe in the given access mode and records it for cleanup.
	open := func(path string, access int) (io.ReadWriteCloser, error) {
		rwc, openErr := openPipe(ctx, path, access|syscall.O_CREAT|syscall.O_NONBLOCK, 0700)
		if openErr != nil {
			return nil, openErr
		}
		opened = append(opened, rwc)
		return rwc, nil
	}

	if fifos.Stdin != "" {
		in, openErr := open(fifos.Stdin, syscall.O_WRONLY)
		if openErr != nil {
			return nil, nil, openErr
		}
		pipes.stdin = in
	}
	if fifos.Stdout != "" {
		out, openErr := open(fifos.Stdout, syscall.O_RDONLY)
		if openErr != nil {
			return nil, nil, openErr
		}
		pipes.stdout = out
	}
	if fifos.Stderr != "" {
		errPipe, openErr := open(fifos.Stderr, syscall.O_RDONLY)
		if openErr != nil {
			return nil, nil, openErr
		}
		pipes.stderr = errPipe
	}

	closer := &wgCloser{
		wg:     &sync.WaitGroup{},
		set:    opened,
		ctx:    ctx,
		cancel: cancel,
	}
	return pipes, closer, nil
}

View File

@@ -1,31 +0,0 @@
//go:build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package io
import (
"context"
"io"
"os"
"github.com/containerd/fifo"
)
// openPipe opens the pipe at path fn for non-Windows builds by delegating to
// fifo.OpenFifo with the given context, flags and permissions.
func openPipe(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error) {
	return fifo.OpenFifo(ctx, fn, flag, perm)
}

View File

@@ -1,83 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package io
import (
"context"
"fmt"
"io"
"net"
"os"
"sync"
winio "github.com/Microsoft/go-winio"
)
// pipe is the io.ReadWriteCloser returned by openPipe in this file. It wraps
// a listener and the single connection accepted on it.
type pipe struct {
	// l is the listener created by openPipe.
	l net.Listener
	// con is the accepted connection; it stays nil if Accept failed.
	con net.Conn
	// conErr is the error returned by Accept, if any.
	conErr error
	// conWg is released once the Accept goroutine has set con or conErr;
	// Read, Write and Close wait on it before touching either field.
	conWg sync.WaitGroup
}
// openPipe starts listening on the pipe fn and returns a pipe value right
// away. The single connection is accepted in the background, and the pipe is
// closed when ctx is done. flag and perm are not used by this implementation.
func openPipe(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error) {
	listener, err := winio.ListenPipe(fn, nil)
	if err != nil {
		return nil, err
	}
	p := &pipe{l: listener}

	// Accept exactly one connection; Read/Write/Close wait on conWg for it.
	p.conWg.Add(1)
	go func() {
		defer p.conWg.Done()
		if conn, acceptErr := listener.Accept(); acceptErr != nil {
			p.conErr = acceptErr
		} else {
			p.con = conn
		}
	}()

	// Tie the pipe's lifetime to the context.
	go func() {
		<-ctx.Done()
		p.Close()
	}()
	return p, nil
}
// Write waits until the background Accept has finished and then writes b to
// the accepted connection. If Accept failed, its error is returned wrapped
// and nothing is written.
func (p *pipe) Write(b []byte) (int, error) {
	p.conWg.Wait()
	if acceptErr := p.conErr; acceptErr != nil {
		return 0, fmt.Errorf("connection error: %w", acceptErr)
	}
	return p.con.Write(b)
}
// Read waits until the background Accept has finished and then reads from
// the accepted connection into b. If Accept failed, its error is returned
// wrapped and nothing is read.
func (p *pipe) Read(b []byte) (int, error) {
	p.conWg.Wait()
	if acceptErr := p.conErr; acceptErr != nil {
		return 0, fmt.Errorf("connection error: %w", acceptErr)
	}
	return p.con.Read(b)
}
// Close closes the listener, waits for the background Accept to finish and
// then closes the accepted connection, returning that Close's error. When no
// connection was accepted, the recorded Accept error is returned instead. The
// error from closing the listener is discarded.
func (p *pipe) Close() error {
	p.l.Close()
	p.conWg.Wait()
	if p.con == nil {
		return p.conErr
	}
	return p.con.Close()
}

View File

@@ -1,213 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package io
import (
"bufio"
"bytes"
"fmt"
"io"
"time"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
cioutil "github.com/containerd/containerd/v2/pkg/ioutil"
"github.com/containerd/log"
)
const (
	// delimiter used in CRI logging format.
	delimiter = ' '
	// eol is the end-of-line character.
	eol = '\n'
	// timestampFormat is the timestamp format used in CRI logging format.
	timestampFormat = time.RFC3339Nano
	// defaultBufSize is the default size of the read buffer in bytes.
	defaultBufSize = 4096
)
// NewDiscardLogger creates a logger which discards all the input: it wraps
// io.Discard in cioutil.NewNopWriteCloser.
func NewDiscardLogger() io.WriteCloser {
	return cioutil.NewNopWriteCloser(io.Discard)
}
// NewCRILogger returns the write end of an in-memory pipe together with a
// channel. Whatever is written to the write closer is read by a background
// goroutine running redirectLogs, which decorates the lines into the CRI log
// format and writes them to w. The channel is closed when redirectLogs
// returns, i.e. when the logger has stopped. path is only used in log
// messages. maxLen is the max length limit of a line; a line longer than the
// limit will be cut into multiple lines.
func NewCRILogger(path string, w io.Writer, stream StreamType, maxLen int) (io.WriteCloser, <-chan struct{}) {
	log.L.Debugf("Start writing stream %q to log file %q", stream, path)
	reader, writer := io.Pipe()
	stopped := make(chan struct{})
	go func() {
		redirectLogs(path, reader, w, stream, maxLen)
		close(stopped)
	}()
	return writer, stopped
}
// readLine reads one line from b, stripping a trailing "\n" or "\r\n".
//
// It exists because bufio.Reader.ReadLine hides both read errors and trailing
// newlines (see https://golang.org/pkg/bufio/#Reader.ReadLine): at io.EOF the
// caller cannot tell "CONTENT\n" from "CONTENT", since both come back as
// "CONTENT" without an error. This function follows the standard library
// implementation (https://golang.org/src/bufio/bufio.go?s=9537:9604#L359) but
// additionally returns every error from ReadSlice.
//
// Results:
//   - The buffer filled up before a newline was found: the buffered bytes are
//     returned with isPrefix == true and a nil error. A trailing '\r' is
//     pushed back so that a "\r\n" straddling the buffer is handled by the
//     next call.
//   - A complete line: the line without its terminator, with a nil error.
//   - Input ended without a newline: the remaining bytes (nil if there are
//     none) together with the read error.
//
// In other words, outside the isPrefix case err != nil if and only if the
// line did not end with a newline.
func readLine(b *bufio.Reader) (line []byte, isPrefix bool, err error) {
	line, err = b.ReadSlice('\n')
	n := len(line)

	switch {
	case err == bufio.ErrBufferFull:
		// Handle the case where "\r\n" straddles the buffer: give the '\r'
		// back to the reader so it is seen together with the '\n'.
		if n > 0 && line[n-1] == '\r' {
			if unreadErr := b.UnreadByte(); unreadErr != nil {
				panic(fmt.Sprintf("invalid unread %v", unreadErr))
			}
			line = line[:n-1]
		}
		return line, true, nil
	case n == 0:
		if err != nil {
			line = nil
		}
		return line, false, err
	case line[n-1] != '\n':
		// Unterminated final chunk; err carries the reason.
		return line, false, err
	}

	// "ReadSlice returns err != nil if and only if line does not end in delim"
	// (See https://golang.org/pkg/bufio/#Reader.ReadSlice).
	if err != nil {
		panic(fmt.Sprintf("full read with unexpected error %v", err))
	}
	end := n - 1
	if end > 0 && line[end-1] == '\r' {
		end--
	}
	return line[:end], false, nil
}
// redirectLogs reads rc line by line until it hits an error (including
// io.EOF) and writes every line to w as
//
//	<timestamp> <stream> <tag> <content>\n
//
// where tag is the CRI "full" tag for a complete line and the CRI "partial"
// tag for a fragment. A fragment is produced when a line is longer than
// maxLen (maxLen <= 0 disables the limit) or when the stream ends without a
// trailing newline. path is only used in log messages. rc is closed before
// the function returns. Write errors on w are logged and otherwise ignored so
// that the container output keeps being drained.
func redirectLogs(path string, rc io.ReadCloser, w io.Writer, s StreamType, maxLen int) {
	defer rc.Close()
	var (
		stream    = []byte(s)
		delimiter = []byte{delimiter}
		partial   = []byte(runtime.LogTagPartial)
		full      = []byte(runtime.LogTagFull)
		// buf holds the chunks of the current, not yet written, line and
		// length is the total number of bytes in buf.
		buf    [][]byte
		length int

		bufSize = defaultBufSize

		timeBuffer = make([]byte, len(timestampFormat))
		lineBuffer = bytes.Buffer{}
	)
	// Make sure bufSize <= maxLen
	if maxLen > 0 && maxLen < bufSize {
		bufSize = maxLen
	}
	r := bufio.NewReaderSize(rc, bufSize)
	// writeLineBuffer formats one entry (headers followed by the chunks in
	// lineBytes) into the reusable lineBuffer and flushes it to w.
	writeLineBuffer := func(tag []byte, lineBytes [][]byte) {
		timeBuffer = time.Now().AppendFormat(timeBuffer[:0], timestampFormat)
		headers := [][]byte{timeBuffer, stream, tag}

		lineBuffer.Reset()
		for _, h := range headers {
			lineBuffer.Write(h)
			lineBuffer.Write(delimiter)
		}
		for _, l := range lineBytes {
			lineBuffer.Write(l)
		}
		lineBuffer.WriteByte(eol)
		if n, err := lineBuffer.WriteTo(w); err == nil {
			outputEntries.Inc()
			outputBytes.Inc(float64(n))
		} else {
			log.L.WithError(err).Errorf("Fail to write %q log to log file %q", s, path)
			// Continue on write error to drain the container output.
		}
	}
	for {
		var stop bool
		newLine, isPrefix, err := readLine(r)
		// NOTE(random-liu): readLine can return actual content even if there is an error.
		if len(newLine) > 0 {
			inputEntries.Inc()
			inputBytes.Inc(float64(len(newLine)))
			// Buffer returned by ReadLine will change after
			// next read, copy it.
			l := make([]byte, len(newLine))
			copy(l, newLine)
			buf = append(buf, l)
			length += len(l)
		}
		if err != nil {
			if err == io.EOF {
				log.L.Tracef("Getting EOF from stream %q while redirecting to log file %q", s, path)
			} else {
				log.L.WithError(err).Errorf("An error occurred when redirecting stream %q to log file %q", s, path)
			}
			if length == 0 {
				// No content left to write, break.
				break
			}
			// Stop after writing the content left in buffer.
			stop = true
		}
		// Emit partial entries until the buffered content fits into maxLen.
		// This has to be a loop: bufio.NewReaderSize only guarantees a
		// buffer of *at least* the requested size, so for a very small
		// maxLen a single read can exceed the limit several times over.
		// Splitting only once would leave an over-long remainder behind and
		// break the invariant checked below on the following iteration.
		for maxLen > 0 && length > maxLen {
			exceedLen := length - maxLen
			last := buf[len(buf)-1]
			if exceedLen > len(last) {
				// exceedLen must <= len(last), or else the buffer
				// should have be written in the previous iteration.
				panic("exceed length should <= last buffer size")
			}
			buf[len(buf)-1] = last[:len(last)-exceedLen]
			writeLineBuffer(partial, buf)
			splitEntries.Inc()
			buf = [][]byte{last[len(last)-exceedLen:]}
			length = exceedLen
		}
		if isPrefix {
			continue
		}
		if stop {
			// readLine only returns error when the message doesn't
			// end with a newline, in that case it should be treated
			// as a partial line.
			writeLineBuffer(partial, buf)
		} else {
			writeLineBuffer(full, buf)
		}
		buf = nil
		length = 0
		if stop {
			break
		}
	}
	log.L.Debugf("Finish redirecting stream %q to log file %q", s, path)
}

View File

@@ -1,259 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package io
import (
"bytes"
"io"
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
cioutil "github.com/containerd/containerd/v2/pkg/ioutil"
)
// TestRedirectLogs feeds fixed inputs through redirectLogs and checks that
// every emitted entry has a parsable timestamp, the expected stream, the
// expected full/partial tag and the expected content.
func TestRedirectLogs(t *testing.T) {
	// defaultBufSize is even number
	const maxLen = defaultBufSize * 4
	for desc, test := range map[string]struct {
		input   string
		stream  StreamType
		maxLen  int
		tag     []runtime.LogTag
		content []string
	}{
		"stdout log": {
			input:  "test stdout log 1\ntest stdout log 2\n",
			stream: Stdout,
			maxLen: maxLen,
			tag: []runtime.LogTag{
				runtime.LogTagFull,
				runtime.LogTagFull,
			},
			content: []string{
				"test stdout log 1",
				"test stdout log 2",
			},
		},
		"stderr log": {
			input:  "test stderr log 1\ntest stderr log 2\n",
			stream: Stderr,
			maxLen: maxLen,
			tag: []runtime.LogTag{
				runtime.LogTagFull,
				runtime.LogTagFull,
			},
			content: []string{
				"test stderr log 1",
				"test stderr log 2",
			},
		},
		"log ends without newline": {
			input:  "test stderr log 1\ntest stderr log 2",
			stream: Stderr,
			maxLen: maxLen,
			tag: []runtime.LogTag{
				runtime.LogTagFull,
				runtime.LogTagPartial,
			},
			content: []string{
				"test stderr log 1",
				"test stderr log 2",
			},
		},
		"log length equal to buffer size": {
			input:  strings.Repeat("a", defaultBufSize) + "\n" + strings.Repeat("a", defaultBufSize) + "\n",
			stream: Stdout,
			maxLen: maxLen,
			tag: []runtime.LogTag{
				runtime.LogTagFull,
				runtime.LogTagFull,
			},
			content: []string{
				strings.Repeat("a", defaultBufSize),
				strings.Repeat("a", defaultBufSize),
			},
		},
		"log length longer than buffer size": {
			input:  strings.Repeat("a", defaultBufSize*2+10) + "\n" + strings.Repeat("a", defaultBufSize*2+20) + "\n",
			stream: Stdout,
			maxLen: maxLen,
			tag: []runtime.LogTag{
				runtime.LogTagFull,
				runtime.LogTagFull,
			},
			content: []string{
				strings.Repeat("a", defaultBufSize*2+10),
				strings.Repeat("a", defaultBufSize*2+20),
			},
		},
		"log length equal to max length": {
			input:  strings.Repeat("a", maxLen) + "\n" + strings.Repeat("a", maxLen) + "\n",
			stream: Stdout,
			maxLen: maxLen,
			tag: []runtime.LogTag{
				runtime.LogTagFull,
				runtime.LogTagFull,
			},
			content: []string{
				strings.Repeat("a", maxLen),
				strings.Repeat("a", maxLen),
			},
		},
		"log length exceed max length by 1": {
			input:  strings.Repeat("a", maxLen+1) + "\n" + strings.Repeat("a", maxLen+1) + "\n",
			stream: Stdout,
			maxLen: maxLen,
			tag: []runtime.LogTag{
				runtime.LogTagPartial,
				runtime.LogTagFull,
				runtime.LogTagPartial,
				runtime.LogTagFull,
			},
			content: []string{
				strings.Repeat("a", maxLen),
				"a",
				strings.Repeat("a", maxLen),
				"a",
			},
		},
		"log length longer than max length": {
			input:  strings.Repeat("a", maxLen*2) + "\n" + strings.Repeat("a", maxLen*2+1) + "\n",
			stream: Stdout,
			maxLen: maxLen,
			tag: []runtime.LogTag{
				runtime.LogTagPartial,
				runtime.LogTagFull,
				runtime.LogTagPartial,
				runtime.LogTagPartial,
				runtime.LogTagFull,
			},
			content: []string{
				strings.Repeat("a", maxLen),
				strings.Repeat("a", maxLen),
				strings.Repeat("a", maxLen),
				strings.Repeat("a", maxLen),
				"a",
			},
		},
		"max length shorter than buffer size": {
			input:  strings.Repeat("a", defaultBufSize*3/2+10) + "\n" + strings.Repeat("a", defaultBufSize*3/2+20) + "\n",
			stream: Stdout,
			maxLen: defaultBufSize / 2,
			tag: []runtime.LogTag{
				runtime.LogTagPartial,
				runtime.LogTagPartial,
				runtime.LogTagPartial,
				runtime.LogTagFull,
				runtime.LogTagPartial,
				runtime.LogTagPartial,
				runtime.LogTagPartial,
				runtime.LogTagFull,
			},
			content: []string{
				strings.Repeat("a", defaultBufSize*1/2),
				strings.Repeat("a", defaultBufSize*1/2),
				strings.Repeat("a", defaultBufSize*1/2),
				strings.Repeat("a", 10),
				strings.Repeat("a", defaultBufSize*1/2),
				strings.Repeat("a", defaultBufSize*1/2),
				strings.Repeat("a", defaultBufSize*1/2),
				strings.Repeat("a", 20),
			},
		},
		"log length longer than max length, and (maxLen % defaultBufSize != 0)": {
			input:  strings.Repeat("a", defaultBufSize*2+10) + "\n" + strings.Repeat("a", defaultBufSize*2+20) + "\n",
			stream: Stdout,
			maxLen: defaultBufSize * 3 / 2,
			tag: []runtime.LogTag{
				runtime.LogTagPartial,
				runtime.LogTagFull,
				runtime.LogTagPartial,
				runtime.LogTagFull,
			},
			content: []string{
				strings.Repeat("a", defaultBufSize*3/2),
				strings.Repeat("a", defaultBufSize*1/2+10),
				strings.Repeat("a", defaultBufSize*3/2),
				strings.Repeat("a", defaultBufSize*1/2+20),
			},
		},
		"no limit if max length is 0": {
			input:  strings.Repeat("a", defaultBufSize*10+10) + "\n" + strings.Repeat("a", defaultBufSize*10+20) + "\n",
			stream: Stdout,
			maxLen: 0,
			tag: []runtime.LogTag{
				runtime.LogTagFull,
				runtime.LogTagFull,
			},
			content: []string{
				strings.Repeat("a", defaultBufSize*10+10),
				strings.Repeat("a", defaultBufSize*10+20),
			},
		},
		"no limit if max length is negative": {
			input:  strings.Repeat("a", defaultBufSize*10+10) + "\n" + strings.Repeat("a", defaultBufSize*10+20) + "\n",
			stream: Stdout,
			maxLen: -1,
			tag: []runtime.LogTag{
				runtime.LogTagFull,
				runtime.LogTagFull,
			},
			content: []string{
				strings.Repeat("a", defaultBufSize*10+10),
				strings.Repeat("a", defaultBufSize*10+20),
			},
		},
		"log length longer than buffer size with tailing \\r\\n": {
			input:  strings.Repeat("a", defaultBufSize-1) + "\r\n" + strings.Repeat("a", defaultBufSize-1) + "\r\n",
			stream: Stdout,
			maxLen: -1,
			tag: []runtime.LogTag{
				runtime.LogTagFull,
				runtime.LogTagFull,
			},
			content: []string{
				strings.Repeat("a", defaultBufSize-1),
				strings.Repeat("a", defaultBufSize-1),
			},
		},
	} {
		t.Run(desc, func(t *testing.T) {
			rc := io.NopCloser(strings.NewReader(test.input))
			buf := bytes.NewBuffer(nil)
			wc := cioutil.NewNopWriteCloser(buf)
			redirectLogs("test-path", rc, wc, test.stream, test.maxLen)
			output := buf.String()
			lines := strings.Split(output, "\n")
			lines = lines[:len(lines)-1] // Discard empty string after last \n
			// Abort on a count mismatch: the loop below indexes test.tag
			// and test.content by line number and would otherwise panic
			// with an index out of range instead of reporting the failure.
			require.Len(t, lines, len(test.content))
			for i := range lines {
				// Each entry is "<timestamp> <stream> <tag> <content>".
				fields := strings.SplitN(lines[i], string([]byte{delimiter}), 4)
				require.Len(t, fields, 4)
				_, err := time.Parse(timestampFormat, fields[0])
				assert.NoError(t, err)
				assert.EqualValues(t, test.stream, fields[1])
				assert.Equal(t, string(test.tag[i]), fields[2])
				assert.Equal(t, test.content[i], fields[3])
			}
		})
	}
}

View File

@@ -1,42 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package io
import "github.com/docker/go-metrics"
// Counters recording CRI logging volume; they are created and registered in
// init.
var (
	// inputEntries counts the log entries received.
	inputEntries metrics.Counter
	// outputEntries counts the log entries successfully written.
	outputEntries metrics.Counter
	// inputBytes is the size of the logs received.
	inputBytes metrics.Counter
	// outputBytes is the size of the logs successfully written.
	outputBytes metrics.Counter
	// splitEntries counts the extra entries created by splitting an
	// over-long log entry.
	splitEntries metrics.Counter
)
// init creates the logging counters in the "containerd"/"cri" metrics
// namespace and registers that namespace.
func init() {
	// These CRI metrics record input and output logging volume.
	ns := metrics.NewNamespace("containerd", "cri", nil)

	inputEntries = ns.NewCounter("input_entries", "Number of log entries received")
	outputEntries = ns.NewCounter("output_entries", "Number of log entries successfully written to disk")
	inputBytes = ns.NewCounter("input_bytes", "Size of logs received")
	outputBytes = ns.NewCounter("output_bytes", "Size of logs successfully written to disk")
	splitEntries = ns.NewCounter("split_entries", "Number of extra log entries created by splitting the "+
		"original log entry. This happens when the original log entry exceeds length limit. "+
		"This metric does not count the original log entry.")

	// Registration must happen after all counters were added to ns.
	metrics.Register(ns)
}

View File

@@ -1,40 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package labels
const (
	// criContainerdPrefix is common prefix for cri-containerd
	criContainerdPrefix = "io.cri-containerd"
	// ImageLabelKey is the label key indicating the image is managed by cri plugin.
	ImageLabelKey = criContainerdPrefix + ".image"
	// ImageLabelValue is the label value indicating the image is managed by cri plugin.
	ImageLabelValue = "managed"
	// PinnedImageLabelKey is the label key indicating the image is pinned.
	PinnedImageLabelKey = criContainerdPrefix + ".pinned"
	// PinnedImageLabelValue is the label value indicating the image is pinned.
	PinnedImageLabelValue = "pinned"
	// ContainerKindLabel is a label key indicating whether a container is a sandbox container or an application container
	ContainerKindLabel = criContainerdPrefix + ".kind"
	// ContainerKindSandbox is a label value indicating the container is a sandbox container
	ContainerKindSandbox = "sandbox"
	// ContainerKindContainer is a label value indicating the container is an application container
	ContainerKindContainer = "container"
	// ContainerMetadataExtension is an extension name that identifies the metadata of a container in CreateContainerRequest
	ContainerMetadataExtension = criContainerdPrefix + ".container.metadata"
	// SandboxMetadataExtension is an extension name that identifies the metadata of a sandbox in CreateContainerRequest
	SandboxMetadataExtension = criContainerdPrefix + ".sandbox.metadata"
)

View File

@@ -1,36 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nri
import (
"context"
"time"
criconfig "github.com/containerd/containerd/v2/pkg/cri/config"
cstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
sstore "github.com/containerd/containerd/v2/pkg/cri/store/sandbox"
cri "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// CRIImplementation is the set of CRI functionality the NRI integration in
// this package relies on.
type CRIImplementation interface {
	// Config returns the CRI configuration.
	Config() *criconfig.Config
	// SandboxStore returns the store used to look up and list pod sandboxes.
	SandboxStore() *sstore.Store
	// ContainerStore returns the store used to look up and list containers.
	ContainerStore() *cstore.Store
	// ContainerMetadataExtensionKey returns the key under which container
	// metadata is stored in a container's extensions.
	ContainerMetadataExtensionKey() string
	// UpdateContainerResources applies the resource update request to the
	// container, given its current status, and returns the resulting status.
	UpdateContainerResources(context.Context, cstore.Container, *cri.UpdateContainerResourcesRequest, cstore.Status) (cstore.Status, error)
	// StopContainer stops the container.
	// NOTE(review): the duration is presumably the stop timeout; confirm
	// against the implementation.
	StopContainer(context.Context, cstore.Container, time.Duration) error
}

View File

@@ -1,839 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nri
import (
"context"
"encoding/json"
"fmt"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/pkg/blockio"
"github.com/containerd/containerd/v2/pkg/cri/annotations"
"github.com/containerd/containerd/v2/pkg/cri/constants"
cstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
sstore "github.com/containerd/containerd/v2/pkg/cri/store/sandbox"
ctrdutil "github.com/containerd/containerd/v2/pkg/cri/util"
"github.com/containerd/errdefs"
"github.com/containerd/log"
"github.com/containerd/typeurl/v2"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
cri "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/pkg/nri"
"github.com/containerd/nri/pkg/api"
nrigen "github.com/containerd/nri/pkg/runtime-tools/generate"
)
// API hooks the CRI implementation into the common NRI interface.
type API struct {
	// cri is the CRI implementation; it is set by Register.
	cri CRIImplementation
	// nri is the common NRI interface passed to NewAPI.
	nri nri.API
}
// NewAPI returns an API that wraps the given NRI interface. The CRI
// implementation is not set until Register is called.
func NewAPI(nri nri.API) *API {
	return &API{
		nri: nri,
	}
}
// IsDisabled reports whether NRI cannot be used: the receiver is nil, it has
// no NRI interface, or the NRI interface reports that it is not enabled.
func (a *API) IsDisabled() bool {
	return a == nil || a.nri == nil || !a.nri.IsEnabled()
}
// IsEnabled is the negation of IsDisabled.
func (a *API) IsEnabled() bool { return !a.IsDisabled() }
// Register stores the CRI implementation, registers the API as an NRI domain
// and starts NRI, returning the result of the start. It does nothing and
// returns nil when NRI is disabled.
func (a *API) Register(cri CRIImplementation) error {
	if a.IsDisabled() {
		return nil
	}

	// The CRI implementation is set before the domain is registered and NRI
	// is started, since the domain callbacks use a.cri.
	a.cri = cri
	nri.RegisterDomain(a)

	return a.nri.Start()
}
//
// CRI-NRI lifecycle hook interface
//
// These functions are used to hook NRI into the processing of
// the corresponding CRI lifecycle events using the common NRI
// interface.
//
// RunPodSandbox relays the start of a pod sandbox to NRI. When NRI reports an
// error, the pod is additionally handed to NRI's StopPodSandbox and
// RemovePodSandbox (their results are not checked) before the original error
// is returned. It returns nil without doing anything when NRI is disabled.
func (a *API) RunPodSandbox(ctx context.Context, criPod *sstore.Sandbox) error {
	if a.IsDisabled() {
		return nil
	}

	pod := a.nriPodSandbox(criPod)
	if err := a.nri.RunPodSandbox(ctx, pod); err != nil {
		// Roll the pod back in NRI; the outcome of the rollback is ignored.
		a.nri.StopPodSandbox(ctx, pod)
		a.nri.RemovePodSandbox(ctx, pod)
		return err
	}

	return nil
}
// StopPodSandbox relays the stop of a pod sandbox to NRI and returns NRI's
// result. It returns nil without doing anything when NRI is disabled.
func (a *API) StopPodSandbox(ctx context.Context, criPod *sstore.Sandbox) error {
	if a.IsDisabled() {
		return nil
	}

	return a.nri.StopPodSandbox(ctx, a.nriPodSandbox(criPod))
}
// RemovePodSandbox relays the removal of a pod sandbox to NRI and returns
// NRI's result. It returns nil without doing anything when NRI is disabled.
func (a *API) RemovePodSandbox(ctx context.Context, criPod *sstore.Sandbox) error {
	if a.IsDisabled() {
		return nil
	}

	return a.nri.RemovePodSandbox(ctx, a.nriPodSandbox(criPod))
}
// CreateContainer relays the creation of a container to NRI and returns the
// container adjustment requested by NRI.
//
// The pod of the container is looked up in the CRI sandbox store using the
// sandbox ID found in the annotations of spec; an error is returned if the
// pod cannot be found. When NRI is disabled nothing is done and a nil
// adjustment is returned without error, like the other lifecycle hooks.
func (a *API) CreateContainer(ctx context.Context, ctrs *containers.Container, spec *runtimespec.Spec) (*api.ContainerAdjustment, error) {
	// Guard like every other lifecycle hook: without it a disabled or
	// unregistered API would dereference a nil a.cri / a.nri below.
	if a.IsDisabled() {
		return nil, nil
	}

	ctr := a.nriContainer(ctrs, spec)

	criPod, err := a.cri.SandboxStore().Get(ctr.GetPodSandboxID())
	if err != nil {
		return nil, err
	}

	pod := a.nriPodSandbox(&criPod)

	return a.nri.CreateContainer(ctx, pod, ctr)
}
// PostCreateContainer relays the post-create event of a container to NRI and
// returns NRI's result. It returns nil without doing anything when NRI is
// disabled.
func (a *API) PostCreateContainer(ctx context.Context, criPod *sstore.Sandbox, criCtr *cstore.Container) error {
	if a.IsDisabled() {
		return nil
	}

	nriPod := a.nriPodSandbox(criPod)
	nriCtr := a.nriContainer(criCtr, nil)

	return a.nri.PostCreateContainer(ctx, nriPod, nriCtr)
}
// StartContainer relays the start of a container to NRI and returns NRI's
// result. It returns nil without doing anything when NRI is disabled.
func (a *API) StartContainer(ctx context.Context, criPod *sstore.Sandbox, criCtr *cstore.Container) error {
	if a.IsDisabled() {
		return nil
	}

	nriPod := a.nriPodSandbox(criPod)
	nriCtr := a.nriContainer(criCtr, nil)

	return a.nri.StartContainer(ctx, nriPod, nriCtr)
}
// PostStartContainer relays the post-start event of a container to NRI and
// returns NRI's result. It returns nil without doing anything when NRI is
// disabled.
func (a *API) PostStartContainer(ctx context.Context, criPod *sstore.Sandbox, criCtr *cstore.Container) error {
	if a.IsDisabled() {
		return nil
	}

	nriPod := a.nriPodSandbox(criPod)
	nriCtr := a.nriContainer(criCtr, nil)

	return a.nri.PostStartContainer(ctx, nriPod, nriCtr)
}
// UpdateContainerResources passes the requested resource update of a
// container to NRI and returns the resources NRI answered with, converted
// back to their CRI representation. It returns nil, nil when NRI is disabled.
func (a *API) UpdateContainerResources(ctx context.Context, criPod *sstore.Sandbox, criCtr *cstore.Container, req *cri.LinuxContainerResources) (*cri.LinuxContainerResources, error) {
	if a.IsDisabled() {
		return nil, nil
	}

	// OOM score adjustment handed to the NRI-to-CRI conversion below.
	const noOomAdj = 0

	nriPod := a.nriPodSandbox(criPod)
	nriCtr := a.nriContainer(criCtr, nil)

	updated, err := a.nri.UpdateContainer(ctx, nriPod, nriCtr, api.FromCRILinuxResources(req))
	if err != nil {
		return nil, err
	}

	return updated.ToCRI(noOomAdj), nil
}
// PostUpdateContainerResources relays the post-update event of a container to
// NRI and returns NRI's result. It returns nil without doing anything when
// NRI is disabled.
func (a *API) PostUpdateContainerResources(ctx context.Context, criPod *sstore.Sandbox, criCtr *cstore.Container) error {
	if a.IsDisabled() {
		return nil
	}

	nriPod := a.nriPodSandbox(criPod)
	nriCtr := a.nriContainer(criCtr, nil)

	return a.nri.PostUpdateContainer(ctx, nriPod, nriCtr)
}
// StopContainer relays the stop of a container to NRI and returns NRI's
// result. When no pod is given, or the given pod has an empty ID, a
// placeholder pod carrying only the sandbox ID of the container is used
// instead. It returns nil without doing anything when NRI is disabled.
func (a *API) StopContainer(ctx context.Context, criPod *sstore.Sandbox, criCtr *cstore.Container) error {
	if a.IsDisabled() {
		return nil
	}

	nriCtr := a.nriContainer(criCtr, nil)

	if criPod == nil || criPod.ID == "" {
		// Fall back to a minimal sandbox identified by the container.
		placeholder := sstore.Sandbox{
			Metadata: sstore.Metadata{
				ID: nriCtr.GetPodSandboxID(),
			},
		}
		criPod = &placeholder
	}

	return a.nri.StopContainer(ctx, a.nriPodSandbox(criPod), nriCtr)
}
// NotifyContainerExit tells NRI that a container has exited. The pod of the
// container is looked up in the CRI sandbox store; when the lookup yields no
// pod ID a placeholder pod carrying only the sandbox ID of the container is
// used instead. It does nothing when NRI is disabled.
func (a *API) NotifyContainerExit(ctx context.Context, criCtr *cstore.Container) {
	if a.IsDisabled() {
		return
	}

	nriCtr := a.nriContainer(criCtr, nil)
	sandboxID := nriCtr.GetPodSandboxID()

	// The lookup error is not inspected; an empty ID below covers a failed
	// lookup.
	criPod, _ := a.cri.SandboxStore().Get(sandboxID)
	if criPod.ID == "" {
		criPod = sstore.Sandbox{
			Metadata: sstore.Metadata{
				ID: sandboxID,
			},
		}
	}

	a.nri.NotifyContainerExit(ctx, a.nriPodSandbox(&criPod), nriCtr)
}
// RemoveContainer relays the removal of a container to NRI and returns NRI's
// result. It returns nil without doing anything when NRI is disabled.
func (a *API) RemoveContainer(ctx context.Context, criPod *sstore.Sandbox, criCtr *cstore.Container) error {
	if a.IsDisabled() {
		return nil
	}

	nriPod := a.nriPodSandbox(criPod)
	nriCtr := a.nriContainer(criCtr, nil)

	return a.nri.RemoveContainer(ctx, nriPod, nriCtr)
}
// UndoCreateContainer reverts a container creation in NRI by relaying a stop
// followed by a removal for the container with the given id and spec.
// Failures of either step are logged and not returned. It does nothing when
// NRI is disabled.
func (a *API) UndoCreateContainer(ctx context.Context, criPod *sstore.Sandbox, id string, spec *runtimespec.Spec) {
	if a.IsDisabled() {
		return
	}

	nriPod := a.nriPodSandbox(criPod)
	nriCtr := a.nriContainer(&containers.Container{ID: id}, spec)

	if err := a.nri.StopContainer(ctx, nriPod, nriCtr); err != nil {
		log.G(ctx).WithError(err).Error("container creation undo (stop) failed")
	}

	// The removal is attempted even if the stop failed.
	if err := a.nri.RemoveContainer(ctx, nriPod, nriCtr); err != nil {
		log.G(ctx).WithError(err).Error("container creation undo (remove) failed")
	}
}
// WithContainerAdjustment returns a container creation option that lets NRI
// adjust the OCI Spec of the container being created: the Spec is decoded,
// passed to CreateContainer, modified according to the returned adjustment
// and stored back on the container. When NRI is disabled the returned option
// does nothing.
func (a *API) WithContainerAdjustment() containerd.NewContainerOpts {
	if a.IsDisabled() {
		return func(context.Context, *containerd.Client, *containers.Container) error {
			return nil
		}
	}

	// Drops hugepage limits when the CRI config disables the hugetlb
	// controller.
	checkResources := nrigen.WithResourceChecker(
		func(r *runtimespec.LinuxResources) error {
			if r != nil && a.cri.Config().DisableHugetlbController {
				r.HugepageLimits = nil
			}
			return nil
		},
	)

	// Maps a non-empty RDT class name to an OCI IntelRdt entry using it as
	// the ClosID.
	resolveRdt := nrigen.WithRdtResolver(
		func(className string) (*runtimespec.LinuxIntelRdt, error) {
			if className == "" {
				return nil, nil
			}
			rdt := &runtimespec.LinuxIntelRdt{
				ClosID: className,
			}
			return rdt, nil
		},
	)

	// Maps a non-empty block I/O class name to its OCI block I/O settings.
	resolveBlockIO := nrigen.WithBlockIOResolver(
		func(className string) (*runtimespec.LinuxBlockIO, error) {
			if className == "" {
				return nil, nil
			}
			resolved, err := blockio.ClassNameToLinuxOCI(className)
			if err != nil {
				return nil, err
			}
			return resolved, nil
		},
	)

	return func(ctx context.Context, _ *containerd.Client, c *containers.Container) error {
		spec := &runtimespec.Spec{}
		if err := json.Unmarshal(c.Spec.GetValue(), spec); err != nil {
			return fmt.Errorf("failed to unmarshal container OCI Spec for NRI: %w", err)
		}

		adjust, err := a.CreateContainer(ctx, c, spec)
		if err != nil {
			return fmt.Errorf("failed to get NRI adjustment for container: %w", err)
		}

		// The generators operate on spec in place.
		specGen := generate.Generator{Config: spec}
		nriGen := nrigen.SpecGenerator(&specGen, checkResources, resolveRdt, resolveBlockIO)
		if err = nriGen.Adjust(adjust); err != nil {
			return fmt.Errorf("failed to NRI-adjust container Spec: %w", err)
		}

		adjusted, err := typeurl.MarshalAny(spec)
		if err != nil {
			return fmt.Errorf("failed to marshal NRI-adjusted Spec: %w", err)
		}

		c.Spec = adjusted
		return nil
	}
}
// WithContainerExit returns a process deletion option that notifies NRI of
// the exit of the given container. The notification is sent with a fresh
// background context rather than the context passed to the option. When NRI
// is disabled the returned option does nothing.
func (a *API) WithContainerExit(criCtr *cstore.Container) containerd.ProcessDeleteOpts {
	disabled := a.IsDisabled()

	if disabled {
		return func(context.Context, containerd.Process) error {
			return nil
		}
	}

	return func(context.Context, containerd.Process) error {
		a.NotifyContainerExit(context.Background(), criCtr)
		return nil
	}
}
//
// NRI-CRI 'domain' interface
//
// These functions are used to interface CRI pods and containers
// from the common NRI interface. They implement pod and container
// discovery, lookup and updating of container parameters.
//
const (
	// nriDomain is the name under which this package presents itself, its
	// pods and its containers to NRI.
	nriDomain = constants.K8sContainerdNamespace
)
// GetName returns the name of this NRI domain.
func (a *API) GetName() string {
	return nriDomain
}
// ListPodSandboxes returns the NRI view of every pod in the CRI sandbox store
// whose state is not unknown. The result is never nil.
func (a *API) ListPodSandboxes() []nri.PodSandbox {
	result := []nri.PodSandbox{}

	for _, sandbox := range a.cri.SandboxStore().List() {
		if sandbox.Status.Get().State == sstore.StateUnknown {
			continue
		}
		// Take a per-iteration copy before handing out its address.
		sandbox := sandbox
		result = append(result, a.nriPodSandbox(&sandbox))
	}

	return result
}
// ListContainers returns the NRI view of every container in the CRI container
// store, leaving out containers that have exited or whose state is unknown.
// The result is never nil.
func (a *API) ListContainers() []nri.Container {
	result := []nri.Container{}

	for _, container := range a.cri.ContainerStore().List() {
		switch container.Status.Get().State() {
		case cri.ContainerState_CONTAINER_EXITED, cri.ContainerState_CONTAINER_UNKNOWN:
			continue
		}
		// Take a per-iteration copy before handing out its address.
		container := container
		result = append(result, a.nriContainer(&container, nil))
	}

	return result
}
// GetPodSandbox looks up the pod with the given ID in the CRI sandbox store
// and returns its NRI view. The boolean is false if the lookup failed.
func (a *API) GetPodSandbox(id string) (nri.PodSandbox, bool) {
	pod, err := a.cri.SandboxStore().Get(id)
	if err != nil {
		return nil, false
	}

	return a.nriPodSandbox(&pod), true
}
// GetContainer looks up the container with the given ID in the CRI container
// store and returns its NRI view. The boolean is false if the lookup failed.
func (a *API) GetContainer(id string) (nri.Container, bool) {
	ctr, err := a.cri.ContainerStore().Get(id)
	if err != nil {
		return nil, false
	}

	return a.nriContainer(&ctr, nil), true
}
// UpdateContainer applies a container update requested through NRI. The
// Linux resources of the update are turned into a CRI resource update request
// that is applied while the container status is being updated.
//
// A container that cannot be found in the CRI container store is silently
// skipped. A failed update is reported unless the update is marked with
// IgnoreFailure.
func (a *API) UpdateContainer(ctx context.Context, u *api.ContainerUpdate) error {
	ctr, err := a.cri.ContainerStore().Get(u.ContainerId)
	if err != nil {
		return nil
	}

	applyUpdate := func(status cstore.Status) (cstore.Status, error) {
		criReq := &cri.UpdateContainerResourcesRequest{
			ContainerId: u.ContainerId,
			Linux:       u.GetLinux().GetResources().ToCRI(0),
		}
		return a.cri.UpdateContainerResources(ctx, ctr, criReq, status)
	}

	if err := ctr.Status.UpdateSync(applyUpdate); err != nil && !u.IgnoreFailure {
		return err
	}

	return nil
}
// EvictContainer handles a container eviction requested through NRI by
// stopping the container through the CRI implementation with a zero
// duration. A container that cannot be found in the CRI container store is
// silently skipped.
func (a *API) EvictContainer(ctx context.Context, e *api.ContainerEviction) error {
	ctr, lookupErr := a.cri.ContainerStore().Get(e.ContainerId)
	if lookupErr != nil {
		return nil
	}

	return a.cri.StopContainer(ctx, ctr, 0)
}
//
// NRI integration wrapper for CRI Pods
//
// criPodSandbox wraps a CRI pod sandbox for use with NRI.
type criPodSandbox struct {
	// Sandbox is the wrapped CRI sandbox; it may be nil.
	*sstore.Sandbox
	// spec is the OCI Spec of the sandbox task, or an empty Spec when it
	// could not be obtained. It is never nil when created by nriPodSandbox.
	spec *runtimespec.Spec
	// pid is the pid of the sandbox task, or 0 when it is not known.
	pid uint32
}
// nriPodSandbox wraps a CRI pod sandbox for NRI. The pid and the OCI Spec are
// taken from the task of the sandbox container. Whatever cannot be obtained
// is left at its default (pid 0, empty Spec); the wrapper is returned in any
// case, so the result is never nil.
func (a *API) nriPodSandbox(pod *sstore.Sandbox) *criPodSandbox {
	// Start out with an empty, non-nil Spec so that accessors can rely on it.
	criPod := &criPodSandbox{
		Sandbox: pod,
		spec:    &runtimespec.Spec{},
	}

	if pod == nil || pod.Container == nil {
		return criPod
	}

	ctx := ctrdutil.NamespacedContext()
	task, err := pod.Container.Task(ctx, nil)
	if err != nil {
		// A sandbox without a task is not worth an error message.
		if !errdefs.IsNotFound(err) {
			log.L.WithError(err).Errorf("failed to get task for sandbox container %s",
				pod.Container.ID())
		}
		return criPod
	}

	criPod.pid = task.Pid()
	spec, err := task.Spec(ctx)
	if err != nil {
		// Keep the pid, but stay with the empty Spec.
		log.L.WithError(err).Errorf("failed to get spec for sandbox container %s",
			pod.Container.ID())
		return criPod
	}
	criPod.spec = spec

	return criPod
}
// GetDomain returns the NRI domain name of this package.
func (p *criPodSandbox) GetDomain() string {
	return nriDomain
}
// GetID returns the ID of the wrapped sandbox, or "" if no sandbox is
// wrapped.
func (p *criPodSandbox) GetID() string {
	if p.Sandbox == nil {
		return ""
	}
	return p.ID
}
// GetName returns the name from the metadata of the sandbox config, or "" if
// no sandbox is wrapped.
func (p *criPodSandbox) GetName() string {
	if p.Sandbox == nil {
		return ""
	}
	return p.Config.GetMetadata().GetName()
}
// GetUID returns the UID from the metadata of the sandbox config, or "" if no
// sandbox is wrapped.
func (p *criPodSandbox) GetUID() string {
	if p.Sandbox == nil {
		return ""
	}
	return p.Config.GetMetadata().GetUid()
}
// GetNamespace returns the namespace from the metadata of the sandbox config,
// or "" if no sandbox is wrapped.
func (p *criPodSandbox) GetNamespace() string {
	if p.Sandbox == nil {
		return ""
	}
	return p.Config.GetMetadata().GetNamespace()
}
// GetAnnotations returns a new map holding the annotations of the sandbox
// config merged with the annotations of the sandbox OCI Spec. For a key
// present in both, the value from the Spec is used. It returns nil if no
// sandbox is wrapped.
func (p *criPodSandbox) GetAnnotations() map[string]string {
	if p.Sandbox == nil {
		return nil
	}

	merged := map[string]string{}
	for k, v := range p.Config.GetAnnotations() {
		merged[k] = v
	}
	// Spec annotations are copied last and therefore take precedence.
	for k, v := range p.spec.Annotations {
		merged[k] = v
	}

	return merged
}
// GetLabels returns a new map holding the labels of the sandbox config merged
// with the labels of the sandbox container. For a key present in both, the
// value from the container is used. Only the config labels are returned when
// the sandbox has no container or the container info cannot be fetched. It
// returns nil if no sandbox is wrapped.
func (p *criPodSandbox) GetLabels() map[string]string {
	if p.Sandbox == nil {
		return nil
	}

	merged := map[string]string{}
	for k, v := range p.Config.GetLabels() {
		merged[k] = v
	}

	sandboxCtr := p.Sandbox.Container
	if sandboxCtr == nil {
		return merged
	}

	info, err := sandboxCtr.Info(ctrdutil.NamespacedContext(), containerd.WithoutRefreshedMetadata)
	if err != nil {
		log.L.WithError(err).Errorf("failed to get info for sandbox container %s", sandboxCtr.ID())
		return merged
	}

	// Container labels are copied last and therefore take precedence.
	for k, v := range info.Labels {
		merged[k] = v
	}

	return merged
}
// GetRuntimeHandler returns the runtime handler of the wrapped sandbox, or ""
// if no sandbox is wrapped.
func (p *criPodSandbox) GetRuntimeHandler() string {
	if p.Sandbox == nil {
		return ""
	}
	return p.RuntimeHandler
}
// GetLinuxPodSandbox returns the pod itself as its Linux-specific view.
func (p *criPodSandbox) GetLinuxPodSandbox() nri.LinuxPodSandbox {
	return p
}
// GetLinuxNamespaces returns the Linux namespaces of the sandbox OCI Spec in
// their NRI representation, or nil if the Spec has no Linux section.
func (p *criPodSandbox) GetLinuxNamespaces() []*api.LinuxNamespace {
	linux := p.spec.Linux
	if linux == nil {
		return nil
	}
	return api.FromOCILinuxNamespaces(linux.Namespaces)
}
// GetPodLinuxOverhead returns the overhead from the Linux section of the
// sandbox config in its NRI representation, or nil if no sandbox is wrapped.
func (p *criPodSandbox) GetPodLinuxOverhead() *api.LinuxResources {
	if p.Sandbox == nil {
		return nil
	}
	return api.FromCRILinuxResources(p.Config.GetLinux().GetOverhead())
}
// GetPodLinuxResources returns the resources from the Linux section of the
// sandbox config in their NRI representation, or nil if no sandbox is
// wrapped.
func (p *criPodSandbox) GetPodLinuxResources() *api.LinuxResources {
	if p.Sandbox == nil {
		return nil
	}
	return api.FromCRILinuxResources(p.Config.GetLinux().GetResources())
}
// GetLinuxResources returns the Linux resources of the sandbox OCI Spec in
// their NRI representation, or nil if the Spec has no Linux section.
func (p *criPodSandbox) GetLinuxResources() *api.LinuxResources {
	if p.spec.Linux == nil {
		return nil
	}
	return api.FromOCILinuxResources(p.spec.Linux.Resources, nil)
}
// GetCgroupParent returns the cgroup parent from the Linux section of the
// sandbox config, or "" if no sandbox is wrapped.
func (p *criPodSandbox) GetCgroupParent() string {
	if p.Sandbox == nil {
		return ""
	}
	return p.Config.GetLinux().GetCgroupParent()
}
// GetCgroupsPath returns the cgroups path of the sandbox OCI Spec, or "" if
// the Spec has no Linux section.
func (p *criPodSandbox) GetCgroupsPath() string {
	if p.spec.Linux == nil {
		return ""
	}
	return p.spec.Linux.CgroupsPath
}
// GetPid returns the pid recorded for the sandbox; it is 0 when no pid was
// recorded.
func (p *criPodSandbox) GetPid() uint32 {
	return p.pid
}
//
// NRI integration wrapper for CRI Containers
//
// criContainer wraps a CRI container for use with NRI.
type criContainer struct {
	// api is the API that created the wrapper; it is used to reach the CRI
	// container store.
	api *API
	// ctrs is the containerd container record; it may be nil.
	ctrs *containers.Container
	// spec is the OCI Spec of the container.
	spec *runtimespec.Spec
	// meta is the CRI metadata of the container.
	meta *cstore.Metadata
	// pid is the pid of the container task, or 0 when it is not known.
	pid uint32
}
// nriContainer wraps a container for NRI. ctr has to be one of:
//
//   - *cstore.Container: info, OCI Spec and task pid are fetched from the
//     underlying containerd container and the spec argument is ignored.
//     Failures are logged and the affected piece is left at a default (zero
//     info, empty Spec, pid 0).
//   - *containers.Container: the record and the given spec are used as they
//     are, and the CRI metadata is decoded from the extension stored under
//     the CRI container metadata extension key, if there is one.
//
// Any other type is logged as an error and results in a wrapper with empty
// metadata and an empty Spec. The result is never nil.
func (a *API) nriContainer(ctr interface{}, spec *runtimespec.Spec) *criContainer {
	switch c := ctr.(type) {
	case *cstore.Container:
		ctx := ctrdutil.NamespacedContext()
		pid := uint32(0)
		ctrd := c.Container
		ctrs, err := ctrd.Info(ctx, containerd.WithoutRefreshedMetadata)
		if err != nil {
			// Carry on with the zero-valued info.
			log.L.WithError(err).Errorf("failed to get info for container %s", ctrd.ID())
		}
		// This spec shadows the spec parameter within this case.
		spec, err := ctrd.Spec(ctx)
		if err != nil {
			log.L.WithError(err).Errorf("failed to get OCI Spec for container %s", ctrd.ID())
			spec = &runtimespec.Spec{}
		}
		task, err := ctrd.Task(ctx, nil)
		if err != nil {
			// A container without a task is not worth an error message.
			if !errdefs.IsNotFound(err) {
				log.L.WithError(err).Errorf("failed to get task for container %s", ctrd.ID())
			}
		} else {
			pid = task.Pid()
		}

		return &criContainer{
			api:  a,
			ctrs: &ctrs,
			meta: &c.Metadata,
			spec: spec,
			pid:  pid,
		}

	case *containers.Container:
		ctrs := c
		meta := &cstore.Metadata{}
		if ext := ctrs.Extensions[a.cri.ContainerMetadataExtensionKey()]; ext != nil {
			err := typeurl.UnmarshalTo(ext, meta)
			if err != nil {
				log.L.WithError(err).Errorf("failed to get metadata for container %s", ctrs.ID)
			}
		}

		// No pid is recorded for this kind of container.
		return &criContainer{
			api:  a,
			ctrs: ctrs,
			meta: meta,
			spec: spec,
		}
	}

	log.L.Errorf("can't wrap %T as NRI container", ctr)
	return &criContainer{
		api:  a,
		meta: &cstore.Metadata{},
		spec: &runtimespec.Spec{},
	}
}
// GetDomain returns the NRI domain name of this package.
func (c *criContainer) GetDomain() string {
	return nriDomain
}
// GetID returns the ID of the containerd container record, or "" if the
// wrapper has no record.
func (c *criContainer) GetID() string {
	if c.ctrs == nil {
		return ""
	}
	return c.ctrs.ID
}
// GetPodSandboxID returns the sandbox ID annotation of the container OCI
// Spec; it is "" when the annotation is not set.
func (c *criContainer) GetPodSandboxID() string {
	return c.spec.Annotations[annotations.SandboxID]
}
// GetName returns the container name annotation of the container OCI Spec; it
// is "" when the annotation is not set.
func (c *criContainer) GetName() string {
	return c.spec.Annotations[annotations.ContainerName]
}
// GetState returns the current state of the container in its NRI
// representation. The state is read from the CRI container store: created and
// running map to their NRI counterparts and exited maps to stopped. Unknown
// is returned for every other state and when the container is not in the
// store.
func (c *criContainer) GetState() api.ContainerState {
	stored, err := c.api.cri.ContainerStore().Get(c.GetID())
	if err != nil {
		return api.ContainerState_CONTAINER_UNKNOWN
	}

	switch stored.Status.Get().State() {
	case cri.ContainerState_CONTAINER_CREATED:
		return api.ContainerState_CONTAINER_CREATED
	case cri.ContainerState_CONTAINER_RUNNING:
		return api.ContainerState_CONTAINER_RUNNING
	case cri.ContainerState_CONTAINER_EXITED:
		return api.ContainerState_CONTAINER_STOPPED
	default:
		return api.ContainerState_CONTAINER_UNKNOWN
	}
}
// GetLabels returns a new map holding the labels of the containerd container
// record merged with the labels of the CRI container config, if there is one.
// For a key present in both, the value from the CRI config is used. It
// returns nil if the wrapper has no container record.
func (c *criContainer) GetLabels() map[string]string {
	if c.ctrs == nil {
		return nil
	}

	merged := map[string]string{}
	for k, v := range c.ctrs.Labels {
		merged[k] = v
	}

	if c.meta == nil || c.meta.Config == nil {
		return merged
	}

	// CRI config labels are copied last and therefore take precedence.
	for k, v := range c.meta.Config.Labels {
		merged[k] = v
	}

	return merged
}
// GetAnnotations returns a new map holding the annotations of the container
// OCI Spec merged with the annotations of the CRI container config, if there
// is one. For a key present in both, the value from the CRI config is used.
func (c *criContainer) GetAnnotations() map[string]string {
	merged := map[string]string{}
	for k, v := range c.spec.Annotations {
		merged[k] = v
	}

	if c.meta == nil || c.meta.Config == nil {
		return merged
	}

	// CRI config annotations are copied last and therefore take precedence.
	for k, v := range c.meta.Config.Annotations {
		merged[k] = v
	}

	return merged
}
// GetArgs returns a copy of the process arguments of the container OCI Spec,
// or nil if the Spec has no process section.
func (c *criContainer) GetArgs() []string {
	if c.spec.Process == nil {
		return nil
	}
	return api.DupStringSlice(c.spec.Process.Args)
}
// GetEnv returns a copy of the process environment of the container OCI Spec,
// or nil if the Spec has no process section.
func (c *criContainer) GetEnv() []string {
	if c.spec.Process == nil {
		return nil
	}
	return api.DupStringSlice(c.spec.Process.Env)
}
// GetMounts returns the mounts of the container OCI Spec in their NRI
// representation.
func (c *criContainer) GetMounts() []*api.Mount {
	return api.FromOCIMounts(c.spec.Mounts)
}
// GetHooks returns the hooks of the container OCI Spec in their NRI
// representation.
func (c *criContainer) GetHooks() *api.Hooks {
	return api.FromOCIHooks(c.spec.Hooks)
}
// GetLinuxContainer returns the container itself as its Linux-specific view.
func (c *criContainer) GetLinuxContainer() nri.LinuxContainer {
	return c
}
// GetLinuxNamespaces returns the Linux namespaces of the container OCI Spec
// in their NRI representation, or nil if the Spec has no Linux section.
func (c *criContainer) GetLinuxNamespaces() []*api.LinuxNamespace {
	if c.spec.Linux == nil {
		return nil
	}
	return api.FromOCILinuxNamespaces(c.spec.Linux.Namespaces)
}
// GetLinuxDevices returns the spec's Linux devices converted with
// api.FromOCILinuxDevices, or nil when the spec has no Linux section.
func (c *criContainer) GetLinuxDevices() []*api.LinuxDevice {
	if linux := c.spec.Linux; linux != nil {
		return api.FromOCILinuxDevices(linux.Devices)
	}
	return nil
}
// GetLinuxResources returns the spec's Linux resources converted with
// api.FromOCILinuxResources (which is also handed the spec annotations), or
// nil when the spec has no Linux section.
func (c *criContainer) GetLinuxResources() *api.LinuxResources {
	if linux := c.spec.Linux; linux != nil {
		return api.FromOCILinuxResources(linux.Resources, c.spec.Annotations)
	}
	return nil
}
// GetOOMScoreAdj returns the OOM score adjustment pointer from the spec's
// process section, or nil when the spec has no process section.
func (c *criContainer) GetOOMScoreAdj() *int {
	if proc := c.spec.Process; proc != nil {
		return proc.OOMScoreAdj
	}
	return nil
}
// GetCgroupsPath returns the cgroups path from the spec's Linux section, or
// the empty string when the spec has no Linux section.
func (c *criContainer) GetCgroupsPath() string {
	if linux := c.spec.Linux; linux != nil {
		return linux.CgroupsPath
	}
	return ""
}
// GetPid returns the pid recorded on this adapter.
func (c *criContainer) GetPid() uint32 {
return c.pid
}

View File

@@ -1,145 +0,0 @@
//go:build !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nri
import (
"context"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/containers"
cstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
sstore "github.com/containerd/containerd/v2/pkg/cri/store/sandbox"
"github.com/opencontainers/runtime-spec/specs-go"
cri "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/pkg/cri/constants"
"github.com/containerd/containerd/v2/pkg/nri"
"github.com/containerd/nri/pkg/api"
)
// API is the stateless stand-in for the CRI-NRI adapter in builds selected by
// this file's !linux constraint; every method in this file is a no-op.
type API struct {
}
// NewAPI ignores its argument and returns a nil *API.
func NewAPI(nri.API) *API {
return nil
}
// Register is a no-op in this build: it ignores its argument and returns nil.
func (a *API) Register(CRIImplementation) error {
return nil
}
// IsEnabled always reports false in this build.
func (a *API) IsEnabled() bool {
return false
}
//
// CRI-NRI lifecycle hook no-op interface
//
// RunPodSandbox is a no-op in this build: it ignores its arguments and returns nil.
func (*API) RunPodSandbox(context.Context, *sstore.Sandbox) error {
return nil
}
// StopPodSandbox is a no-op in this build: it ignores its arguments and returns nil.
func (*API) StopPodSandbox(context.Context, *sstore.Sandbox) error {
return nil
}
// RemovePodSandbox is a no-op in this build: it ignores its arguments and returns nil.
func (*API) RemovePodSandbox(context.Context, *sstore.Sandbox) error {
return nil
}
// PostCreateContainer is a no-op in this build: it ignores its arguments and returns nil.
func (*API) PostCreateContainer(context.Context, *sstore.Sandbox, *cstore.Container) error {
return nil
}
// StartContainer is a no-op in this build: it ignores its arguments and returns nil.
func (*API) StartContainer(context.Context, *sstore.Sandbox, *cstore.Container) error {
return nil
}
// PostStartContainer is a no-op in this build: it ignores its arguments and returns nil.
func (*API) PostStartContainer(context.Context, *sstore.Sandbox, *cstore.Container) error {
return nil
}
// UpdateContainerResources is a no-op in this build: it ignores its arguments
// and returns nil resources together with a nil error.
func (*API) UpdateContainerResources(context.Context, *sstore.Sandbox, *cstore.Container, *cri.LinuxContainerResources) (*cri.LinuxContainerResources, error) {
return nil, nil
}
// PostUpdateContainerResources is a no-op in this build: it ignores its arguments and returns nil.
func (*API) PostUpdateContainerResources(context.Context, *sstore.Sandbox, *cstore.Container) error {
return nil
}
// StopContainer is a no-op in this build: it ignores its arguments and returns nil.
func (*API) StopContainer(context.Context, *sstore.Sandbox, *cstore.Container) error {
return nil
}
// RemoveContainer is a no-op in this build: it ignores its arguments and returns nil.
func (*API) RemoveContainer(context.Context, *sstore.Sandbox, *cstore.Container) error {
return nil
}
// UndoCreateContainer is a no-op in this build: it ignores its arguments and does nothing.
func (*API) UndoCreateContainer(context.Context, *sstore.Sandbox, string, *specs.Spec) {
}
// WithContainerAdjustment returns a container option that leaves the container
// untouched and always returns nil.
func (*API) WithContainerAdjustment() containerd.NewContainerOpts {
return func(ctx context.Context, _ *containerd.Client, c *containers.Container) error {
return nil
}
}
// WithContainerExit ignores its argument and returns a process-delete option
// that does nothing and always returns nil.
func (*API) WithContainerExit(*cstore.Container) containerd.ProcessDeleteOpts {
return func(_ context.Context, _ containerd.Process) error {
return nil
}
}
//
// NRI-CRI no-op 'domain' interface
//
const (
// nriDomain is the name reported by (*API).GetName; it is an alias for
// constants.K8sContainerdNamespace.
nriDomain = constants.K8sContainerdNamespace
)
// GetName returns nriDomain.
func (*API) GetName() string {
return nriDomain
}
// ListPodSandboxes always returns nil in this build.
func (*API) ListPodSandboxes() []nri.PodSandbox {
return nil
}
// ListContainers always returns nil in this build.
func (*API) ListContainers() []nri.Container {
return nil
}
// GetPodSandbox ignores its argument and always returns (nil, false).
func (*API) GetPodSandbox(string) (nri.PodSandbox, bool) {
return nil, false
}
// GetContainer ignores its argument and always returns (nil, false).
func (*API) GetContainer(string) (nri.Container, bool) {
return nil, false
}
// UpdateContainer is a no-op in this build: it ignores its arguments and returns nil.
func (*API) UpdateContainer(context.Context, *api.ContainerUpdate) error {
return nil
}
// EvictContainer is a no-op in this build: it ignores its arguments and returns nil.
func (*API) EvictContainer(context.Context, *api.ContainerEviction) error {
return nil
}

View File

@@ -1,155 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"context"
"errors"
"fmt"
"os"
"strings"
"github.com/containerd/continuity/fs"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/core/mount"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/errdefs"
"github.com/containerd/log"
)
// WithNewSnapshot wraps `containerd.WithNewSnapshot`. When the wrapped option
// fails with a not-found error, the image is unpacked into the container's
// snapshotter and the option is run a second time; any other failure is
// returned as-is.
func WithNewSnapshot(id string, i containerd.Image, opts ...snapshots.Opt) containerd.NewContainerOpts {
	newSnapshot := containerd.WithNewSnapshot(id, i, opts...)
	return func(ctx context.Context, client *containerd.Client, c *containers.Container) error {
		firstErr := newSnapshot(ctx, client, c)
		if firstErr == nil {
			return nil
		}
		if !errdefs.IsNotFound(firstErr) {
			return firstErr
		}
		if unpackErr := i.Unpack(ctx, c.Snapshotter); unpackErr != nil {
			return fmt.Errorf("error unpacking image: %w", unpackErr)
		}
		// Retry now that the image has been unpacked.
		return newSnapshot(ctx, client, c)
	}
}
// WithVolumes copies the pre-existing contents (and ownership) of each volume
// directory found in the container's rootfs to its corresponding host path.
// It doesn't update runtime spec.
// The passed in map is a host path to container path map for all volumes.
func WithVolumes(volumeMounts map[string]string, platform imagespec.Platform) containerd.NewContainerOpts {
	return func(ctx context.Context, client *containerd.Client, c *containers.Container) (err error) {
		switch {
		case c.Snapshotter == "":
			return errors.New("no snapshotter set for container")
		case c.SnapshotKey == "":
			return errors.New("rootfs not created for container")
		}

		rootfsMounts, err := client.SnapshotService(c.Snapshotter).Mounts(ctx, c.SnapshotKey)
		if err != nil {
			return err
		}
		// Only reads are needed, so a lone overlay mount gets the "ro" option;
		// this keeps the kernel from syncing the whole filesystem on umount.
		if len(rootfsMounts) == 1 && rootfsMounts[0].Type == "overlay" {
			rootfsMounts[0].Options = append(rootfsMounts[0].Options, "ro")
		}

		tmpRoot, err := os.MkdirTemp("", "ctd-volume")
		if err != nil {
			return err
		}
		// Remove (not RemoveAll) on purpose: if the unmount below fails we
		// would rather leak a temp dir than delete snapshot data.
		// refer to https://github.com/containerd/containerd/pull/1868
		// https://github.com/containerd/containerd/pull/1785
		defer os.Remove(tmpRoot)

		if err := mount.All(rootfsMounts, tmpRoot); err != nil {
			return fmt.Errorf("failed to mount: %w", err)
		}
		defer func() {
			uerr := mount.Unmount(tmpRoot, 0)
			if uerr == nil {
				return
			}
			log.G(ctx).WithError(uerr).Errorf("Failed to unmount snapshot %q", tmpRoot)
			// Surface the unmount failure only if nothing else went wrong.
			if err == nil {
				err = uerr
			}
		}()

		onWindows := platform.OS == "windows"
		for hostPath, ctrPath := range volumeMounts {
			// Windows allows volume mounts in subfolders under C: and as any
			// other drive letter like D:, E:, etc. An image can only carry
			// pre-existing files for volumes on the root filesystem (C:), so
			// volumes on any other drive are skipped.
			//
			//	C:\some\volume --> \some\volume
			//	D:\some\volume --> skip
			if onWindows && len(ctrPath) >= 2 && ctrPath[1] == ':' {
				if !strings.EqualFold(string(ctrPath[0]), "c") {
					continue
				}
				// Strip the drive letter; fs.RootPath() joins the remainder
				// onto the rootfs path as seen by the host OS.
				ctrPath = ctrPath[2:]
			}

			inRootfs, err := fs.RootPath(tmpRoot, ctrPath)
			if err != nil {
				return fmt.Errorf("rootpath on mountPath %s, volume %s: %w", tmpRoot, ctrPath, err)
			}
			if _, statErr := os.Stat(inRootfs); statErr != nil {
				if os.IsNotExist(statErr) {
					// Nothing to copy when the image has no such directory.
					continue
				}
				return fmt.Errorf("stat volume in rootfs: %w", statErr)
			}
			if copyErr := copyExistingContents(inRootfs, hostPath); copyErr != nil {
				return fmt.Errorf("taking runtime copy of volume: %w", copyErr)
			}
		}
		return nil
	}
}
// copyExistingContents copies the source directory into the destination
// directory, excluding the "security.selinux" xattr. The destination must
// already exist and be empty; otherwise an error is returned and nothing is
// copied.
func copyExistingContents(source, destination string) error {
	dir, err := os.Open(destination)
	if err != nil {
		return err
	}
	defer dir.Close()

	existing, err := dir.Readdirnames(-1)
	switch {
	case err != nil:
		return err
	case len(existing) != 0:
		return fmt.Errorf("volume at %q is not initially empty", destination)
	}
	return fs.CopyDir(destination, source, fs.WithXAttrExclude("security.selinux"))
}

View File

@@ -1,119 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"context"
"fmt"
"os"
"path/filepath"
"sort"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/pkg/oci"
osinterface "github.com/containerd/containerd/v2/pkg/os"
)
// WithDarwinMounts adds mounts from CRI's container config + extra mounts.
//
// The returned SpecOpts merges the CRI mounts from config with extra (a CRI
// mount wins when both target the same cleaned container path), sorts the
// result with orderedMounts, drops spec mounts whose destination is overridden
// by a supplied mount, and appends one "bind" mount per supplied mount. Host
// paths that do not exist are created with mode 0755, and symlinks in the host
// path are resolved before the mount is recorded.
func WithDarwinMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, container *containers.Container, s *oci.Spec) error {
		// Merge CRI mounts with extra mounts. If a mount destination is
		// mounted by both a CRI mount and an extra mount, the CRI mount will
		// be kept.
		var (
			criMounts = config.GetMounts()
			mounts    = append([]*runtime.Mount{}, criMounts...)
		)
		// Copy all mounts from extra mounts, except for mounts overridden by CRI.
		for _, e := range extra {
			found := false
			for _, c := range criMounts {
				if cleanMount(e.ContainerPath) == cleanMount(c.ContainerPath) {
					found = true
					break
				}
			}
			if !found {
				mounts = append(mounts, e)
			}
		}

		// Sort mounts in number of parts. This ensures that high level mounts don't
		// shadow other mounts.
		sort.Sort(orderedMounts(mounts))

		// Copy all mounts from default mounts, except for
		// - mounts overridden by supplied mount;
		mountSet := make(map[string]struct{})
		for _, m := range mounts {
			mountSet[filepath.Clean(m.ContainerPath)] = struct{}{}
		}

		defaultMounts := s.Mounts
		s.Mounts = nil

		for _, m := range defaultMounts {
			dst := cleanMount(m.Destination)
			if _, ok := mountSet[dst]; ok {
				// filter out mount overridden by a supplied mount
				continue
			}
			s.Mounts = append(s.Mounts, m)
		}

		for _, mnt := range mounts {
			var (
				dst = mnt.GetContainerPath()
				src = mnt.GetHostPath()
			)

			// Create the host path if it doesn't exist.
			if _, err := osi.Stat(src); err != nil {
				if !os.IsNotExist(err) {
					return fmt.Errorf("failed to stat %q: %w", src, err)
				}
				if err := osi.MkdirAll(src, 0755); err != nil {
					return fmt.Errorf("failed to mkdir %q: %w", src, err)
				}
			}

			// Resolve into a separate variable so that a failure reports the
			// path we tried to resolve rather than the (failed) result.
			resolved, err := osi.ResolveSymbolicLink(src)
			if err != nil {
				return fmt.Errorf("failed to resolve symlink %q: %w", src, err)
			}
			src = resolved

			var options []string
			if mnt.GetReadonly() {
				options = append(options, "ro")
			} else {
				options = append(options, "rw")
			}

			s.Mounts = append(s.Mounts, runtimespec.Mount{
				Source:      src,
				Destination: dst,
				Type:        "bind",
				Options:     options,
			})
		}
		return nil
	}
}

View File

@@ -1,177 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"syscall"
"github.com/containerd/cgroups/v3"
"golang.org/x/sys/unix"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/pkg/oci"
"github.com/containerd/log"
)
// Linux dependent OCI spec opts.
var (
	// swapControllerAvailabilityOnce guards the one-time probe whose result is
	// cached in swapControllerAvailability.
	swapControllerAvailabilityOnce sync.Once
	swapControllerAvailability     bool
)

// SwapControllerAvailable returns true if the swap controller is available.
// The probe runs once per process and its result is cached. Detection
// failures other than "file does not exist" are logged as warnings, and in
// every failure case the controller is treated as unavailable.
func SwapControllerAvailable() bool {
	swapControllerAvailabilityOnce.Do(func() {
		const warn = "Failed to detect the availability of the swap controller, assuming not available"

		swapFile := "/sys/fs/cgroup/memory/memory.memsw.limit_in_bytes"
		if cgroups.Mode() == cgroups.Unified {
			// memory.swap.max does not exist in the cgroup root, so we check /sys/fs/cgroup/<SELF>/memory.swap.max
			_, selfCgroup, parseErr := cgroups.ParseCgroupFileUnified("/proc/self/cgroup")
			if parseErr != nil {
				log.L.WithError(fmt.Errorf("failed to parse /proc/self/cgroup: %w", parseErr)).Warn(warn)
				return
			}
			swapFile = filepath.Join("/sys/fs/cgroup", selfCgroup, "memory.swap.max")
		}

		_, statErr := os.Stat(swapFile)
		switch {
		case statErr == nil:
			swapControllerAvailability = true
		case !errors.Is(statErr, os.ErrNotExist):
			// A missing file simply means "no swap controller"; anything
			// else is unexpected and worth a warning.
			log.L.WithError(statErr).Warn(warn)
		}
	})
	return swapControllerAvailability
}
var (
supportsHugetlbOnce sync.Once
supportsHugetlb bool
)
func isHugetlbControllerPresent() bool {
supportsHugetlbOnce.Do(func() {
supportsHugetlb = false
if IsCgroup2UnifiedMode() {
supportsHugetlb = cgroupv2HasHugetlb()
} else {
supportsHugetlb = cgroupv1HasHugetlb()
}
})
return supportsHugetlb
}
// Cached results, each paired with the sync.Once guarding its one-time probe,
// for cgroupv1HasHugetlb, cgroupv2HasHugetlb and IsCgroup2UnifiedMode.
var (
_cgroupv1HasHugetlbOnce sync.Once
_cgroupv1HasHugetlb bool
_cgroupv2HasHugetlbOnce sync.Once
_cgroupv2HasHugetlb bool
isUnifiedOnce sync.Once
isUnified bool
)
// cgroupv1HasHugetlb returns whether the hugetlb controller is present on
// cgroup v1, i.e. whether /sys/fs/cgroup/hugetlb can be read as a directory.
// The check runs once and the result is cached.
func cgroupv1HasHugetlb() bool {
	_cgroupv1HasHugetlbOnce.Do(func() {
		_, err := os.ReadDir("/sys/fs/cgroup/hugetlb")
		_cgroupv1HasHugetlb = err == nil
	})
	return _cgroupv1HasHugetlb
}
// cgroupv2HasHugetlb returns whether the hugetlb controller is present on
// cgroup v2, i.e. whether /sys/fs/cgroup/cgroup.controllers is readable and
// contains "hugetlb". The check runs once and the result is cached; an
// unreadable file counts as "not present".
func cgroupv2HasHugetlb() bool {
	_cgroupv2HasHugetlbOnce.Do(func() {
		if data, err := os.ReadFile("/sys/fs/cgroup/cgroup.controllers"); err == nil {
			_cgroupv2HasHugetlb = strings.Contains(string(data), "hugetlb")
		}
	})
	return _cgroupv2HasHugetlb
}
// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified
// mode, determined by comparing the filesystem type of /sys/fs/cgroup with
// CGROUP2_SUPER_MAGIC. The check runs once and the result is cached.
//
// It panics if /sys/fs/cgroup cannot be statfs'ed.
// NOTE(review): sync.Once treats a panicking function as completed, so after
// such a panic later calls return false without re-probing.
func IsCgroup2UnifiedMode() bool {
	isUnifiedOnce.Do(func() {
		var fsInfo syscall.Statfs_t
		err := syscall.Statfs("/sys/fs/cgroup", &fsInfo)
		if err != nil {
			panic("cannot statfs cgroup root")
		}
		isUnified = fsInfo.Type == unix.CGROUP2_SUPER_MAGIC
	})
	return isUnified
}
// WithCDI updates OCI spec with CDI content.
//
// Device names are gathered first from the CRI CDIDevices field and then from
// CDI annotations; a name already seen is skipped with a debug log. The
// resulting list is handed to oci.WithCDIDevices.
func WithCDI(annotations map[string]string, CDIDevices []*runtime.CDIDevice) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, c *containers.Container, s *oci.Spec) error {
		var (
			devices []string
			seen    = make(map[string]bool)
		)
		// collect appends name unless it was already collected.
		collect := func(name string) {
			if seen[name] {
				// TODO: change to Warning when passing CDI devices as annotations is deprecated
				log.G(ctx).Debugf("Skipping duplicated CDI device %s", name)
				return
			}
			devices = append(devices, name)
			seen[name] = true
		}

		// Add devices from CDIDevices CRI field
		for _, dev := range CDIDevices {
			collect(dev.Name)
		}
		log.G(ctx).Infof("Container %v: CDI devices from CRI Config.CDIDevices: %v", c.ID, devices)

		// Add devices from CDI annotations
		_, fromAnnotations, err := cdi.ParseAnnotations(annotations)
		if err != nil {
			return fmt.Errorf("failed to parse CDI device annotations: %w", err)
		}
		if fromAnnotations != nil {
			log.G(ctx).Infof("Container %v: CDI devices from annotations: %v", c.ID, fromAnnotations)
			for _, name := range fromAnnotations {
				collect(name)
			}
			// TODO: change to Warning when passing CDI devices as annotations is deprecated
			log.G(ctx).Debug("Passing CDI devices as annotations will be deprecated soon, please use CRI CDIDevices instead")
		}

		return oci.WithCDIDevices(devices...)(ctx, client, c, s)
	}
}

View File

@@ -1,471 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"syscall"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/core/mount"
"github.com/containerd/containerd/v2/pkg/oci"
osinterface "github.com/containerd/containerd/v2/pkg/os"
"github.com/containerd/log"
)
// RuntimeConfig is a subset of [github.com/containerd/containerd/v2/pkg/cri/config].
// Needed for avoiding circular imports.
type RuntimeConfig struct {
// TreatRoMountsAsRro makes WithMounts emit the "rro" option instead of "ro"
// for read-only mounts whose source is a directory.
TreatRoMountsAsRro bool // only applies to volumes
}
// WithMounts sorts and adds runtime and CRI mounts to the spec.
//
// The returned SpecOpts:
//   - merges config's CRI mounts with extra, keeping the CRI mount when both
//     target the same cleaned container path, and sorts them with
//     orderedMounts;
//   - appends a read-only cgroup mount at /sys/fs/cgroup;
//   - drops spec mounts overridden by a supplied mount, and everything under
//     /dev when /dev itself is supplied;
//   - appends one bind mount per supplied mount, creating missing host paths
//     (mode 0755), resolving symlinks, and translating the propagation,
//     read-only, SELinux relabel and ID-mapping settings.
//
// mountLabel is the label passed to label.Relabel for mounts that request an
// SELinux relabel. rtConfig may be nil; it only decides whether read-only
// directory mounts use "rro" instead of "ro".
func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string, rtConfig *RuntimeConfig) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) (err error) {
		// Merge CRI mounts with extra mounts. If a mount destination is
		// mounted by both a CRI mount and an extra mount, the CRI mount will
		// be kept.
		var (
			criMounts = config.GetMounts()
			mounts    = append([]*runtime.Mount{}, criMounts...)
		)
		// Copy all mounts from extra mounts, except for mounts overridden by CRI.
		for _, e := range extra {
			found := false
			for _, c := range criMounts {
				if filepath.Clean(e.ContainerPath) == filepath.Clean(c.ContainerPath) {
					found = true
					break
				}
			}
			if !found {
				mounts = append(mounts, e)
			}
		}

		// Sort mounts in number of parts. This ensures that high level mounts don't
		// shadow other mounts.
		sort.Sort(orderedMounts(mounts))

		// Mount cgroup into the container as readonly, which inherits docker's behavior.
		// TreatRoMountsAsRro does not apply here, as /sys/fs/cgroup is not a volume.
		s.Mounts = append(s.Mounts, runtimespec.Mount{
			Source:      "cgroup",
			Destination: "/sys/fs/cgroup",
			Type:        "cgroup",
			Options:     []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
		})

		// Copy all mounts from default mounts, except for
		// - mounts overridden by supplied mount;
		// - all mounts under /dev if a supplied /dev is present.
		mountSet := make(map[string]struct{})
		for _, m := range mounts {
			mountSet[filepath.Clean(m.ContainerPath)] = struct{}{}
		}

		defaultMounts := s.Mounts
		s.Mounts = nil

		for _, m := range defaultMounts {
			dst := filepath.Clean(m.Destination)
			if _, ok := mountSet[dst]; ok {
				// filter out mount overridden by a supplied mount
				continue
			}
			if _, mountDev := mountSet["/dev"]; mountDev && strings.HasPrefix(dst, "/dev/") {
				// filter out everything under /dev if /dev is a supplied mount
				continue
			}
			s.Mounts = append(s.Mounts, m)
		}

		// The loop variable is deliberately not called "mount" so that it does
		// not shadow the imported mount package.
		for _, mnt := range mounts {
			var (
				dst = mnt.GetContainerPath()
				src = mnt.GetHostPath()
			)
			// Create the host path if it doesn't exist.
			// TODO(random-liu): Add CRI validation test for this case.
			if _, err := osi.Stat(src); err != nil {
				if !os.IsNotExist(err) {
					return fmt.Errorf("failed to stat %q: %w", src, err)
				}
				if err := osi.MkdirAll(src, 0755); err != nil {
					return fmt.Errorf("failed to mkdir %q: %w", src, err)
				}
			}
			// TODO(random-liu): Add cri-containerd integration test or cri validation test
			// for this.
			// Resolve into a separate variable so that a failure reports the
			// path we tried to resolve rather than the (failed) result.
			resolved, err := osi.ResolveSymbolicLink(src)
			if err != nil {
				return fmt.Errorf("failed to resolve symlink %q: %w", src, err)
			}
			src = resolved

			if s.Linux == nil {
				s.Linux = &runtimespec.Linux{}
			}
			options := []string{"rbind"}
			switch mnt.GetPropagation() {
			case runtime.MountPropagation_PROPAGATION_PRIVATE:
				options = append(options, "rprivate")
				// Since default root propagation in runc is rprivate ignore
				// setting the root propagation
			case runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL:
				if err := ensureShared(src, osi.LookupMount); err != nil {
					return err
				}
				options = append(options, "rshared")
				s.Linux.RootfsPropagation = "rshared"
			case runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER:
				if err := ensureSharedOrSlave(src, osi.LookupMount); err != nil {
					return err
				}
				options = append(options, "rslave")
				// Never downgrade a rootfs propagation that is already
				// rshared or rslave.
				if s.Linux.RootfsPropagation != "rshared" &&
					s.Linux.RootfsPropagation != "rslave" {
					s.Linux.RootfsPropagation = "rslave"
				}
			default:
				log.G(ctx).Warnf("Unknown propagation mode for hostPath %q", mnt.HostPath)
				options = append(options, "rprivate")
			}

			var srcIsDir bool
			if srcSt, err := osi.Stat(src); err != nil {
				if errors.Is(err, os.ErrNotExist) { // happens when osi is FakeOS
					srcIsDir = true // assume src to be dir
				} else {
					return fmt.Errorf("failed to stat mount source %q: %w", src, err)
				}
			} else if srcSt != nil { // srcSt can be nil when osi is FakeOS
				srcIsDir = srcSt.IsDir()
			}

			// NOTE(random-liu): we don't change all mounts to `ro` when root filesystem
			// is readonly. This is different from docker's behavior, but make more sense.
			if mnt.GetReadonly() {
				if rtConfig != nil && rtConfig.TreatRoMountsAsRro && srcIsDir {
					options = append(options, "rro")
				} else {
					options = append(options, "ro")
				}
			} else {
				options = append(options, "rw")
			}

			if mnt.GetSelinuxRelabel() {
				// Linux specific error code, this branch will not execute on non Linux platforms.
				errNotSupported := syscall.Errno(0x5f)
				// errors.Is also recognizes the errno when Relabel returns it
				// wrapped in another error.
				if err := label.Relabel(src, mountLabel, false); err != nil && !errors.Is(err, errNotSupported) {
					return fmt.Errorf("relabel %q with %q failed: %w", src, mountLabel, err)
				}
			}

			// Ranging over a nil slice is a no-op, so the mappings stay nil
			// when the CRI mount carries none.
			var uidMapping []runtimespec.LinuxIDMapping
			for _, mapping := range mnt.UidMappings {
				uidMapping = append(uidMapping, runtimespec.LinuxIDMapping{
					HostID:      mapping.HostId,
					ContainerID: mapping.ContainerId,
					Size:        mapping.Length,
				})
			}
			var gidMapping []runtimespec.LinuxIDMapping
			for _, mapping := range mnt.GidMappings {
				gidMapping = append(gidMapping, runtimespec.LinuxIDMapping{
					HostID:      mapping.HostId,
					ContainerID: mapping.ContainerId,
					Size:        mapping.Length,
				})
			}

			s.Mounts = append(s.Mounts, runtimespec.Mount{
				Source:      src,
				Destination: dst,
				Type:        "bind",
				Options:     options,
				UIDMappings: uidMapping,
				GIDMappings: gidMapping,
			})
		}
		return nil
	}
}
// ensureShared verifies that the mount point on which path is mounted is a
// shared mount, i.e. that one of the space-separated optional fields reported
// by lookupMount starts with "shared:". Lookup errors are returned unchanged.
func ensureShared(path string, lookupMount func(string) (mount.Info, error)) error {
	info, err := lookupMount(path)
	if err != nil {
		return err
	}

	for _, field := range strings.Split(info.Optional, " ") {
		if strings.HasPrefix(field, "shared:") {
			return nil
		}
	}
	return fmt.Errorf("path %q is mounted on %q but it is not a shared mount", path, info.Mountpoint)
}
// ensureSharedOrSlave verifies that the mount point on which path is mounted
// is either a shared or a slave mount, i.e. that one of the space-separated
// optional fields reported by lookupMount starts with "shared:" or "master:".
// Lookup errors are returned unchanged.
func ensureSharedOrSlave(path string, lookupMount func(string) (mount.Info, error)) error {
	info, err := lookupMount(path)
	if err != nil {
		return err
	}

	for _, field := range strings.Split(info.Optional, " ") {
		if strings.HasPrefix(field, "shared:") || strings.HasPrefix(field, "master:") {
			return nil
		}
	}
	return fmt.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, info.Mountpoint)
}
// getDeviceUserGroupID() is used to find the right uid/gid
// value for the device node created in the container namespace.
// The runtime executes mknod() and chmod()s the created
// device with the values returned here.
//
// A nil runAsVal yields 0; otherwise the wrapped int64 is converted to uint32.
//
// On Linux, uid and gid are sufficient and the user/groupname do not
// need to be resolved.
//
// TODO(mythi): In case of user namespaces, the runtime simply bind
// mounts the devices from the host. Additional logic is needed
// to check that the runtimes effective UID/GID on the host has the
// permissions to access the device node and/or the right user namespace
// mappings are created.
//
// Ref: https://github.com/kubernetes/kubernetes/issues/92211
func getDeviceUserGroupID(runAsVal *runtime.Int64Value) uint32 {
	if runAsVal == nil {
		return 0
	}
	return uint32(runAsVal.GetValue())
}
// WithDevices sets the provided devices onto the container spec.
//
// Every device in config has its host path symlink-resolved and is added via
// oci.WithDevices. When enableDeviceOwnershipFromSecurityContext is set, a
// non-zero RunAsUser / RunAsGroup from the security context overrides the
// UID / GID of the devices added by this call (pre-existing devices are left
// alone).
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig, enableDeviceOwnershipFromSecurityContext bool) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
		if s.Linux == nil {
			s.Linux = &runtimespec.Linux{}
		}
		if s.Linux.Resources == nil {
			s.Linux.Resources = &runtimespec.LinuxResources{}
		}

		// Remember where the devices added below start.
		firstNew := len(s.Linux.Devices)

		for _, dev := range config.GetDevices() {
			hostPath, err := osi.ResolveSymbolicLink(dev.HostPath)
			if err != nil {
				return err
			}
			if err := oci.WithDevices(hostPath, dev.ContainerPath, dev.Permissions)(ctx, client, c, s); err != nil {
				return err
			}
		}

		if !enableDeviceOwnershipFromSecurityContext {
			return nil
		}

		secCtx := config.GetLinux().GetSecurityContext()
		UID := getDeviceUserGroupID(secCtx.GetRunAsUser())
		GID := getDeviceUserGroupID(secCtx.GetRunAsGroup())

		// Loop all new devices added by oci.WithDevices() to update their
		// dev.UID/dev.GID.
		//
		// non-zero UID/GID from SecurityContext is used to override host's
		// device UID/GID for the container.
		// NOTE(review): this writes through the UID/GID pointers, so it assumes
		// oci.WithDevices populated them — confirm.
		for idx := firstNew; idx < len(s.Linux.Devices); idx++ {
			added := &s.Linux.Devices[idx]
			if UID != 0 {
				*added.UID = UID
			}
			if GID != 0 {
				*added.GID = GID
			}
		}
		return nil
	}
}
// WithResources sets the provided resource restrictions.
//
// A nil resources leaves the spec untouched. Otherwise non-zero CPU period,
// quota and shares, non-empty cpuset strings, non-zero memory/swap limits,
// hugepage limits and unified cgroup v2 entries are copied into
// s.Linux.Resources (allocating the nested structs as needed).
//
// Swap is only set when SwapControllerAvailable reports true; with a memory
// limit but no swap limit, swap is set equal to the memory limit so the
// container cannot swap by default.
//
// Hugepage limits are ignored entirely when disableHugetlbController is set.
// When limits are requested but the hugetlb controller is missing, an error is
// returned unless tolerateMissingHugetlbController is set, in which case the
// limits are skipped with a warning.
func WithResources(resources *runtime.LinuxContainerResources, tolerateMissingHugetlbController, disableHugetlbController bool) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
		if resources == nil {
			return nil
		}
		if s.Linux == nil {
			s.Linux = &runtimespec.Linux{}
		}
		if s.Linux.Resources == nil {
			s.Linux.Resources = &runtimespec.LinuxResources{}
		}
		if s.Linux.Resources.CPU == nil {
			s.Linux.Resources.CPU = &runtimespec.LinuxCPU{}
		}
		if s.Linux.Resources.Memory == nil {
			s.Linux.Resources.Memory = &runtimespec.LinuxMemory{}
		}
		var (
			p         = uint64(resources.GetCpuPeriod())
			q         = resources.GetCpuQuota()
			shares    = uint64(resources.GetCpuShares())
			limit     = resources.GetMemoryLimitInBytes()
			swapLimit = resources.GetMemorySwapLimitInBytes()
			hugepages = resources.GetHugepageLimits()
		)

		if p != 0 {
			s.Linux.Resources.CPU.Period = &p
		}
		if q != 0 {
			s.Linux.Resources.CPU.Quota = &q
		}
		if shares != 0 {
			s.Linux.Resources.CPU.Shares = &shares
		}
		if cpus := resources.GetCpusetCpus(); cpus != "" {
			s.Linux.Resources.CPU.Cpus = cpus
		}
		if mems := resources.GetCpusetMems(); mems != "" {
			s.Linux.Resources.CPU.Mems = mems
		}
		if limit != 0 {
			s.Linux.Resources.Memory.Limit = &limit
			// swap/memory limit should be equal to prevent container from swapping by default
			if swapLimit == 0 && SwapControllerAvailable() {
				// Use a separate variable so that Limit and Swap do not
				// alias the same int64.
				swap := limit
				s.Linux.Resources.Memory.Swap = &swap
			}
		}
		if swapLimit != 0 && SwapControllerAvailable() {
			s.Linux.Resources.Memory.Swap = &swapLimit
		}

		// Only consult the hugetlb controller when limits were actually
		// requested; otherwise a missing controller is irrelevant and must
		// neither fail the request nor log a warning.
		if !disableHugetlbController && len(hugepages) > 0 {
			switch {
			case isHugetlbControllerPresent():
				for _, hp := range hugepages {
					s.Linux.Resources.HugepageLimits = append(s.Linux.Resources.HugepageLimits, runtimespec.LinuxHugepageLimit{
						Pagesize: hp.PageSize,
						Limit:    hp.Limit,
					})
				}
			case !tolerateMissingHugetlbController:
				return errors.New("huge pages limits are specified but hugetlb cgroup controller is missing. " +
					"Please set tolerate_missing_hugetlb_controller to `true` to ignore this error")
			default:
				log.L.Warn("hugetlb cgroup controller is absent. skipping huge pages limits")
			}
		}

		if unified := resources.GetUnified(); unified != nil {
			if s.Linux.Resources.Unified == nil {
				s.Linux.Resources.Unified = make(map[string]string)
			}
			for k, v := range unified {
				s.Linux.Resources.Unified[k] = v
			}
		}
		return nil
	}
}
// WithOOMScoreAdj sets the oom score from the Linux resources in config.
// The spec is given a process section if it lacks one; when config carries no
// Linux resources the score is left unset. With restrict set, the score is
// first passed through restrictOOMScoreAdj.
func WithOOMScoreAdj(config *runtime.ContainerConfig, restrict bool) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
		if s.Process == nil {
			s.Process = &runtimespec.Process{}
		}

		res := config.GetLinux().GetResources()
		if res == nil {
			return nil
		}

		score := int(res.GetOomScoreAdj())
		if restrict {
			restricted, err := restrictOOMScoreAdj(score)
			if err != nil {
				return err
			}
			score = restricted
		}
		s.Process.OOMScoreAdj = &score
		return nil
	}
}
// WithPodOOMScoreAdj sets the oom score for the pod sandbox.
// The spec is given a process section if it lacks one. With restrict set, adj
// is first passed through restrictOOMScoreAdj.
func WithPodOOMScoreAdj(adj int, restrict bool) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
		if s.Process == nil {
			s.Process = &runtimespec.Process{}
		}
		if !restrict {
			s.Process.OOMScoreAdj = &adj
			return nil
		}

		restricted, err := restrictOOMScoreAdj(adj)
		// The captured adj is updated in place, as the spec keeps a pointer to it.
		adj = restricted
		if err != nil {
			return err
		}
		s.Process.OOMScoreAdj = &adj
		return nil
	}
}
// getCurrentOOMScoreAdj reads /proc/self/oom_score_adj and returns its
// whitespace-trimmed contents parsed as a decimal integer. Both read and
// parse failures are wrapped with the same message, and 0 is returned with
// the error.
func getCurrentOOMScoreAdj() (int, error) {
	raw, err := os.ReadFile("/proc/self/oom_score_adj")
	if err != nil {
		return 0, fmt.Errorf("could not get the daemon oom_score_adj: %w", err)
	}

	score, err := strconv.Atoi(strings.TrimSpace(string(raw)))
	if err != nil {
		return 0, fmt.Errorf("could not get the daemon oom_score_adj: %w", err)
	}
	return score, nil
}
// restrictOOMScoreAdj returns the larger of preferredOOMScoreAdj and the
// value reported by getCurrentOOMScoreAdj, so the result is never lower than
// the current score. If the current score cannot be determined, the preferred
// value is returned together with the error.
func restrictOOMScoreAdj(preferredOOMScoreAdj int) (int, error) {
	floor, err := getCurrentOOMScoreAdj()
	switch {
	case err != nil:
		return preferredOOMScoreAdj, err
	case preferredOOMScoreAdj < floor:
		return floor, nil
	default:
		return preferredOOMScoreAdj, nil
	}
}

View File

@@ -1,47 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestMergeGids checks that mergeGids returns the sorted, de-duplicated union
// of two overlapping, unsorted gid lists.
func TestMergeGids(t *testing.T) {
	var (
		first  = []uint32{3, 2, 1}
		second = []uint32{2, 3, 4}
		want   = []uint32{1, 2, 3, 4}
	)
	assert.Equal(t, want, mergeGids(first, second))
}
// TestRestrictOOMScoreAdj verifies that restrictOOMScoreAdj never returns a
// value below the current process's oom_score_adj: requests below the current
// value are raised to it, and requests at or above it pass through unchanged.
func TestRestrictOOMScoreAdj(t *testing.T) {
	current, err := getCurrentOOMScoreAdj()
	require.NoError(t, err)

	testcases := []struct {
		preferred int
		expected  int
	}{
		{preferred: current - 1, expected: current},
		{preferred: current, expected: current},
		{preferred: current + 1, expected: current + 1},
	}
	for _, tc := range testcases {
		got, err := restrictOOMScoreAdj(tc.preferred)
		require.NoError(t, err)
		// assert.Equal takes (expected, actual); keeping that order makes
		// failure output label the two values correctly.
		assert.Equal(t, tc.expected, got)
	}
}

View File

@@ -1,42 +0,0 @@
//go:build !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"context"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/pkg/oci"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// isHugetlbControllerPresent always reports false in this file, which is
// built only for non-Linux targets (see the !linux build constraint).
func isHugetlbControllerPresent() bool {
	return false
}
// SwapControllerAvailable always reports false in this file, which is built
// only for non-Linux targets (see the !linux build constraint).
func SwapControllerAvailable() bool {
	return false
}
// WithCDI is the non-Linux stand-in: both arguments are ignored and the
// returned SpecOpts leaves the spec untouched and always succeeds.
func WithCDI(_ map[string]string, _ []*runtime.CDIDevice) oci.SpecOpts {
	return func(context.Context, oci.Client, *containers.Container, *oci.Spec) error {
		return nil
	}
}

View File

@@ -1,36 +0,0 @@
//go:build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"context"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/pkg/oci"
"github.com/containerd/errdefs"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// WithProcessCommandLineOrArgsForWindows is the stub used on non-Windows
// builds. Its arguments are ignored and the returned SpecOpts always fails
// with errdefs.ErrNotImplemented.
func WithProcessCommandLineOrArgsForWindows(config *runtime.ContainerConfig, image *imagespec.ImageConfig) oci.SpecOpts {
	return func(context.Context, oci.Client, *containers.Container, *runtimespec.Spec) error {
		return errdefs.ErrNotImplemented
	}
}

View File

@@ -1,375 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/pkg/cri/util"
"github.com/containerd/containerd/v2/pkg/oci"
)
// DefaultSandboxCPUshares is the default CPU shares value assigned to a
// sandbox container; it is applied by the default-sandbox-shares options in
// this package.
// TODO(windows): Revisit cpu shares for windows (https://github.com/containerd/cri/issues/1297)
const DefaultSandboxCPUshares = 2
// WithRelativeRoot returns a SpecOpts that points the spec's root filesystem
// at root, allocating the Root section first if the spec has none.
func WithRelativeRoot(root string) oci.SpecOpts {
	return func(_ context.Context, _ oci.Client, _ *containers.Container, spec *runtimespec.Spec) error {
		if spec.Root == nil {
			spec.Root = new(runtimespec.Root)
		}
		spec.Root.Path = root
		return nil
	}
}
// WithoutRoot is a SpecOpts that drops the root filesystem section from the
// spec by setting it to nil. It never fails.
func WithoutRoot(_ context.Context, _ oci.Client, _ *containers.Container, spec *runtimespec.Spec) error {
	spec.Root = nil
	return nil
}
// WithProcessArgs returns a SpecOpts that sets the process args on the spec
// from the container config, falling back to the image config.
//
// Resolution rules (migrated from
// https://github.com/moby/moby/blob/master/daemon/commit.go):
//   - If the config supplies a non-empty command, it is used together with the
//     config args; the image is not consulted.
//   - Otherwise, if the config args are also empty, the image Cmd is used as
//     the args.
//   - Otherwise, if the config command is nil (not merely empty), the image
//     Entrypoint is used as the command, unless it is the single empty string.
//
// The option fails with "no command specified" when both the resolved command
// and args are empty.
//
// TODO(random-liu): Clearly define the commands overwrite behavior.
func WithProcessArgs(config *runtime.ContainerConfig, image *imagespec.ImageConfig) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
		command, args := config.GetCommand(), config.GetArgs()
		if len(command) == 0 {
			if len(args) == 0 {
				args = image.Cmd
			}
			if command == nil {
				if !(len(image.Entrypoint) == 1 && image.Entrypoint[0] == "") {
					command = image.Entrypoint
				}
			}
		}
		if len(command) == 0 && len(args) == 0 {
			return errors.New("no command specified")
		}
		// Always assemble the final argv in a freshly allocated slice.
		// Appending directly to command could write into spare capacity of a
		// slice owned by the config or image and hand the spec a slice that
		// aliases it.
		processArgs := make([]string, 0, len(command)+len(args))
		processArgs = append(processArgs, command...)
		processArgs = append(processArgs, args...)
		return oci.WithProcessArgs(processArgs...)(ctx, client, c, s)
	}
}
// orderedMounts is a slice of CRI mounts that can be sorted by the depth of
// each mount's container path, shallowest first.
// This is the same with the Docker implementation:
//
//	https://github.com/moby/moby/blob/17.05.x/daemon/volumes.go#L26
type orderedMounts []*runtime.Mount

// Len reports how many mounts the slice holds. Used in sorting.
func (m orderedMounts) Len() int {
	return len(m)
}

// Less reports whether the mount at index i has fewer path parts (a/b/c would
// be 3 parts) than the mount at index j. Used in sorting.
func (m orderedMounts) Less(i, j int) bool {
	left, right := m.parts(i), m.parts(j)
	return left < right
}

// Swap exchanges the mounts at indexes i and j. Used in sorting.
func (m orderedMounts) Swap(i, j int) {
	tmp := m[i]
	m[i] = m[j]
	m[j] = tmp
}

// parts counts the OS path separators in the cleaned container path of the
// mount at index i. Used in sorting.
func (m orderedMounts) parts(i int) int {
	cleaned := filepath.Clean(m[i].ContainerPath)
	separator := string(os.PathSeparator)
	return strings.Count(cleaned, separator)
}
// WithAnnotation returns a SpecOpts that records the annotation k=v on the
// spec, creating the annotations map when the spec has none yet.
func WithAnnotation(k, v string) oci.SpecOpts {
	return func(_ context.Context, _ oci.Client, _ *containers.Container, spec *runtimespec.Spec) error {
		if spec.Annotations == nil {
			spec.Annotations = map[string]string{k: v}
			return nil
		}
		spec.Annotations[k] = v
		return nil
	}
}
// WithAdditionalGIDs returns a SpecOpts that runs oci.WithAdditionalGIDs for
// userstr and then merges the result with whatever additional GIDs the spec
// already carried, so previously configured groups are not lost. The merged
// list is produced by mergeGids.
func WithAdditionalGIDs(userstr string) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
		if s.Process == nil {
			s.Process = new(runtimespec.Process)
		}
		// Remember the GIDs present before delegating, since the delegate
		// may replace them.
		previous := s.Process.User.AdditionalGids
		err := oci.WithAdditionalGIDs(userstr)(ctx, client, c, s)
		if err != nil {
			return err
		}
		s.Process.User.AdditionalGids = mergeGids(s.Process.User.AdditionalGids, previous)
		return nil
	}
}
// mergeGids returns the union of gids1 and gids2 with duplicates removed,
// sorted in ascending order. The result is nil when both inputs are empty.
func mergeGids(gids1, gids2 []uint32) []uint32 {
	seen := make(map[uint32]struct{})
	for _, list := range [][]uint32{gids1, gids2} {
		for _, gid := range list {
			seen[gid] = struct{}{}
		}
	}
	var merged []uint32
	for gid := range seen {
		merged = append(merged, gid)
	}
	sort.Slice(merged, func(a, b int) bool {
		return merged[a] < merged[b]
	})
	return merged
}
// WithoutDefaultSecuritySettings is a SpecOpts that strips the security
// defaults from a spec: it blanks the AppArmor profile, removes the seccomp
// configuration when a Linux section exists, and clears the rlimits.
func WithoutDefaultSecuritySettings(_ context.Context, _ oci.Client, _ *containers.Container, spec *runtimespec.Spec) error {
	if spec.Process == nil {
		spec.Process = new(runtimespec.Process)
	}
	process := spec.Process
	// No default apparmor profile.
	process.ApparmorProfile = ""
	// No default seccomp profile.
	if spec.Linux != nil {
		spec.Linux.Seccomp = nil
	}
	// No default rlimits (See https://github.com/containerd/cri/issues/515)
	process.Rlimits = nil
	return nil
}
// WithCapabilities returns a SpecOpts that applies the capability add/drop
// lists of the security context sc. When sc carries no capabilities section a
// no-op option is returned.
//
// "ALL" entries are handled first so that individual entries still take
// effect afterwards: e.g. AddCapabilities: []string{"ALL"} combined with
// DropCapabilities: []string{"CHOWN"} yields every capability in allCaps
// except CAP_CHOWN.
func WithCapabilities(sc *runtime.LinuxContainerSecurityContext, allCaps []string) oci.SpecOpts {
	capabilities := sc.GetCapabilities()
	if capabilities == nil {
		return nullOpt
	}
	toAdd := capabilities.GetAddCapabilities()
	toDrop := capabilities.GetDropCapabilities()

	var opts []oci.SpecOpts
	if util.InStringSlice(toAdd, "ALL") {
		opts = append(opts, oci.WithCapabilities(allCaps))
	}
	if util.InStringSlice(toDrop, "ALL") {
		opts = append(opts, oci.WithCapabilities(nil))
	}

	// CRI capability names carry no `CAP_` prefix, so it is added here.
	var added []string
	for _, name := range toAdd {
		if upper := strings.ToUpper(name); upper != "ALL" {
			added = append(added, "CAP_"+upper)
		}
	}
	opts = append(opts, oci.WithAddedCapabilities(added))

	dropped := []string{}
	for _, name := range toDrop {
		if upper := strings.ToUpper(name); upper != "ALL" {
			dropped = append(dropped, "CAP_"+upper)
		}
	}
	opts = append(opts, oci.WithDroppedCapabilities(dropped))

	return oci.Compose(opts...)
}
// nullOpt is a SpecOpts that ignores all of its arguments, leaves the spec
// unchanged and always returns nil.
func nullOpt(_ context.Context, _ oci.Client, _ *containers.Container, _ *runtimespec.Spec) error {
	return nil
}
// WithoutAmbientCaps is a SpecOpts that clears the ambient capability set of
// the spec's process, allocating the process and capabilities sections when
// they are missing.
func WithoutAmbientCaps(_ context.Context, _ oci.Client, _ *containers.Container, spec *runtimespec.Spec) error {
	if spec.Process == nil {
		spec.Process = new(runtimespec.Process)
	}
	process := spec.Process
	if process.Capabilities == nil {
		process.Capabilities = new(runtimespec.LinuxCapabilities)
	}
	process.Capabilities.Ambient = nil
	return nil
}
// WithDisabledCgroups is a SpecOpts that empties the cgroups path of the
// spec, allocating the Linux section when it is missing.
func WithDisabledCgroups(_ context.Context, _ oci.Client, _ *containers.Container, spec *runtimespec.Spec) error {
	if spec.Linux == nil {
		spec.Linux = new(runtimespec.Linux)
	}
	spec.Linux.CgroupsPath = ""
	return nil
}
// WithSelinuxLabels returns a SpecOpts that stores process as the SELinux
// label of the spec's process and mount as the mount label of its Linux
// section, allocating either section when it is missing.
func WithSelinuxLabels(process, mount string) oci.SpecOpts {
	return func(_ context.Context, _ oci.Client, _ *containers.Container, spec *runtimespec.Spec) error {
		if spec.Linux == nil {
			spec.Linux = new(runtimespec.Linux)
		}
		if spec.Process == nil {
			spec.Process = new(runtimespec.Process)
		}
		spec.Process.SelinuxLabel = process
		spec.Linux.MountLabel = mount
		return nil
	}
}
// WithSysctls returns a SpecOpts that copies every entry of sysctls into the
// spec's Linux sysctl map, overwriting keys that already exist and allocating
// the Linux section and the map when they are missing.
func WithSysctls(sysctls map[string]string) oci.SpecOpts {
	return func(_ context.Context, _ oci.Client, _ *containers.Container, spec *runtimespec.Spec) error {
		if spec.Linux == nil {
			spec.Linux = new(runtimespec.Linux)
		}
		linux := spec.Linux
		if linux.Sysctl == nil {
			linux.Sysctl = map[string]string{}
		}
		for name, value := range sysctls {
			linux.Sysctl[name] = value
		}
		return nil
	}
}
// WithSupplementalGroups returns a SpecOpts that converts groups to uint32
// and merges them, via mergeGids, into the additional GIDs already present on
// the spec's process user.
func WithSupplementalGroups(groups []int64) oci.SpecOpts {
	return func(_ context.Context, _ oci.Client, _ *containers.Container, spec *runtimespec.Spec) error {
		if spec.Process == nil {
			spec.Process = new(runtimespec.Process)
		}
		var converted []uint32
		for i := range groups {
			converted = append(converted, uint32(groups[i]))
		}
		user := &spec.Process.User
		user.AdditionalGids = mergeGids(user.AdditionalGids, converted)
		return nil
	}
}
// WithDefaultSandboxShares is a SpecOpts that sets the Linux CPU shares of the
// spec to DefaultSandboxCPUshares, allocating the Linux, Resources and CPU
// sections on the way when any of them is missing.
func WithDefaultSandboxShares(_ context.Context, _ oci.Client, _ *containers.Container, spec *runtimespec.Spec) error {
	if spec.Linux == nil {
		spec.Linux = new(runtimespec.Linux)
	}
	linux := spec.Linux
	if linux.Resources == nil {
		linux.Resources = new(runtimespec.LinuxResources)
	}
	if linux.Resources.CPU == nil {
		linux.Resources.CPU = new(runtimespec.LinuxCPU)
	}
	shares := uint64(DefaultSandboxCPUshares)
	linux.Resources.CPU.Shares = &shares
	return nil
}
// WithoutNamespace returns a SpecOpts that removes every namespace entry of
// type t from the spec's Linux namespaces. A spec without a Linux section is
// left untouched.
func WithoutNamespace(t runtimespec.LinuxNamespaceType) oci.SpecOpts {
	return func(_ context.Context, _ oci.Client, _ *containers.Container, spec *runtimespec.Spec) error {
		if spec.Linux == nil {
			return nil
		}
		var kept []runtimespec.LinuxNamespace
		for _, ns := range spec.Linux.Namespaces {
			if ns.Type == t {
				continue
			}
			kept = append(kept, ns)
		}
		spec.Linux.Namespaces = kept
		return nil
	}
}
// WithPodNamespaces returns a SpecOpts that makes the container join the
// pod's namespaces:
//   - network, IPC and UTS namespaces always come from sandboxPid;
//   - the PID namespace comes from targetPid unless the configured PID mode is
//     NamespaceMode_CONTAINER;
//   - when user namespace options are present with mode NamespaceMode_POD, the
//     user namespace of sandboxPid is joined and the uids/gids mappings are
//     applied. NamespaceMode_NODE adds nothing, which leaves the container in
//     the node's user namespace.
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32, uids, gids []runtimespec.LinuxIDMapping) oci.SpecOpts {
	nsOptions := config.GetNamespaceOptions()

	join := func(kind runtimespec.LinuxNamespaceType, path string) oci.SpecOpts {
		return oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: kind, Path: path})
	}

	opts := []oci.SpecOpts{
		join(runtimespec.NetworkNamespace, GetNetworkNamespace(sandboxPid)),
		join(runtimespec.IPCNamespace, GetIPCNamespace(sandboxPid)),
		join(runtimespec.UTSNamespace, GetUTSNamespace(sandboxPid)),
	}
	if nsOptions.GetPid() != runtime.NamespaceMode_CONTAINER {
		opts = append(opts, join(runtimespec.PIDNamespace, GetPIDNamespace(targetPid)))
	}
	if userns := nsOptions.GetUsernsOptions(); userns != nil && userns.GetMode() == runtime.NamespaceMode_POD {
		opts = append(opts,
			join(runtimespec.UserNamespace, GetUserNamespace(sandboxPid)),
			oci.WithUserNamespace(uids, gids),
		)
	}
	return oci.Compose(opts...)
}
// Format strings for the /proc namespace paths of a process. Each takes the
// process ID as its single formatting argument.
const (
	// netNSFormat is the format of network namespace of a process.
	netNSFormat = "/proc/%v/ns/net"
	// ipcNSFormat is the format of ipc namespace of a process.
	ipcNSFormat = "/proc/%v/ns/ipc"
	// utsNSFormat is the format of uts namespace of a process.
	utsNSFormat = "/proc/%v/ns/uts"
	// pidNSFormat is the format of pid namespace of a process.
	pidNSFormat = "/proc/%v/ns/pid"
	// userNSFormat is the format of user namespace of a process.
	userNSFormat = "/proc/%v/ns/user"
)
// GetNetworkNamespace returns the network namespace path of the process with
// the given pid, built by formatting netNSFormat with pid.
func GetNetworkNamespace(pid uint32) string {
	return fmt.Sprintf(netNSFormat, pid)
}
// GetIPCNamespace returns the ipc namespace path of the process with the
// given pid, built by formatting ipcNSFormat with pid.
func GetIPCNamespace(pid uint32) string {
	return fmt.Sprintf(ipcNSFormat, pid)
}
// GetUTSNamespace returns the uts namespace path of the process with the
// given pid, built by formatting utsNSFormat with pid.
func GetUTSNamespace(pid uint32) string {
	return fmt.Sprintf(utsNSFormat, pid)
}
// GetPIDNamespace returns the pid namespace path of the process with the
// given pid, built by formatting pidNSFormat with pid.
func GetPIDNamespace(pid uint32) string {
	return fmt.Sprintf(pidNSFormat, pid)
}
// GetUserNamespace returns the user namespace path of the process with the
// given pid, built by formatting userNSFormat with pid.
func GetUserNamespace(pid uint32) string {
	return fmt.Sprintf(userNSFormat, pid)
}

View File

@@ -1,46 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"sort"
"testing"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// TestOrderedMounts checks that a stable sort over orderedMounts orders mounts
// by container path depth while keeping the input order of equally deep paths.
func TestOrderedMounts(t *testing.T) {
	mountsFor := func(paths ...string) []*runtime.Mount {
		var out []*runtime.Mount
		for _, p := range paths {
			out = append(out, &runtime.Mount{ContainerPath: p})
		}
		return out
	}

	mounts := mountsFor("/a/b/c", "/a/b", "/a/b/c/d", "/a", "/b", "/b/c")
	expected := mountsFor("/a", "/b", "/a/b", "/b/c", "/a/b/c", "/a/b/c/d")

	sort.Stable(orderedMounts(mounts))
	assert.Equal(t, expected, mounts)
}

View File

@@ -1,121 +0,0 @@
//go:build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"context"
"errors"
"strings"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/pkg/oci"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/windows"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// escapeAndCombineArgsWindows escapes each argument with windows.EscapeArg and
// joins the results with single spaces into one command-line string.
func escapeAndCombineArgsWindows(args []string) string {
	var cmdLine strings.Builder
	for idx, arg := range args {
		if idx > 0 {
			cmdLine.WriteString(" ")
		}
		cmdLine.WriteString(windows.EscapeArg(arg))
	}
	return cmdLine.String()
}
// WithProcessCommandLineOrArgsForWindows returns a SpecOpts that fills in
// either the process args or the process command line of the spec, using the
// arguments resolved by getArgs from the image and container config.
//
// When image.ArgsEscaped is not set, the resolved arguments are stored as
// process args. When it is set, a command line is stored instead: if the first
// argument originates from the image it is used verbatim and only the
// remaining arguments are escaped; otherwise every argument is escaped.
func WithProcessCommandLineOrArgsForWindows(config *runtime.ContainerConfig, image *imagespec.ImageConfig) oci.SpecOpts {
	if !image.ArgsEscaped { //nolint:staticcheck // ArgsEscaped is deprecated
		return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
			resolved, _, err := getArgs(image.Entrypoint, image.Cmd, config.GetCommand(), config.GetArgs())
			if err != nil {
				return err
			}
			return oci.WithProcessArgs(resolved...)(ctx, client, c, s)
		}
	}

	return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
		// fromImage tells whether the first resolved argument comes from the
		// image Entrypoint or Cmd rather than from the container config. It
		// exists to support the non-OCI ArgsEscaped field that Docker used to
		// decide how the image entrypoint and cmd should be interpreted.
		resolved, fromImage, err := getArgs(image.Entrypoint, image.Cmd, config.GetCommand(), config.GetArgs())
		if err != nil {
			return err
		}
		if !(image.ArgsEscaped && fromImage) { //nolint:staticcheck // ArgsEscaped is deprecated
			return oci.WithProcessCommandLine(escapeAndCombineArgsWindows(resolved))(ctx, client, c, s)
		}
		// The image-provided first argument is already escaped; only the
		// remainder needs escaping.
		commandLine := resolved[0]
		if rest := resolved[1:]; len(rest) > 0 {
			commandLine += " " + escapeAndCombineArgsWindows(rest)
		}
		return oci.WithProcessCommandLine(commandLine)(ctx, client, c, s)
	}
}
// getArgs evaluates the overall args for the container by combining the image
// entrypoint and cmd with the container entrypoint and cmd specified through
// the podspec, if any.
//
// It returns the combined argument list (entrypoint followed by cmd), a flag
// firstArgFromImg, and an error when the resulting list would be empty. The
// returned slice is always freshly allocated and never shares its backing
// array with any of the inputs.
//
// firstArgFromImg indicates that the first arg in the returned slice comes
// from either the image Entrypoint or Cmd rather than from the container
// config. It can only become true when the container entrypoint is empty, and
// is set when either:
//   - the container cmd is empty too and the image cmd is non-empty, or
//   - the container entrypoint is nil and the image entrypoint is non-empty
//     or the container cmd is empty.
//
// Typical cases:
//
//	ctr entrypoint   ctr cmd   image entrypoint   image cmd   firstArgFromImg
//	--------------------------------------------------------------------------
//	nil              nil       exists             nil         true
//	nil              nil       nil                exists      true
//
// This is needed to support the non-OCI ArgsEscaped field used by Docker.
// ArgsEscaped is used for Windows images to indicate that the command has
// already been escaped and should be used directly as the command line.
//
//nolint:dupword
func getArgs(imgEntrypoint []string, imgCmd []string, ctrEntrypoint []string, ctrCmd []string) ([]string, bool, error) {
	var firstArgFromImg bool
	entrypoint, cmd := ctrEntrypoint, ctrCmd
	// The following logic is migrated from https://github.com/moby/moby/blob/master/daemon/commit.go
	// TODO(random-liu): Clearly define the commands overwrite behavior.
	if len(entrypoint) == 0 {
		if len(cmd) == 0 {
			cmd = imgCmd
			if len(imgCmd) > 0 {
				firstArgFromImg = true
			}
		}
		if entrypoint == nil {
			entrypoint = imgEntrypoint
			if len(imgEntrypoint) > 0 || len(ctrCmd) == 0 {
				firstArgFromImg = true
			}
		}
	}
	if len(entrypoint) == 0 && len(cmd) == 0 {
		return nil, false, errors.New("no command specified")
	}
	// Build the result in a new slice. Appending directly to entrypoint could
	// write into spare capacity of the caller's slice and return a slice that
	// aliases it.
	args := make([]string, 0, len(entrypoint)+len(cmd))
	args = append(args, entrypoint...)
	args = append(args, cmd...)
	return args, firstArgFromImg, nil
}

View File

@@ -1,261 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"context"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/pkg/oci"
osinterface "github.com/containerd/containerd/v2/pkg/os"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// namedPipePath reports whether p starts with the Windows named pipe prefix
// `\\.\pipe\`.
func namedPipePath(p string) bool {
	const pipePrefix = `\\.\pipe\`
	return strings.HasPrefix(p, pipePrefix)
}
// cleanMount normalizes a mount path with filepath.Clean. Named pipe paths are
// the exception and are handed back exactly as given.
func cleanMount(p string) string {
	if !namedPipePath(p) {
		return filepath.Clean(p)
	}
	return p
}
// parseMount converts a CRI mount into an OCI runtime-spec mount for Windows.
//
// For host paths that are not named pipes it stats the source (creating it as
// a directory when it does not exist), resolves symbolic links, cleans the
// source path, and normalizes the destination. Named pipe sources skip all of
// that and are passed through with source and destination untouched.
//
// The returned mount carries a single option: "ro" when the CRI mount is
// read-only, "rw" otherwise. Errors from stat (other than not-exist), mkdir
// and symlink resolution are wrapped and returned, and a destination that is
// exactly the C drive ("c:" or "C:") is rejected.
func parseMount(osi osinterface.OS, mount *runtime.Mount) (*runtimespec.Mount, error) {
	var (
		dst = mount.GetContainerPath()
		src = mount.GetHostPath()
	)
	// In the case of a named pipe mount on Windows, don't stat the file
	// or do other operations that open it, as that could interfere with
	// the listening process. filepath.Clean also breaks named pipe
	// paths, so don't use it.
	if !namedPipePath(src) {
		if _, err := osi.Stat(src); err != nil {
			// Create the host path if it doesn't exist. This will align
			// the behavior with the Linux implementation, but it doesn't
			// align with Docker's behavior on Windows.
			if !os.IsNotExist(err) {
				return nil, fmt.Errorf("failed to stat %q: %w", src, err)
			}
			if err := osi.MkdirAll(src, 0755); err != nil {
				return nil, fmt.Errorf("failed to mkdir %q: %w", src, err)
			}
		}
		var err error
		// Keep the unresolved path so the error message reports what the
		// caller actually asked for.
		originalSrc := src
		src, err = osi.ResolveSymbolicLink(src)
		if err != nil {
			return nil, fmt.Errorf("failed to resolve symlink %q: %w", originalSrc, err)
		}
		// hcsshim requires clean path, especially '/' -> '\'. Additionally,
		// for the destination, absolute paths should have the C: prefix.
		src = filepath.Clean(src)
		// filepath.Clean adds a '.' at the end if the path is a
		// drive (like Z:, E: etc.). Keeping this '.' in the path
		// causes incorrect parameter error when starting the
		// container on windows. So a bare two-character drive destination
		// is left uncleaned; every other destination is cleaned here.
		if !(len(dst) == 2 && dst[1] == ':') {
			dst = filepath.Clean(dst)
			// A cleaned destination that starts with a backslash has no
			// drive letter; anchor it on C:.
			if dst[0] == '\\' {
				dst = "C:" + dst
			}
		} else if dst[0] == 'c' || dst[0] == 'C' {
			// A bare drive destination must not be the C drive itself.
			return nil, fmt.Errorf("destination path can not be C drive")
		}
	}
	var options []string
	// NOTE(random-liu): we don't change all mounts to `ro` when root filesystem
	// is readonly. This is different from docker's behavior, but make more sense.
	if mount.GetReadonly() {
		options = append(options, "ro")
	} else {
		options = append(options, "rw")
	}
	return &runtimespec.Mount{Source: src, Destination: dst, Options: options}, nil
}
// WithWindowsMounts sorts and adds runtime and CRI mounts to the spec for
// windows container.
//
// The resulting spec mount list is built in this order:
//  1. the mounts already present on the spec whose destination is not claimed
//     by any CRI or extra mount;
//  2. the CRI mounts from config plus those extra mounts whose container path
//     does not collide with a CRI mount, sorted by container path depth and
//     each converted with parseMount.
//
// Paths are compared after cleanMount. The first parseMount error aborts the
// option and is returned as-is.
func WithWindowsMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) error {
		// Merge CRI mounts with extra mounts. If a mount destination
		// is mounted by both a CRI mount and an extra mount, the CRI mount will
		// be kept.
		var (
			criMounts = config.GetMounts()
			// Copy so that appending extra mounts does not touch the
			// slice returned by config.
			mounts = append([]*runtime.Mount{}, criMounts...)
		)
		// Copy all mounts from extra mounts, except for mounts overridden by CRI.
		for _, e := range extra {
			found := false
			for _, c := range criMounts {
				if cleanMount(e.ContainerPath) == cleanMount(c.ContainerPath) {
					found = true
					break
				}
			}
			if !found {
				mounts = append(mounts, e)
			}
		}

		// Sort mounts in number of parts. This ensures that high level mounts don't
		// shadow other mounts.
		sort.Sort(orderedMounts(mounts))

		// Copy all mounts from default mounts, except for
		// mounts overridden by supplied mount;
		mountSet := make(map[string]struct{})
		for _, m := range mounts {
			mountSet[cleanMount(m.ContainerPath)] = struct{}{}
		}

		// Rebuild s.Mounts from scratch: surviving defaults first, then the
		// supplied mounts.
		defaultMounts := s.Mounts
		s.Mounts = nil

		for _, m := range defaultMounts {
			dst := cleanMount(m.Destination)
			if _, ok := mountSet[dst]; ok {
				// filter out mount overridden by a supplied mount
				continue
			}
			s.Mounts = append(s.Mounts, m)
		}

		for _, mount := range mounts {
			parsedMount, err := parseMount(osi, mount)
			if err != nil {
				return err
			}
			s.Mounts = append(s.Mounts, *parsedMount)
		}
		return nil
	}
}
// WithWindowsResources returns a SpecOpts that copies the non-zero CPU count,
// CPU shares, CPU maximum and memory limit from resources onto the spec's
// Windows resources. A nil resources value leaves the spec untouched.
//
// The Windows, Resources and Memory sections are allocated whenever resources
// is non-nil; the CPU section is allocated only when at least one CPU value is
// non-zero.
func WithWindowsResources(resources *runtime.WindowsContainerResources) oci.SpecOpts {
	return func(_ context.Context, _ oci.Client, _ *containers.Container, s *runtimespec.Spec) error {
		if resources == nil {
			return nil
		}
		if s.Windows == nil {
			s.Windows = new(runtimespec.Windows)
		}
		if s.Windows.Resources == nil {
			s.Windows.Resources = new(runtimespec.WindowsResources)
		}
		winRes := s.Windows.Resources
		if winRes.Memory == nil {
			winRes.Memory = new(runtimespec.WindowsMemoryResources)
		}

		cpuCount := uint64(resources.GetCpuCount())
		cpuShares := uint16(resources.GetCpuShares())
		cpuMaximum := uint16(resources.GetCpuMaximum())
		memoryLimit := uint64(resources.GetMemoryLimitInBytes())

		if cpuCount != 0 || cpuShares != 0 || cpuMaximum != 0 {
			if winRes.CPU == nil {
				winRes.CPU = new(runtimespec.WindowsCPUResources)
			}
			if cpuCount != 0 {
				winRes.CPU.Count = &cpuCount
			}
			if cpuShares != 0 {
				winRes.CPU.Shares = &cpuShares
			}
			if cpuMaximum != 0 {
				winRes.CPU.Maximum = &cpuMaximum
			}
		}
		if memoryLimit != 0 {
			winRes.Memory.Limit = &memoryLimit
		}
		return nil
	}
}
// WithWindowsDefaultSandboxShares is a SpecOpts that sets the Windows CPU
// shares of the spec to DefaultSandboxCPUshares, allocating the Windows,
// Resources and CPU sections when any of them is missing.
func WithWindowsDefaultSandboxShares(_ context.Context, _ oci.Client, _ *containers.Container, spec *runtimespec.Spec) error {
	if spec.Windows == nil {
		spec.Windows = new(runtimespec.Windows)
	}
	win := spec.Windows
	if win.Resources == nil {
		win.Resources = new(runtimespec.WindowsResources)
	}
	if win.Resources.CPU == nil {
		win.Resources.CPU = new(runtimespec.WindowsCPUResources)
	}
	shares := uint16(DefaultSandboxCPUshares)
	win.Resources.CPU.Shares = &shares
	return nil
}
// WithWindowsCredentialSpec returns a SpecOpts that stores credentialSpec in
// the spec's Windows.CredentialSpec field, allocating the Windows section when
// it is missing.
func WithWindowsCredentialSpec(credentialSpec string) oci.SpecOpts {
	return func(_ context.Context, _ oci.Client, _ *containers.Container, spec *runtimespec.Spec) error {
		if spec.Windows == nil {
			spec.Windows = new(runtimespec.Windows)
		}
		spec.Windows.CredentialSpec = credentialSpec
		return nil
	}
}
// WithWindowsDevices returns a SpecOpts that adds every device from config to
// the spec through oci.WithWindowsDevice.
//
// Only HostPath may be set on a device; a non-empty ContainerPath or
// Permissions is rejected. HostPath must have the form IDType://ID. The
// shorthand "class/ID" is also accepted and treated as "class://ID". The first
// failing device aborts the option.
func WithWindowsDevices(config *runtime.ContainerConfig) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
		const classShorthand = "class/"
		for _, device := range config.GetDevices() {
			switch {
			case device.ContainerPath != "":
				return fmt.Errorf("unexpected ContainerPath %s, must be empty", device.ContainerPath)
			case device.Permissions != "":
				return fmt.Errorf("unexpected Permissions %s, must be empty", device.Permissions)
			}

			hostPath := device.HostPath
			if strings.HasPrefix(hostPath, classShorthand) {
				hostPath = "class://" + hostPath[len(classShorthand):]
			}

			idType, id, found := strings.Cut(hostPath, "://")
			if !found {
				return fmt.Errorf("unrecognised HostPath format %v, must match IDType://ID", device.HostPath)
			}

			addDevice := oci.WithWindowsDevice(idType, id)
			if err := addDevice(ctx, client, c, s); err != nil {
				return fmt.Errorf("failed adding device with HostPath %v: %w", device.HostPath, err)
			}
		}
		return nil
	}
}

View File

@@ -1,249 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"context"
"fmt"
"strings"
"testing"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/pkg/namespaces"
"github.com/containerd/containerd/v2/pkg/oci"
osinterface "github.com/containerd/containerd/v2/pkg/os"
)
func TestWithDevices(t *testing.T) {
testcases := []struct {
name string
devices []*runtime.Device
isLCOW bool
expectError bool
expectedWindowsDevices []specs.WindowsDevice
}{
{
name: "empty",
expectError: false,
},
// The only supported field is HostPath
{
name: "empty fields",
devices: []*runtime.Device{{}},
expectError: true,
},
{
name: "containerPath",
devices: []*runtime.Device{{ContainerPath: "something"}},
expectError: true,
},
{
name: "permissions",
devices: []*runtime.Device{{Permissions: "something"}},
expectError: true,
},
// Produced by https://github.com/aarnaud/k8s-directx-device-plugin/blob/0f3db32622daa577c85621941682bee6f9080954/cmd/k8s-device-plugin/main.go
// This is also the syntax dockershim and cri-dockerd support (or rather, pass through to docker, which parses this syntax)
{
name: "hostPath_docker_style",
devices: []*runtime.Device{{HostPath: "class/5B45201D-F2F2-4F3B-85BB-30FF1F953599"}},
expectError: false,
expectedWindowsDevices: []specs.WindowsDevice{{ID: "5B45201D-F2F2-4F3B-85BB-30FF1F953599", IDType: "class"}},
},
// Docker _only_ accepts `class` ID Type, so anything else should fail.
// See https://github.com/moby/moby/blob/v20.10.13/daemon/oci_windows.go#L283-L294
{
name: "hostPath_docker_style_non-class_idtype",
devices: []*runtime.Device{{HostPath: "vpci-location-path/5B45201D-F2F2-4F3B-85BB-30FF1F953599"}},
expectError: true,
},
// A bunch of examples from https://github.com/microsoft/hcsshim/blob/v0.9.2/test/cri-containerd/container_virtual_device_test.go
{
name: "hostPath_hcsshim_lcow_gpu",
// Not actually a GPU PCIP instance, but my personal machine doesn't have any PCIP devices, so I found one on the 'net.
devices: []*runtime.Device{{HostPath: `gpu://PCIP\VEN_8086&DEV_43A2&SUBSYS_72708086&REV_00\3&11583659&0&F5`}},
isLCOW: true,
expectError: false,
expectedWindowsDevices: []specs.WindowsDevice{{ID: `PCIP\VEN_8086&DEV_43A2&SUBSYS_72708086&REV_00\3&11583659&0&F5`, IDType: "gpu"}},
},
{
name: "hostPath_hcsshim_wcow_location_path",
devices: []*runtime.Device{{HostPath: "vpci-location-path://PCIROOT(0)#PCI(0100)#PCI(0000)#PCI(0000)#PCI(0001)"}},
expectError: false,
expectedWindowsDevices: []specs.WindowsDevice{{ID: "PCIROOT(0)#PCI(0100)#PCI(0000)#PCI(0000)#PCI(0001)", IDType: "vpci-location-path"}},
},
{
name: "hostPath_hcsshim_wcow_class_guid",
devices: []*runtime.Device{{HostPath: "class://5B45201D-F2F2-4F3B-85BB-30FF1F953599"}},
expectError: false,
expectedWindowsDevices: []specs.WindowsDevice{{ID: "5B45201D-F2F2-4F3B-85BB-30FF1F953599", IDType: "class"}},
},
{
name: "hostPath_hcsshim_wcow_gpu_hyper-v",
// Not actually a GPU PCIP instance, but my personal machine doesn't have any PCIP devices, so I found one on the 'net.
devices: []*runtime.Device{{HostPath: `vpci://PCIP\VEN_8086&DEV_43A2&SUBSYS_72708086&REV_00\3&11583659&0&F5`}},
expectError: false,
expectedWindowsDevices: []specs.WindowsDevice{{ID: `PCIP\VEN_8086&DEV_43A2&SUBSYS_72708086&REV_00\3&11583659&0&F5`, IDType: "vpci"}},
},
// Example from https://github.com/microsoft/hcsshim/blob/v0.9.2/test/cri-containerd/container_test.go
// According to https://github.com/jterry75/cri/blob/f8e83e63cc027d0e9c0c984f9db3cba58d3672d4/pkg/server/container_create_windows.go#L625-L649
// this is intended to generate LinuxDevice entries that the GCS shim in a LCOW container host will remap
// into device mounts from its own in-UVM kernel.
// From discussion on https://github.com/containerd/containerd/pull/6618, we reject this syntax for now.
{
name: "hostPath_hcsshim_lcow_sandbox_device",
devices: []*runtime.Device{{HostPath: "/dev/fuse"}},
isLCOW: true,
expectError: true,
},
// Some edge cases suggested by the above real-world examples
{
name: "hostPath_no_slash",
devices: []*runtime.Device{{HostPath: "no_slash"}},
expectError: true,
},
{
name: "hostPath_but_no_type",
devices: []*runtime.Device{{HostPath: "://5B45201D-F2F2-4F3B-85BB-30FF1F953599"}},
expectError: true,
},
{
name: "hostPath_but_no_id",
devices: []*runtime.Device{{HostPath: "gpu://"}},
expectError: false,
expectedWindowsDevices: []specs.WindowsDevice{{ID: "", IDType: "gpu"}},
},
{
name: "hostPath_dockerstyle_with_slashes_in_id",
devices: []*runtime.Device{{HostPath: "class/slashed/id"}},
expectError: false,
expectedWindowsDevices: []specs.WindowsDevice{{ID: "slashed/id", IDType: "class"}},
},
{
name: "hostPath_docker_style_non-class_idtypewith_slashes_in_id",
devices: []*runtime.Device{{HostPath: "vpci-location-path/slashed/id"}},
expectError: true,
},
{
name: "hostPath_hcsshim_wcow_location_path_twice",
devices: []*runtime.Device{
{HostPath: "vpci-location-path://PCIROOT(0)#PCI(0100)#PCI(0000)#PCI(0000)#PCI(0001)"},
{HostPath: "vpci-location-path://PCIROOT(0)#PCI(0100)#PCI(0000)#PCI(0000)#PCI(0002)"}},
expectError: false,
expectedWindowsDevices: []specs.WindowsDevice{
{ID: "PCIROOT(0)#PCI(0100)#PCI(0000)#PCI(0000)#PCI(0001)", IDType: "vpci-location-path"},
{ID: "PCIROOT(0)#PCI(0100)#PCI(0000)#PCI(0000)#PCI(0002)", IDType: "vpci-location-path"},
},
},
}
for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
var (
ctx = namespaces.WithNamespace(context.Background(), "testing")
c = &containers.Container{ID: t.Name()}
)
config := runtime.ContainerConfig{}
config.Devices = tc.devices
specOpts := []oci.SpecOpts{WithWindowsDevices(&config)}
platform := "windows"
if tc.isLCOW {
platform = "linux"
}
spec, err := oci.GenerateSpecWithPlatform(ctx, nil, platform, c, specOpts...)
if tc.expectError {
assert.Error(t, err)
} else {
require.NoError(t, err)
}
// Ensure we got the right LCOWness in the spec
if tc.isLCOW {
assert.NotNil(t, spec.Linux)
} else {
assert.Nil(t, spec.Linux)
}
if len(tc.expectedWindowsDevices) != 0 {
require.NotNil(t, spec.Windows)
require.NotNil(t, spec.Windows.Devices)
assert.Equal(t, spec.Windows.Devices, tc.expectedWindowsDevices)
} else if spec.Windows != nil && spec.Windows.Devices != nil {
assert.Empty(t, spec.Windows.Devices)
}
if spec.Linux != nil && spec.Linux.Devices != nil {
assert.Empty(t, spec.Linux.Devices)
}
})
}
}
// TestDriveMounts checks how parseMount treats a set of Windows host and
// container path combinations: each case either expects a specific container
// path in the parsed mount or a specific error.
func TestDriveMounts(t *testing.T) {
	tests := []struct {
		mnt                   *runtime.Mount
		expectedContainerPath string
		expectedError         error
	}{
		{&runtime.Mount{HostPath: `C:\`, ContainerPath: `D:\foo`}, `D:\foo`, nil},
		{&runtime.Mount{HostPath: `C:\`, ContainerPath: `D:\`}, `D:\`, nil},
		{&runtime.Mount{HostPath: `C:\`, ContainerPath: `D:`}, `D:`, nil},
		{&runtime.Mount{HostPath: `\\.\pipe\a_fake_pipe_name_that_shouldnt_exist`, ContainerPath: `\\.\pipe\foo`}, `\\.\pipe\foo`, nil},
		// If `C:\` is passed as container path it should continue and forward that to HCS and fail
		// to align with docker's behavior.
		{&runtime.Mount{HostPath: `C:\`, ContainerPath: `C:\`}, `C:\`, nil},

		// If `C:` is passed we can detect and fail immediately.
		{&runtime.Mount{HostPath: `C:\`, ContainerPath: `C:`}, ``, fmt.Errorf("destination path can not be C drive")},
	}
	var realOS osinterface.RealOS
	for _, test := range tests {
		parsedMount, err := parseMount(realOS, test.mnt)
		switch {
		case test.expectedError != nil:
			// An error is required; a nil error must not slip through, and the
			// message is compared case-insensitively.
			if err == nil {
				t.Fatalf("expected err: %s, got nil instead", test.expectedError)
			}
			if !strings.EqualFold(err.Error(), test.expectedError.Error()) {
				t.Fatalf("expected err: %s, got %s instead", test.expectedError, err)
			}
		case err != nil:
			// No error was expected; report it instead of dereferencing the
			// nil expectedError.
			t.Fatalf("expected no error, got %s instead", err)
		case test.expectedContainerPath != parsedMount.Destination:
			t.Fatalf("expected container path: %s, got %s instead", test.expectedContainerPath, parsedMount.Destination)
		}
	}
}

View File

@@ -1,45 +0,0 @@
//go:build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"fmt"
"github.com/containerd/containerd/v2/pkg/blockio"
"github.com/containerd/log"
)
// blockIOClassFromAnnotations examines container and pod annotations of a
// container and returns its effective blockio class.
//
// An error from blockio.ContainerClassFromAnnotations is returned as-is. When
// a class is requested but blockio is not enabled, the result depends on the
// IgnoreBlockIONotEnabledErrors config option: if set, the class is dropped
// (an empty class is returned) and a debug message is logged; otherwise an
// error is returned.
func (c *criService) blockIOClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) {
	cls, err := blockio.ContainerClassFromAnnotations(containerName, containerAnnotations, podAnnotations)
	if err != nil {
		return "", err
	}
	// Nothing to validate when no class was requested or blockio is usable.
	if cls == "" || blockio.IsEnabled() {
		return cls, nil
	}
	if !c.config.ContainerdConfig.IgnoreBlockIONotEnabledErrors {
		return "", fmt.Errorf("blockio disabled, refusing to set blockio class of container %q to %q", containerName, cls)
	}
	// err is always nil at this point, so report the class being dropped
	// rather than a meaningless "<nil>".
	log.L.Debugf("continuing create container %s, ignoring blockio not enabled (requested class %q)", containerName, cls)
	return "", nil
}

View File

@@ -1,23 +0,0 @@
//go:build !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
// blockIOClassFromAnnotations is the stub used by non-Linux builds: it ignores
// its arguments and always returns an empty class with no error.
func (c *criService) blockIOClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) {
	var noClass string
	return noClass, nil
}

View File

@@ -1,133 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"fmt"
"os"
"path/filepath"
"sync"
"github.com/containerd/go-cni"
"github.com/containerd/log"
"github.com/fsnotify/fsnotify"
)
// cniNetConfSyncer reloads the cni network configuration whenever the watched
// conf directory reports a relevant file system change.
type cniNetConfSyncer struct {
	// The embedded lock protects lastSyncStatus and nothing else.
	sync.RWMutex
	// lastSyncStatus holds the result of the most recent cni load.
	lastSyncStatus error

	// watcher delivers file system events for confDir.
	watcher *fsnotify.Watcher
	// confDir is the cni configuration directory being watched.
	confDir string
	// netPlugin is the cni instance whose configuration gets (re)loaded.
	netPlugin cni.CNI
	// loadOpts are the options passed to every netPlugin.Load call.
	loadOpts []cni.Opt
}
// newCNINetConfSyncer creates cni network conf syncer.
//
// It creates an fsnotify watcher, makes sure confDir and its parent exist,
// starts watching confDir and performs an initial cni load. A failure of the
// initial load is not fatal: it is logged and recorded as the last sync
// status. Any other failure is returned, and the watcher created here is
// closed before returning so that it is not leaked.
func newCNINetConfSyncer(confDir string, netPlugin cni.CNI, loadOpts []cni.Opt) (*cniNetConfSyncer, error) {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return nil, fmt.Errorf("failed to create fsnotify watcher: %w", err)
	}
	// Release the watcher on every early return below.
	created := false
	defer func() {
		if !created {
			// Best-effort cleanup; the original failure is what gets reported.
			_ = watcher.Close()
		}
	}()

	// /etc/cni has to be readable for non-root users (0755), because /etc/cni/tuning/allowlist.conf is used for rootless mode too.
	// This file was introduced in CNI plugins 1.2.0 (https://github.com/containernetworking/plugins/pull/693), and its path is hard-coded.
	confDirParent := filepath.Dir(confDir)
	if err := os.MkdirAll(confDirParent, 0755); err != nil {
		return nil, fmt.Errorf("failed to create the parent of the cni conf dir=%s: %w", confDirParent, err)
	}

	if err := os.MkdirAll(confDir, 0700); err != nil {
		return nil, fmt.Errorf("failed to create cni conf dir=%s for watch: %w", confDir, err)
	}

	if err := watcher.Add(confDir); err != nil {
		return nil, fmt.Errorf("failed to watch cni conf dir %s: %w", confDir, err)
	}

	syncer := &cniNetConfSyncer{
		watcher:   watcher,
		confDir:   confDir,
		netPlugin: netPlugin,
		loadOpts:  loadOpts,
	}

	if err := syncer.netPlugin.Load(syncer.loadOpts...); err != nil {
		log.L.WithError(err).Error("failed to load cni during init, please check CRI plugin status before setting up network for pods")
		syncer.updateLastStatus(err)
	}
	created = true
	return syncer, nil
}
// syncLoop monitors any fs change events from cni conf dir and tries to reload
// cni configuration.
//
// It returns nil once either watcher channel is closed, and returns the error
// when the watcher reports one. The result of every reload attempt is stored
// via updateLastStatus.
func (syncer *cniNetConfSyncer) syncLoop() error {
	for {
		select {
		case event, ok := <-syncer.watcher.Events:
			if !ok {
				log.L.Debugf("cni watcher channel is closed")
				return nil
			}
			// Only reload config when receiving write/rename/remove
			// events
			//
			// TODO(fuweid): Might only reload target cni config
			// files to prevent no-ops.
			if event.Has(fsnotify.Chmod) || event.Has(fsnotify.Create) {
				log.L.Debugf("ignore event from cni conf dir: %s", event)
				continue
			}
			log.L.Debugf("receiving change event from cni conf dir: %s", event)

			lerr := syncer.netPlugin.Load(syncer.loadOpts...)
			if lerr != nil {
				log.L.WithError(lerr).
					Errorf("failed to reload cni configuration after receiving fs change event(%s)", event)
			}
			syncer.updateLastStatus(lerr)

		case err, ok := <-syncer.watcher.Errors:
			if !ok {
				// A closed channel yields nil forever; stop here instead of
				// spinning until the closed Events channel gets selected.
				log.L.Debugf("cni watcher error channel is closed")
				return nil
			}
			if err != nil {
				log.L.WithError(err).Error("failed to continue sync cni conf change")
				return err
			}
		}
	}
}
// lastStatus returns the result recorded by the most recent cni load, read
// under the read lock.
func (syncer *cniNetConfSyncer) lastStatus() error {
	syncer.RLock()
	status := syncer.lastSyncStatus
	syncer.RUnlock()
	return status
}
// updateLastStatus records the outcome of a cni load (nil on success) under
// the write lock. It is invoked after each load attempt.
func (syncer *cniNetConfSyncer) updateLastStatus(err error) {
	syncer.Lock()
	syncer.lastSyncStatus = err
	syncer.Unlock()
}
// stop closes the fsnotify watcher consumed by syncLoop and returns the
// result of that close.
func (syncer *cniNetConfSyncer) stop() error {
	closeErr := syncer.watcher.Close()
	return closeErr
}

View File

@@ -1,84 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
"io"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/log"
"k8s.io/client-go/tools/remotecommand"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
cio "github.com/containerd/containerd/v2/pkg/cri/io"
)
// Attach prepares a streaming endpoint to attach to a running container, and returns the address.
// It fails when the container is not in the store or is not in the running
// state; otherwise the request is handed to the stream server.
func (c *criService) Attach(ctx context.Context, r *runtime.AttachRequest) (*runtime.AttachResponse, error) {
	container, err := c.containerStore.Get(r.GetContainerId())
	if err != nil {
		return nil, fmt.Errorf("failed to find container in store: %w", err)
	}
	if current := container.Status.Get().State(); current != runtime.ContainerState_CONTAINER_RUNNING {
		return nil, fmt.Errorf("container is in %s state", criContainerStateToString(current))
	}
	return c.streamServer.GetAttach(r)
}
// attachContainer wires the given stdio streams to the IO of the running
// container identified by id. It looks the container up in the store, checks
// that it is running, loads its task, forwards terminal resize events to the
// task and finally attaches the streams to the container IO.
func (c *criService) attachContainer(ctx context.Context, id string, stdin io.Reader, stdout, stderr io.WriteCloser,
	tty bool, resize <-chan remotecommand.TerminalSize) error {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Look the container up in the container store.
	container, err := c.containerStore.Get(id)
	if err != nil {
		return fmt.Errorf("failed to find container %q in store: %w", id, err)
	}
	// From here on use the ID stored on the container itself.
	id = container.ID

	if current := container.Status.Get().State(); current != runtime.ContainerState_CONTAINER_RUNNING {
		return fmt.Errorf("container is in %s state", criContainerStateToString(current))
	}

	task, err := container.Container.Task(ctx, nil)
	if err != nil {
		return fmt.Errorf("failed to load task: %w", err)
	}

	// Resize failures are only logged; they do not abort the attach.
	onResize := func(size remotecommand.TerminalSize) {
		if err := task.Resize(ctx, uint32(size.Width), uint32(size.Height)); err != nil {
			log.G(ctx).WithError(err).Errorf("Failed to resize task %q console", id)
		}
	}
	handleResizing(ctx, resize, onResize)

	closeStdin := func() error {
		return task.CloseIO(ctx, containerd.WithStdinCloser)
	}
	// TODO(random-liu): Figure out whether we need to support historical output.
	container.IO.Attach(cio.AttachOptions{
		Stdin:      stdin,
		Stdout:     stdout,
		Stderr:     stderr,
		Tty:        tty,
		StdinOnce:  container.Config.StdinOnce,
		CloseStdin: closeStdin,
	})
	return nil
}

View File

@@ -1,29 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// CheckpointContainer is not implemented: it always returns a nil response
// together with a gRPC status error carrying codes.Unimplemented.
func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (res *runtime.CheckpointContainerResponse, err error) {
	err = status.Errorf(codes.Unimplemented, "method CheckpointContainer not implemented")
	return nil, err
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,270 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"strconv"
"strings"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/contrib/apparmor"
"github.com/containerd/containerd/v2/contrib/seccomp"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/pkg/oci"
customopts "github.com/containerd/containerd/v2/pkg/cri/opts"
)
// Names and prefixes recognised when resolving seccomp/AppArmor profiles.
const (
	// profileNamePrefix marks a profile that is loaded from the local host,
	// e.g. the AppArmor profile "localhost/profileName".
	profileNamePrefix = "localhost/" // TODO (mikebrow): get localhost/ & runtime/default from CRI kubernetes/kubernetes#51747

	// runtimeDefault requests that a runtime default profile be used or created.
	runtimeDefault = "runtime/default"

	// dockerDefault requests that a docker default profile be used or created.
	dockerDefault = "docker/default"

	// appArmorDefaultProfileName is the name given to the default apparmor
	// profile when one is created.
	appArmorDefaultProfileName = "cri-containerd.apparmor.d"

	// unconfinedProfile requests that a pod/container run without a security profile.
	unconfinedProfile = "unconfined"

	// seccompDefaultProfile is the default seccomp profile.
	seccompDefaultProfile = dockerDefault
)
// containerSpecOpts builds the Linux-specific OCI spec options for a
// container: additional/supplemental groups for the effective user, the
// apparmor and seccomp options derived from the security context, and the CDI
// option when CDI is enabled in the config.
func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
	var err error
	secCtx := config.GetLinux().GetSecurityContext()

	// Pick the user: explicit username, then explicit uid, then the user part
	// of the image config, falling back to the runtime default "0".
	user := "0"
	switch {
	case secCtx.GetRunAsUsername() != "":
		user = secCtx.GetRunAsUsername()
	case secCtx.GetRunAsUser() != nil:
		user = strconv.FormatInt(secCtx.GetRunAsUser().GetValue(), 10)
	case imageConfig.User != "":
		user, _, _ = strings.Cut(imageConfig.User, ":")
	}

	var specOpts []oci.SpecOpts
	specOpts = append(specOpts,
		customopts.WithAdditionalGIDs(user),
		customopts.WithSupplementalGroups(secCtx.GetSupplementalGroups()),
	)

	// AppArmor: prefer the structured profile, fall back to the deprecated
	// string field.
	apparmorProfile := secCtx.GetApparmor()
	if apparmorProfile == nil {
		apparmorProfile, err = generateApparmorSecurityProfile(secCtx.GetApparmorProfile()) //nolint:staticcheck // Deprecated but we don't want to remove yet
		if err != nil {
			return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err)
		}
	}
	apparmorOpt, err := generateApparmorSpecOpts(apparmorProfile, secCtx.GetPrivileged(), c.apparmorEnabled())
	if err != nil {
		return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err)
	}
	if apparmorOpt != nil {
		specOpts = append(specOpts, apparmorOpt)
	}

	// Seccomp: same preference order, with the configured unset profile as
	// the last resort.
	seccompProfile := secCtx.GetSeccomp()
	if seccompProfile == nil {
		seccompProfile, err = generateSeccompSecurityProfile(
			secCtx.GetSeccompProfilePath(), //nolint:staticcheck // Deprecated but we don't want to remove yet
			c.config.UnsetSeccompProfile)
		if err != nil {
			return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err)
		}
	}
	seccompOpt, err := c.generateSeccompSpecOpts(seccompProfile, secCtx.GetPrivileged(), c.seccompEnabled())
	if err != nil {
		return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err)
	}
	if seccompOpt != nil {
		specOpts = append(specOpts, seccompOpt)
	}

	if c.config.EnableCDI {
		specOpts = append(specOpts, customopts.WithCDI(config.Annotations, config.CDIDevices))
	}
	return specOpts, nil
}
// generateSeccompSecurityProfile converts a seccomp profile path into a
// SecurityProfile. profilePath wins when set; otherwise unsetProfilePath is
// used; when both are empty it returns (nil, nil).
func generateSeccompSecurityProfile(profilePath string, unsetProfilePath string) (*runtime.SecurityProfile, error) {
	switch {
	case profilePath != "":
		return generateSecurityProfile(profilePath)
	case unsetProfilePath != "":
		return generateSecurityProfile(unsetProfilePath)
	default:
		return nil, nil
	}
}
// generateApparmorSecurityProfile converts an apparmor profile path into a
// SecurityProfile, or returns (nil, nil) when the path is empty.
func generateApparmorSecurityProfile(profilePath string) (*runtime.SecurityProfile, error) {
	if profilePath == "" {
		return nil, nil
	}
	return generateSecurityProfile(profilePath)
}
// generateSecurityProfile maps a profile path string onto a SecurityProfile:
// the empty string, runtimeDefault and dockerDefault become RuntimeDefault,
// unconfinedProfile becomes Unconfined, and anything else must carry the
// profileNamePrefix and becomes a Localhost profile referencing the rest of
// the path. Paths without the prefix are rejected with an error.
func generateSecurityProfile(profilePath string) (*runtime.SecurityProfile, error) {
	if profilePath == "" || profilePath == runtimeDefault || profilePath == dockerDefault {
		return &runtime.SecurityProfile{
			ProfileType: runtime.SecurityProfile_RuntimeDefault,
		}, nil
	}
	if profilePath == unconfinedProfile {
		return &runtime.SecurityProfile{
			ProfileType: runtime.SecurityProfile_Unconfined,
		}, nil
	}
	// Everything else has to be a localhost profile; require the prefix and
	// strip it from the reference.
	if !strings.HasPrefix(profilePath, profileNamePrefix) {
		return nil, fmt.Errorf("invalid profile %q", profilePath)
	}
	localhostRef := profilePath[len(profileNamePrefix):]
	return &runtime.SecurityProfile{
		ProfileType:  runtime.SecurityProfile_Localhost,
		LocalhostRef: localhostRef,
	}, nil
}
// generateSeccompSpecOpts generates containerd SpecOpts for seccomp.
// A nil option (with nil error) means that no seccomp profile is to be set.
func (c *criService) generateSeccompSpecOpts(sp *runtime.SecurityProfile, privileged, seccompEnabled bool) (oci.SpecOpts, error) {
	switch {
	case privileged:
		// Privileged containers never get a seccomp profile.
		return nil, nil
	case !seccompEnabled:
		// Without seccomp support only "no profile" or Unconfined is acceptable.
		if sp != nil && sp.ProfileType != runtime.SecurityProfile_Unconfined {
			return nil, errors.New("seccomp is not supported")
		}
		return nil, nil
	case sp == nil:
		return nil, nil
	}

	isLocalhost := sp.ProfileType == runtime.SecurityProfile_Localhost
	if !isLocalhost && sp.LocalhostRef != "" {
		return nil, errors.New("seccomp config invalid LocalhostRef must only be set if ProfileType is Localhost")
	}

	if isLocalhost {
		// The localhost/ prefix should already be gone with the
		// SecurityProfile struct; trim it anyway to be safe.
		ref := strings.TrimPrefix(sp.LocalhostRef, profileNamePrefix)
		return seccomp.WithProfile(ref), nil
	}
	if sp.ProfileType == runtime.SecurityProfile_RuntimeDefault {
		return seccomp.WithDefaultProfile(), nil
	}
	if sp.ProfileType == runtime.SecurityProfile_Unconfined {
		// Unconfined: leave the seccomp profile unset.
		return nil, nil
	}
	return nil, errors.New("seccomp unknown ProfileType")
}
// generateApparmorSpecOpts generates containerd SpecOpts for apparmor.
// A nil option (with nil error) means that no apparmor profile is to be set.
func generateApparmorSpecOpts(sp *runtime.SecurityProfile, privileged, apparmorEnabled bool) (oci.SpecOpts, error) {
	if !apparmorEnabled {
		// Fail loudly when a profile is requested on a host where apparmor
		// is not supported; only "no profile" or Unconfined is acceptable.
		if sp != nil && sp.ProfileType != runtime.SecurityProfile_Unconfined {
			return nil, errors.New("apparmor is not supported")
		}
		return nil, nil
	}

	profile := sp
	if profile == nil {
		// Per kubernetes#51746 the default apparmor profile applies when
		// none is specified.
		profile, _ = generateSecurityProfile("")
	}

	isLocalhost := profile.ProfileType == runtime.SecurityProfile_Localhost
	if !isLocalhost && profile.LocalhostRef != "" {
		return nil, errors.New("apparmor config invalid LocalhostRef must only be set if ProfileType is Localhost")
	}

	if isLocalhost {
		// The localhost/ prefix should already be gone with the
		// SecurityProfile struct; trim it anyway to be safe.
		name := strings.TrimPrefix(profile.LocalhostRef, profileNamePrefix)
		exists, err := appArmorProfileExists(name)
		switch {
		case exists:
			return apparmor.WithProfile(name), nil
		case err != nil:
			return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err)
		default:
			return nil, fmt.Errorf("apparmor profile not found %s", name)
		}
	}
	if profile.ProfileType == runtime.SecurityProfile_RuntimeDefault {
		if privileged {
			// Privileged containers do not get the default profile.
			return nil, nil
		}
		// TODO (mikebrow): delete created apparmor default profile
		return apparmor.WithDefaultProfile(appArmorDefaultProfileName), nil
	}
	if profile.ProfileType == runtime.SecurityProfile_Unconfined {
		// Unconfined: leave the apparmor profile unset.
		return nil, nil
	}
	return nil, errors.New("apparmor unknown ProfileType")
}
// appArmorProfileExists scans apparmor/profiles for the requested profile.
//
// It reads /sys/kernel/security/apparmor/profiles line by line and reports
// true when a line starts with the profile name followed by " (". An empty
// profile name is rejected with an error. Errors from opening or reading the
// file are returned; reaching the end of the file without a match yields
// (false, nil).
func appArmorProfileExists(profile string) (bool, error) {
	if profile == "" {
		return false, errors.New("nil apparmor profile is not supported")
	}
	profiles, err := os.Open("/sys/kernel/security/apparmor/profiles")
	if err != nil {
		return false, err
	}
	defer profiles.Close()

	wanted := profile + " ("
	rbuff := bufio.NewReader(profiles)
	for {
		line, err := rbuff.ReadString('\n')
		atEOF := errors.Is(err, io.EOF)
		if err != nil && !atEOF {
			return false, err
		}
		// ReadString may return a final, unterminated line together with
		// io.EOF, so the line is inspected before acting on EOF.
		if strings.HasPrefix(line, wanted) {
			return true, nil
		}
		if atEOF {
			return false, nil
		}
	}
}
// snapshotterOpts returns any Linux specific snapshotter options for the rootfs snapshot.
// The options are derived by snapshotterRemapOpts from the namespace options
// of the container's Linux security context.
func snapshotterOpts(config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
	return snapshotterRemapOpts(config.GetLinux().GetSecurityContext().GetNamespaceOptions())
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,36 +0,0 @@
//go:build !windows && !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/pkg/oci"
)
// containerSpecOpts is the variant for builds that are neither Windows nor
// Linux: it ignores its arguments and returns an empty, non-nil option list.
func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
	specOpts := []oci.SpecOpts{}
	return specOpts, nil
}
// snapshotterOpts returns snapshotter options for the rootfs snapshot.
// In this build variant the list is always empty (and non-nil).
func snapshotterOpts(config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
	snapshotOpts := []snapshots.Opt{}
	return snapshotOpts, nil
}

View File

@@ -1,115 +0,0 @@
//go:build !windows && !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/pkg/cri/annotations"
)
// checkMount is defined by all tests but not used here. The blank assignment
// below references it anyway — presumably so that it is not reported as
// unused in this build variant.
var _ = checkMount
// getCreateContainerTestData returns the fixtures used by the container
// creation tests: a container config, a sandbox config, an image config and a
// function that checks a generated runtime spec against those fixtures
// (process args, working directory, environment, bind mounts and the sandbox
// related annotations).
func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig,
	*imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) {
	config := &runtime.ContainerConfig{
		Metadata: &runtime.ContainerMetadata{
			Name:    "test-name",
			Attempt: 1,
		},
		Image: &runtime.ImageSpec{
			Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799",
		},
		Command:    []string{"test", "command"},
		Args:       []string{"test", "args"},
		WorkingDir: "test-cwd",
		Envs: []*runtime.KeyValue{
			{Key: "k1", Value: "v1"},
			{Key: "k2", Value: "v2"},
			{Key: "k3", Value: "v3=v3bis"},
			{Key: "k4", Value: "v4=v4bis=foop"},
		},
		Labels:      map[string]string{"a": "b"},
		Annotations: map[string]string{"ca-c": "ca-d"},
		Mounts: []*runtime.Mount{
			// everything default
			{
				ContainerPath: "container-path-1",
				HostPath:      "host-path-1",
			},
			// readOnly
			{
				ContainerPath: "container-path-2",
				HostPath:      "host-path-2",
				Readonly:      true,
			},
		},
	}
	sandboxConfig := &runtime.PodSandboxConfig{
		Metadata: &runtime.PodSandboxMetadata{
			Name:      "test-sandbox-name",
			Uid:       "test-sandbox-uid",
			Namespace: "test-sandbox-ns",
			Attempt:   2,
		},
		Annotations: map[string]string{"c": "d"},
	}
	imageConfig := &imagespec.ImageConfig{
		Env:        []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"},
		Entrypoint: []string{"/entrypoint"},
		Cmd:        []string{"cmd"},
		WorkingDir: "/workspace",
	}
	specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) {
		assert.Equal(t, []string{"test", "command", "test", "args"}, spec.Process.Args)
		assert.Equal(t, "test-cwd", spec.Process.Cwd)
		// assert.Contains checks one element per call; extra arguments are
		// only used for the failure message, so every expected variable is
		// checked individually.
		for _, env := range []string{
			"k1=v1", "k2=v2", "k3=v3=v3bis", "k4=v4=v4bis=foop",
			"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop",
		} {
			assert.Contains(t, spec.Process.Env, env)
		}

		t.Logf("Check bind mount")
		checkMount(t, spec.Mounts, "host-path-1", "container-path-1", "bind", []string{"rw"}, nil)
		checkMount(t, spec.Mounts, "host-path-2", "container-path-2", "bind", []string{"ro"}, nil)

		t.Logf("Check PodSandbox annotations")
		assert.Contains(t, spec.Annotations, annotations.SandboxID)
		assert.EqualValues(t, sandboxID, spec.Annotations[annotations.SandboxID])

		assert.Contains(t, spec.Annotations, annotations.ContainerType)
		assert.EqualValues(t, annotations.ContainerTypeContainer, spec.Annotations[annotations.ContainerType])

		assert.Contains(t, spec.Annotations, annotations.SandboxNamespace)
		assert.EqualValues(t, "test-sandbox-ns", spec.Annotations[annotations.SandboxNamespace])

		assert.Contains(t, spec.Annotations, annotations.SandboxUID)
		assert.EqualValues(t, "test-sandbox-uid", spec.Annotations[annotations.SandboxUID])

		assert.Contains(t, spec.Annotations, annotations.SandboxName)
		assert.EqualValues(t, "test-sandbox-name", spec.Annotations[annotations.SandboxName])

		assert.Contains(t, spec.Annotations, annotations.ImageName)
		assert.EqualValues(t, testImageName, spec.Annotations[annotations.ImageName])
	}
	return config, sandboxConfig, imageConfig, specCheck
}

View File

@@ -1,782 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"os"
"path/filepath"
goruntime "runtime"
"testing"
ostesting "github.com/containerd/containerd/v2/pkg/os/testing"
"github.com/containerd/platforms"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/pkg/cri/config"
"github.com/containerd/containerd/v2/pkg/cri/constants"
"github.com/containerd/containerd/v2/pkg/cri/opts"
"github.com/containerd/containerd/v2/pkg/oci"
)
// currentPlatform is the platform spec returned by platforms.DefaultSpec; the
// tests in this file pass it to buildContainerSpec.
var currentPlatform = platforms.DefaultSpec()
// checkMount asserts that mounts contains an entry going from src to dest.
// For the first such entry it also asserts the mount type, that every option
// in contains is present and that no option in notcontains is present.
func checkMount(t *testing.T, mounts []runtimespec.Mount, src, dest, typ string,
	contains, notcontains []string) {
	matched := false
	for i := range mounts {
		mount := mounts[i]
		if mount.Source != src || mount.Destination != dest {
			continue
		}
		matched = true
		assert.Equal(t, mount.Type, typ)
		for _, wanted := range contains {
			assert.Contains(t, mount.Options, wanted)
		}
		for _, unwanted := range notcontains {
			assert.NotContains(t, mount.Options, unwanted)
		}
		// Only the first matching mount is inspected.
		break
	}
	assert.True(t, matched, "mount from %q to %q not found", src, dest)
}
// testImageName is the image name the tests in this file pass to
// buildContainerSpec.
const testImageName = "container-image-name"
// TestGeneralContainerSpec builds a container spec from the shared test
// fixtures with an empty runtime config and runs the fixture's spec check
// against the result.
func TestGeneralContainerSpec(t *testing.T) {
	const (
		testID            = "test-id"
		testSandboxID     = "sandbox-id"
		testContainerName = "container-name"
	)
	var testPid uint32 = 1234

	containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
	service := newTestCRIService()

	spec, err := service.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName,
		containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
	require.NoError(t, err)
	specCheck(t, testID, testSandboxID, testPid, spec)
}
// TestPodAnnotationPassthroughContainerSpec checks which sandbox annotations
// end up in the generated container spec for a given list of PodAnnotations
// patterns in the runtime config, including wildcard patterns. The test is
// skipped on Darwin and FreeBSD.
func TestPodAnnotationPassthroughContainerSpec(t *testing.T) {
	switch goruntime.GOOS {
	case "darwin":
		t.Skip("not implemented on Darwin")
	case "freebsd":
		t.Skip("not implemented on FreeBSD")
	}

	testID := "test-id"
	testSandboxID := "sandbox-id"
	testContainerName := "container-name"
	testPid := uint32(1234)

	for _, test := range []struct {
		desc           string
		podAnnotations []string
		configChange   func(*runtime.PodSandboxConfig)
		specCheck      func(*testing.T, *runtimespec.Spec)
	}{
		{
			desc:           "a passthrough annotation should be passed as an OCI annotation",
			podAnnotations: []string{"c"},
			specCheck: func(t *testing.T, spec *runtimespec.Spec) {
				assert.Equal(t, "d", spec.Annotations["c"])
			},
		},
		{
			desc: "a non-passthrough annotation should not be passed as an OCI annotation",
			configChange: func(c *runtime.PodSandboxConfig) {
				c.Annotations["d"] = "e"
			},
			podAnnotations: []string{"c"},
			specCheck: func(t *testing.T, spec *runtimespec.Spec) {
				assert.Equal(t, "d", spec.Annotations["c"])
				_, ok := spec.Annotations["d"]
				assert.False(t, ok)
			},
		},
		{
			desc: "passthrough annotations should support wildcard match",
			configChange: func(c *runtime.PodSandboxConfig) {
				c.Annotations["t.f"] = "j"
				c.Annotations["z.g"] = "o"
				c.Annotations["z"] = "o"
				c.Annotations["y.ca"] = "b"
				c.Annotations["y"] = "b"
			},
			podAnnotations: []string{"t*", "z.*", "y.c*"},
			specCheck: func(t *testing.T, spec *runtimespec.Spec) {
				t.Logf("%+v", spec.Annotations)
				assert.Equal(t, "j", spec.Annotations["t.f"])
				assert.Equal(t, "o", spec.Annotations["z.g"])
				assert.Equal(t, "b", spec.Annotations["y.ca"])
				_, ok := spec.Annotations["y"]
				assert.False(t, ok)
				_, ok = spec.Annotations["z"]
				assert.False(t, ok)
			},
		},
	} {
		test := test
		t.Run(test.desc, func(t *testing.T) {
			c := newTestCRIService()
			containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
			if test.configChange != nil {
				test.configChange(sandboxConfig)
			}

			ociRuntime := config.Runtime{
				PodAnnotations: test.podAnnotations,
			}
			spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName,
				containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
			// The checks below dereference spec, so stop this subtest right
			// away when building the spec failed.
			require.NoError(t, err)
			require.NotNil(t, spec)
			specCheck(t, testID, testSandboxID, testPid, spec)

			if test.specCheck != nil {
				test.specCheck(t, spec)
			}
		})
	}
}
// TestContainerSpecCommand checks the process args produced by
// opts.WithProcessArgs for different combinations of CRI command/args and
// image entrypoint/cmd, including the case where all of them are empty and an
// error is expected.
func TestContainerSpecCommand(t *testing.T) {
	for _, test := range []struct {
		desc            string
		criEntrypoint   []string
		criArgs         []string
		imageEntrypoint []string
		imageArgs       []string
		expected        []string
		expectErr       bool
	}{
		{
			desc:            "should use cri entrypoint if it's specified",
			criEntrypoint:   []string{"a", "b"},
			imageEntrypoint: []string{"c", "d"},
			imageArgs:       []string{"e", "f"},
			expected:        []string{"a", "b"},
		},
		{
			desc:            "should use cri entrypoint if it's specified even if it's empty",
			criEntrypoint:   []string{},
			criArgs:         []string{"a", "b"},
			imageEntrypoint: []string{"c", "d"},
			imageArgs:       []string{"e", "f"},
			expected:        []string{"a", "b"},
		},
		{
			desc:            "should use cri entrypoint and args if they are specified",
			criEntrypoint:   []string{"a", "b"},
			criArgs:         []string{"c", "d"},
			imageEntrypoint: []string{"e", "f"},
			imageArgs:       []string{"g", "h"},
			expected:        []string{"a", "b", "c", "d"},
		},
		{
			desc:            "should use image entrypoint if cri entrypoint is not specified",
			criArgs:         []string{"a", "b"},
			imageEntrypoint: []string{"c", "d"},
			imageArgs:       []string{"e", "f"},
			expected:        []string{"c", "d", "a", "b"},
		},
		{
			desc:            "should use image args if both cri entrypoint and args are not specified",
			imageEntrypoint: []string{"c", "d"},
			imageArgs:       []string{"e", "f"},
			expected:        []string{"c", "d", "e", "f"},
		},
		{
			desc:      "should return error if both entrypoint and args are empty",
			expectErr: true,
		},
	} {
		test := test
		t.Run(test.desc, func(t *testing.T) {
			// Named containerConfig (not config) so the imported config
			// package is not shadowed inside this closure.
			containerConfig, _, imageConfig, _ := getCreateContainerTestData()
			containerConfig.Command = test.criEntrypoint
			containerConfig.Args = test.criArgs
			imageConfig.Entrypoint = test.imageEntrypoint
			imageConfig.Cmd = test.imageArgs

			var spec runtimespec.Spec
			err := opts.WithProcessArgs(containerConfig, imageConfig)(context.Background(), nil, nil, &spec)
			if test.expectErr {
				assert.Error(t, err)
				return
			}
			// spec.Process is dereferenced next; abort instead of risking a
			// nil dereference when the option failed.
			require.NoError(t, err)
			assert.Equal(t, test.expected, spec.Process.Args, test.desc)
		})
	}
}
// TestVolumeMounts checks the mounts returned by volumeMounts for the volumes
// declared in an image config: which destinations are produced, that each host
// path lives directly under "<container root>/volumes", and whether the user
// namespace ID mappings from the container config are attached to the mounts.
func TestVolumeMounts(t *testing.T) {
	testContainerRootDir := "test-container-root"
	idmap := []*runtime.IDMapping{
		{
			ContainerId: 0,
			HostId:      100,
			Length:      1,
		},
	}

	for _, test := range []struct {
		desc              string
		platform          platforms.Platform
		criMounts         []*runtime.Mount
		usernsEnabled     bool
		imageVolumes      map[string]struct{}
		expectedMountDest []string
		expectedMappings  []*runtime.IDMapping
	}{
		{
			desc: "should setup rw mount for image volumes",
			imageVolumes: map[string]struct{}{
				"/test-volume-1": {},
				"/test-volume-2": {},
			},
			expectedMountDest: []string{
				"/test-volume-1",
				"/test-volume-2",
			},
		},
		{
			desc: "should skip image volumes if already mounted by CRI",
			criMounts: []*runtime.Mount{
				{
					ContainerPath: "/test-volume-1",
					HostPath:      "/test-hostpath-1",
				},
			},
			imageVolumes: map[string]struct{}{
				"/test-volume-1": {},
				"/test-volume-2": {},
			},
			expectedMountDest: []string{
				"/test-volume-2",
			},
		},
		{
			desc: "should compare and return cleanpath",
			criMounts: []*runtime.Mount{
				{
					ContainerPath: "/test-volume-1",
					HostPath:      "/test-hostpath-1",
				},
			},
			imageVolumes: map[string]struct{}{
				"/test-volume-1/": {},
				"/test-volume-2/": {},
			},
			expectedMountDest: []string{
				"/test-volume-2/",
			},
		},
		{
			desc:     "should make relative paths absolute on Linux",
			platform: platforms.Platform{OS: "linux"},
			imageVolumes: map[string]struct{}{
				"./test-volume-1":     {},
				"C:/test-volume-2":    {},
				"../../test-volume-3": {},
				"/abs/test-volume-4":  {},
			},
			expectedMountDest: []string{
				"/test-volume-1",
				"/C:/test-volume-2",
				"/test-volume-3",
				"/abs/test-volume-4",
			},
		},
		{
			desc:          "should include mappings for image volumes on Linux",
			platform:      platforms.Platform{OS: "linux"},
			usernsEnabled: true,
			imageVolumes: map[string]struct{}{
				"/test-volume-1/": {},
				"/test-volume-2/": {},
			},
			// No CRI mount overlaps here, so both image volumes are expected.
			expectedMountDest: []string{
				"/test-volume-1/",
				"/test-volume-2/",
			},
			expectedMappings: idmap,
		},
		{
			desc:          "should NOT include mappings for image volumes on Linux if !userns",
			platform:      platforms.Platform{OS: "linux"},
			usernsEnabled: false,
			imageVolumes: map[string]struct{}{
				"/test-volume-1/": {},
				"/test-volume-2/": {},
			},
			// No CRI mount overlaps here, so both image volumes are expected.
			expectedMountDest: []string{
				"/test-volume-1/",
				"/test-volume-2/",
			},
		},
		{
			desc:          "should convert rel imageVolume paths to abs paths and add userns mappings",
			platform:      platforms.Platform{OS: "linux"},
			usernsEnabled: true,
			imageVolumes: map[string]struct{}{
				"test-volume-1/":       {},
				"C:/test-volume-2/":    {},
				"../../test-volume-3/": {},
			},
			expectedMountDest: []string{
				"/test-volume-1",
				"/C:/test-volume-2",
				"/test-volume-3",
			},
			expectedMappings: idmap,
		},
	} {
		test := test
		t.Run(test.desc, func(t *testing.T) {
			// Named imageConfig (not config) so the imported config package
			// is not shadowed inside this closure.
			imageConfig := &imagespec.ImageConfig{
				Volumes: test.imageVolumes,
			}
			containerConfig := &runtime.ContainerConfig{Mounts: test.criMounts}
			if test.usernsEnabled {
				containerConfig.Linux = &runtime.LinuxContainerConfig{
					SecurityContext: &runtime.LinuxContainerSecurityContext{
						NamespaceOptions: &runtime.NamespaceOption{
							UsernsOptions: &runtime.UserNamespace{
								Mode: runtime.NamespaceMode_POD,
								Uids: idmap,
								Gids: idmap,
							},
						},
					},
				}
			}

			c := newTestCRIService()
			got := c.volumeMounts(test.platform, testContainerRootDir, containerConfig, imageConfig)
			assert.Len(t, got, len(test.expectedMountDest))
			for _, dest := range test.expectedMountDest {
				found := false
				for _, m := range got {
					if m.ContainerPath != dest {
						continue
					}
					found = true
					assert.Equal(t,
						filepath.Join(testContainerRootDir, "volumes"),
						filepath.Dir(m.HostPath))
					if test.expectedMappings != nil {
						assert.Equal(t, test.expectedMappings, m.UidMappings)
						assert.Equal(t, test.expectedMappings, m.GidMappings)
					} else {
						// Without expected mappings the mount must not carry
						// any, which is what the "!userns" case is about.
						assert.Empty(t, m.UidMappings)
						assert.Empty(t, m.GidMappings)
					}
					break
				}
				assert.True(t, found, "mount for destination %q not found", dest)
			}
		})
	}
}
// TestContainerAnnotationPassthroughContainerSpec checks which pod and
// container annotations end up in the generated container spec for given
// PodAnnotations and ContainerAnnotations pattern lists in the runtime config.
// The test is skipped on Darwin and FreeBSD.
func TestContainerAnnotationPassthroughContainerSpec(t *testing.T) {
	switch goruntime.GOOS {
	case "darwin":
		t.Skip("not implemented on Darwin")
	case "freebsd":
		t.Skip("not implemented on FreeBSD")
	}

	testID := "test-id"
	testSandboxID := "sandbox-id"
	testContainerName := "container-name"
	testPid := uint32(1234)

	// Every case starts from the same three pod annotations and three
	// container annotations; only the passthrough patterns differ.
	addPodAnnotations := func(p *runtime.PodSandboxConfig) {
		p.Annotations["pod.annotation.1"] = "1"
		p.Annotations["pod.annotation.2"] = "2"
		p.Annotations["pod.annotation.3"] = "3"
	}
	addContainerAnnotations := func(c *runtime.ContainerConfig) {
		c.Annotations["container.annotation.1"] = "1"
		c.Annotations["container.annotation.2"] = "2"
		c.Annotations["container.annotation.3"] = "3"
	}
	// assertAbsent fails the test for every key that is present in the spec
	// annotations.
	assertAbsent := func(t *testing.T, spec *runtimespec.Spec, keys ...string) {
		for _, key := range keys {
			_, ok := spec.Annotations[key]
			assert.False(t, ok, "annotation %q should not be passed through", key)
		}
	}

	for _, test := range []struct {
		desc                 string
		podAnnotations       []string
		containerAnnotations []string
		podConfigChange      func(*runtime.PodSandboxConfig)
		configChange         func(*runtime.ContainerConfig)
		specCheck            func(*testing.T, *runtimespec.Spec)
	}{
		{
			desc:                 "passthrough annotations from pod and container should be passed as an OCI annotation",
			podConfigChange:      addPodAnnotations,
			configChange:         addContainerAnnotations,
			podAnnotations:       []string{"pod.annotation.1"},
			containerAnnotations: []string{"container.annotation.1"},
			specCheck: func(t *testing.T, spec *runtimespec.Spec) {
				assert.Equal(t, "1", spec.Annotations["container.annotation.1"])
				assertAbsent(t, spec, "container.annotation.2", "container.annotation.3")
				assert.Equal(t, "1", spec.Annotations["pod.annotation.1"])
				assertAbsent(t, spec, "pod.annotation.2", "pod.annotation.3")
			},
		},
		{
			desc:                 "passthrough annotations from pod and container should support wildcard",
			podConfigChange:      addPodAnnotations,
			configChange:         addContainerAnnotations,
			podAnnotations:       []string{"pod.annotation.*"},
			containerAnnotations: []string{"container.annotation.*"},
			specCheck: func(t *testing.T, spec *runtimespec.Spec) {
				assert.Equal(t, "1", spec.Annotations["container.annotation.1"])
				assert.Equal(t, "2", spec.Annotations["container.annotation.2"])
				assert.Equal(t, "3", spec.Annotations["container.annotation.3"])
				assert.Equal(t, "1", spec.Annotations["pod.annotation.1"])
				assert.Equal(t, "2", spec.Annotations["pod.annotation.2"])
				assert.Equal(t, "3", spec.Annotations["pod.annotation.3"])
			},
		},
		{
			desc:                 "annotations should not pass through if no passthrough annotations are configured",
			podConfigChange:      addPodAnnotations,
			configChange:         addContainerAnnotations,
			podAnnotations:       []string{},
			containerAnnotations: []string{},
			specCheck: func(t *testing.T, spec *runtimespec.Spec) {
				assertAbsent(t, spec,
					"container.annotation.1", "container.annotation.2", "container.annotation.3",
					"pod.annotation.1", "pod.annotation.2", "pod.annotation.3")
			},
		},
	} {
		test := test
		t.Run(test.desc, func(t *testing.T) {
			c := newTestCRIService()
			containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
			if test.configChange != nil {
				test.configChange(containerConfig)
			}
			if test.podConfigChange != nil {
				test.podConfigChange(sandboxConfig)
			}

			ociRuntime := config.Runtime{
				PodAnnotations:       test.podAnnotations,
				ContainerAnnotations: test.containerAnnotations,
			}
			spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, "", testContainerName, testImageName,
				containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
			// The checks below dereference spec, so stop this subtest right
			// away when building the spec failed.
			require.NoError(t, err)
			require.NotNil(t, spec)
			specCheck(t, testID, testSandboxID, testPid, spec)

			if test.specCheck != nil {
				test.specCheck(t, spec)
			}
		})
	}
}
// TestBaseRuntimeSpec checks that runtimeSpec starts from the base OCI spec
// registered for the given path, applies the supplied spec options on top of
// it, and leaves the stored base spec unmodified.
func TestBaseRuntimeSpec(t *testing.T) {
	c := newTestCRIService(withRuntimeService(&fakeRuntimeService{
		ocispecs: map[string]*oci.Spec{
			"/etc/containerd/cri-base.json": {
				Version:  "1.0.2",
				Hostname: "old",
			},
		},
	}))

	out, err := c.runtimeSpec(
		"id1",
		platforms.DefaultSpec(),
		"/etc/containerd/cri-base.json",
		oci.WithHostname("new-host"),
		oci.WithDomainname("new-domain"),
	)
	// out is dereferenced below; abort instead of risking a nil dereference.
	require.NoError(t, err)

	assert.Equal(t, "1.0.2", out.Version)
	assert.Equal(t, "new-host", out.Hostname)
	assert.Equal(t, "new-domain", out.Domainname)

	// Make sure original base spec not changed
	spec, err := c.LoadOCISpec("/etc/containerd/cri-base.json")
	require.NoError(t, err)
	assert.NotEqual(t, out, spec)
	assert.Equal(t, "old", spec.Hostname)

	assert.Equal(t, filepath.Join("/", constants.K8sContainerdNamespace, "id1"), out.Linux.CgroupsPath)
}
// TestLinuxContainerMounts checks the mounts returned by linuxContainerMounts
// (/etc/hostname, /etc/hosts, the resolv.conf path and /dev/shm) for different
// security contexts, pre-existing CRI mounts and stat results.
func TestLinuxContainerMounts(t *testing.T) {
	const testSandboxID = "test-id"
	idmap := []*runtime.IDMapping{
		{
			ContainerId: 0,
			HostId:      100,
			Length:      1,
		},
	}

	// rootFileMount is the expected relabelled mount of a file kept in the
	// sandbox directory under the root dir.
	rootFileMount := func(containerPath, file string, readonly bool, mappings []*runtime.IDMapping) *runtime.Mount {
		return &runtime.Mount{
			ContainerPath:  containerPath,
			HostPath:       filepath.Join(testRootDir, sandboxesDir, testSandboxID, file),
			Readonly:       readonly,
			SelinuxRelabel: true,
			UidMappings:    mappings,
			GidMappings:    mappings,
		}
	}
	// sandboxShmMount is the expected /dev/shm mount backed by the sandbox
	// directory under the state dir.
	sandboxShmMount := func() *runtime.Mount {
		return &runtime.Mount{
			ContainerPath:  "/dev/shm",
			HostPath:       filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"),
			Readonly:       false,
			SelinuxRelabel: true,
		}
	}

	for _, tc := range []struct {
		desc            string
		statFn          func(string) (os.FileInfo, error)
		criMounts       []*runtime.Mount
		securityContext *runtime.LinuxContainerSecurityContext
		expectedMounts  []*runtime.Mount
	}{
		{
			desc: "should setup ro mount when rootfs is read-only",
			securityContext: &runtime.LinuxContainerSecurityContext{
				ReadonlyRootfs: true,
			},
			expectedMounts: []*runtime.Mount{
				rootFileMount("/etc/hostname", "hostname", true, nil),
				rootFileMount("/etc/hosts", "hosts", true, nil),
				rootFileMount(resolvConfPath, "resolv.conf", true, nil),
				sandboxShmMount(),
			},
		},
		{
			desc:            "should setup rw mount when rootfs is read-write",
			securityContext: &runtime.LinuxContainerSecurityContext{},
			expectedMounts: []*runtime.Mount{
				rootFileMount("/etc/hostname", "hostname", false, nil),
				rootFileMount("/etc/hosts", "hosts", false, nil),
				rootFileMount(resolvConfPath, "resolv.conf", false, nil),
				sandboxShmMount(),
			},
		},
		{
			desc: "should setup uidMappings/gidMappings when userns is used",
			securityContext: &runtime.LinuxContainerSecurityContext{
				NamespaceOptions: &runtime.NamespaceOption{
					UsernsOptions: &runtime.UserNamespace{
						Mode: runtime.NamespaceMode_POD,
						Uids: idmap,
						Gids: idmap,
					},
				},
			},
			expectedMounts: []*runtime.Mount{
				rootFileMount("/etc/hostname", "hostname", false, idmap),
				rootFileMount("/etc/hosts", "hosts", false, idmap),
				rootFileMount(resolvConfPath, "resolv.conf", false, idmap),
				sandboxShmMount(),
			},
		},
		{
			desc: "should use host /dev/shm when host ipc is set",
			securityContext: &runtime.LinuxContainerSecurityContext{
				NamespaceOptions: &runtime.NamespaceOption{Ipc: runtime.NamespaceMode_NODE},
			},
			expectedMounts: []*runtime.Mount{
				rootFileMount("/etc/hostname", "hostname", false, nil),
				rootFileMount("/etc/hosts", "hosts", false, nil),
				rootFileMount(resolvConfPath, "resolv.conf", false, nil),
				{
					ContainerPath: "/dev/shm",
					HostPath:      "/dev/shm",
					Readonly:      false,
				},
			},
		},
		{
			desc: "should skip container mounts if already mounted by CRI",
			criMounts: []*runtime.Mount{
				{ContainerPath: "/etc/hostname", HostPath: "/test-etc-hostname"},
				{ContainerPath: "/etc/hosts", HostPath: "/test-etc-host"},
				{ContainerPath: resolvConfPath, HostPath: "test-resolv-conf"},
				{ContainerPath: "/dev/shm", HostPath: "test-dev-shm"},
			},
			securityContext: &runtime.LinuxContainerSecurityContext{},
			expectedMounts:  nil,
		},
		{
			desc: "should skip hostname mount if the old sandbox doesn't have hostname file",
			statFn: func(path string) (os.FileInfo, error) {
				assert.Equal(t, filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), path)
				return nil, errors.New("random error")
			},
			securityContext: &runtime.LinuxContainerSecurityContext{},
			expectedMounts: []*runtime.Mount{
				rootFileMount("/etc/hosts", "hosts", false, nil),
				rootFileMount(resolvConfPath, "resolv.conf", false, nil),
				sandboxShmMount(),
			},
		},
	} {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			containerCfg := &runtime.ContainerConfig{
				Metadata: &runtime.ContainerMetadata{
					Name:    "test-name",
					Attempt: 1,
				},
				Mounts: tc.criMounts,
				Linux: &runtime.LinuxContainerConfig{
					SecurityContext: tc.securityContext,
				},
			}

			svc := newTestCRIService()
			// Install the case's stat stub on the fake OS (nil in most cases).
			svc.os.(*ostesting.FakeOS).StatFn = tc.statFn

			got := svc.linuxContainerMounts(testSandboxID, containerCfg)
			assert.Equal(t, tc.expectedMounts, got, tc.desc)
		})
	}
}

View File

@@ -1,48 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"strconv"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/pkg/oci"
)
// containerSpecOpts returns the extra OCI spec options for a container.
// No extra spec options needed for windows, so this always returns a nil
// option list and a nil error; both arguments are ignored.
func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
	return nil, nil
}
// snapshotterOpts returns the Windows-specific snapshotter options for the
// container's r/w layer. When the container's Windows resources carry a
// non-zero rootfs size, that size is passed to the snapshotter as a decimal
// string under the "containerd.io/snapshot/windows/rootfs.sizebytes" label;
// otherwise no options are returned. The error is always nil.
func snapshotterOpts(config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
	// TODO: Only set for windows and cimfs snapshotter
	sizeBytes := config.GetWindows().GetResources().GetRootfsSizeInBytes()
	if sizeBytes == 0 {
		return nil, nil
	}

	sizeLabel := map[string]string{
		"containerd.io/snapshot/windows/rootfs.sizebytes": strconv.FormatInt(sizeBytes, 10),
	}
	return []snapshots.Opt{snapshots.WithLabels(sizeLabel)}, nil
}

View File

@@ -1,363 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
"github.com/stretchr/testify/require"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/pkg/cri/annotations"
"github.com/containerd/containerd/v2/pkg/cri/config"
)
// getSandboxConfig returns a fresh pod sandbox config fixture with fixed
// metadata, an empty Windows section, hostname "test-hostname" and the single
// annotation "c" = "d".
func getSandboxConfig() *runtime.PodSandboxConfig {
	metadata := &runtime.PodSandboxMetadata{
		Name:      "test-sandbox-name",
		Uid:       "test-sandbox-uid",
		Namespace: "test-sandbox-ns",
		Attempt:   2,
	}

	sandbox := &runtime.PodSandboxConfig{
		Metadata:    metadata,
		Windows:     &runtime.WindowsPodSandboxConfig{},
		Hostname:    "test-hostname",
		Annotations: map[string]string{"c": "d"},
	}
	return sandbox
}
// getCreateContainerTestData returns the fixtures shared by the container
// creation tests: a container config, a sandbox config, an image config, and a
// check function that asserts a spec generated from those fixtures carries the
// expected hostname, process settings, mounts, Windows resources, user,
// credential spec and sandbox annotations.
//
// The check function takes the container ID, the sandbox ID, the sandbox PID
// and the spec to verify; of these, only the sandbox ID and the spec are used
// by the checks here.
func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig,
	*imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) {
	config := &runtime.ContainerConfig{
		Metadata: &runtime.ContainerMetadata{
			Name:    "test-name",
			Attempt: 1,
		},
		Image: &runtime.ImageSpec{
			Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799",
		},
		Command:    []string{"test", "command"},
		Args:       []string{"test", "args"},
		WorkingDir: "test-cwd",
		Envs: []*runtime.KeyValue{
			{Key: "k1", Value: "v1"},
			{Key: "k2", Value: "v2"},
			{Key: "k3", Value: "v3=v3bis"},
			{Key: "k4", Value: "v4=v4bis=foop"},
		},
		Mounts: []*runtime.Mount{
			// everything default
			{
				ContainerPath: "container-path-1",
				HostPath:      "host-path-1",
			},
			// readOnly
			{
				ContainerPath: "container-path-2",
				HostPath:      "host-path-2",
				Readonly:      true,
			},
		},
		Labels:      map[string]string{"a": "b"},
		Annotations: map[string]string{"c": "d"},
		Windows: &runtime.WindowsContainerConfig{
			Resources: &runtime.WindowsContainerResources{
				CpuShares:          100,
				CpuCount:           200,
				CpuMaximum:         300,
				MemoryLimitInBytes: 400,
			},
			SecurityContext: &runtime.WindowsContainerSecurityContext{
				RunAsUsername:  "test-user",
				CredentialSpec: "{\"test\": \"spec\"}",
				HostProcess:    false,
			},
		},
	}
	sandboxConfig := getSandboxConfig()
	imageConfig := &imagespec.ImageConfig{
		Env:        []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"},
		Entrypoint: []string{"/entrypoint"},
		Cmd:        []string{"cmd"},
		WorkingDir: "/workspace",
		User:       "ContainerUser",
	}
	specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) {
		assert.Nil(t, spec.Root)
		assert.Equal(t, "test-hostname", spec.Hostname)
		assert.Equal(t, []string{"test", "command", "test", "args"}, spec.Process.Args)
		assert.Equal(t, "test-cwd", spec.Process.Cwd)
		// assert.Contains checks a single element; any further arguments are
		// only used as the failure message, so each variable is asserted
		// separately.
		for _, env := range []string{
			"k1=v1", "k2=v2", "k3=v3=v3bis", "k4=v4=v4bis=foop",
			"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop",
		} {
			assert.Contains(t, spec.Process.Env, env)
		}

		t.Logf("Check bind mount")
		checkMount(t, spec.Mounts, "host-path-1", "container-path-1", "", []string{"rw"}, nil)
		checkMount(t, spec.Mounts, "host-path-2", "container-path-2", "", []string{"ro"}, nil)

		t.Logf("Check resource limits")
		assert.EqualValues(t, 100, *spec.Windows.Resources.CPU.Shares)
		assert.EqualValues(t, 200, *spec.Windows.Resources.CPU.Count)
		assert.EqualValues(t, 300, *spec.Windows.Resources.CPU.Maximum)
		assert.EqualValues(t, 400, *spec.Windows.Resources.Memory.Limit)

		// Also checks if override of the image configs user is behaving.
		t.Logf("Check username")
		assert.Contains(t, spec.Process.User.Username, "test-user")

		t.Logf("Check credential spec")
		assert.Contains(t, spec.Windows.CredentialSpec, "{\"test\": \"spec\"}")

		t.Logf("Check PodSandbox annotations")
		for _, want := range []struct {
			key   string
			value string
		}{
			{annotations.SandboxID, sandboxID},
			{annotations.ContainerType, annotations.ContainerTypeContainer},
			{annotations.SandboxNamespace, "test-sandbox-ns"},
			{annotations.SandboxUID, "test-sandbox-uid"},
			{annotations.SandboxName, "test-sandbox-name"},
			{annotations.WindowsHostProcess, "false"},
		} {
			assert.Contains(t, spec.Annotations, want.key)
			assert.EqualValues(t, want.value, spec.Annotations[want.key])
		}
	}
	return config, sandboxConfig, imageConfig, specCheck
}
// TestContainerWindowsNetworkNamespace checks that the network namespace path
// passed to buildContainerSpec is recorded in the spec's Windows network
// section.
func TestContainerWindowsNetworkNamespace(t *testing.T) {
	testID := "test-id"
	testSandboxID := "sandbox-id"
	testContainerName := "container-name"
	testPid := uint32(1234)
	nsPath := "test-cni"
	c := newTestCRIService()

	containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
	spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
	// Everything below dereferences spec, so stop right away on failure.
	require.NoError(t, err)
	require.NotNil(t, spec)
	specCheck(t, testID, testSandboxID, testPid, spec)

	// Both pointers are dereferenced by the final assertion.
	require.NotNil(t, spec.Windows)
	require.NotNil(t, spec.Windows.Network)
	assert.Equal(t, nsPath, spec.Windows.Network.NetworkNamespace)
}
// TestMountCleanPath checks that a CRI mount given with forward-slash Windows
// paths appears in the generated spec with backslash-separated source and
// destination paths.
func TestMountCleanPath(t *testing.T) {
	testID := "test-id"
	testSandboxID := "sandbox-id"
	testContainerName := "container-name"
	testPid := uint32(1234)
	nsPath := "test-cni"
	c := newTestCRIService()

	containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
	containerConfig.Mounts = append(containerConfig.Mounts, &runtime.Mount{
		ContainerPath: "c:/test/container-path",
		HostPath:      "c:/test/host-path",
	})
	spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
	// Everything below dereferences spec, so stop right away on failure.
	require.NoError(t, err)
	require.NotNil(t, spec)
	specCheck(t, testID, testSandboxID, testPid, spec)
	checkMount(t, spec.Mounts, "c:\\test\\host-path", "c:\\test\\container-path", "", []string{"rw"}, nil)
}
// TestMountNamedPipe checks that a CRI mount whose host and container paths
// are the named pipe path `\\.\pipe\foo` appears in the generated spec with
// both paths unchanged.
func TestMountNamedPipe(t *testing.T) {
	testID := "test-id"
	testSandboxID := "sandbox-id"
	testContainerName := "container-name"
	testPid := uint32(1234)
	nsPath := "test-cni"
	c := newTestCRIService()

	containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
	containerConfig.Mounts = append(containerConfig.Mounts, &runtime.Mount{
		ContainerPath: `\\.\pipe\foo`,
		HostPath:      `\\.\pipe\foo`,
	})
	spec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
	// Everything below dereferences spec, so stop right away on failure.
	require.NoError(t, err)
	require.NotNil(t, spec)
	specCheck(t, testID, testSandboxID, testPid, spec)
	checkMount(t, spec.Mounts, `\\.\pipe\foo`, `\\.\pipe\foo`, "", []string{"rw"}, nil)
}
// TestHostProcessRequirements checks that buildContainerSpec returns an error
// exactly when the HostProcess flag of the container differs from the
// HostProcess flag of its sandbox.
func TestHostProcessRequirements(t *testing.T) {
	const (
		id            = "test-id"
		sandboxID     = "sandbox-id"
		containerName = "container-name"
	)
	sandboxPid := uint32(1234)

	// The fixtures are shared by all subtests; each subtest overwrites the two
	// HostProcess flags before building the spec.
	containerCfg, sandboxCfg, imageCfg, _ := getCreateContainerTestData()
	runtimeCfg := config.Runtime{}
	svc := newTestCRIService()

	cases := []struct {
		desc                 string
		containerHostProcess bool
		sandboxHostProcess   bool
		expectError          bool
	}{
		{
			desc:                 "hostprocess container in non-hostprocess sandbox should fail",
			containerHostProcess: true,
			sandboxHostProcess:   false,
			expectError:          true,
		},
		{
			desc:                 "hostprocess container in hostprocess sandbox should be fine",
			containerHostProcess: true,
			sandboxHostProcess:   true,
			expectError:          false,
		},
		{
			desc:                 "non-hostprocess container in hostprocess sandbox should fail",
			containerHostProcess: false,
			sandboxHostProcess:   true,
			expectError:          true,
		},
		{
			desc:                 "non-hostprocess container in non-hostprocess sandbox should be fine",
			containerHostProcess: false,
			sandboxHostProcess:   false,
			expectError:          false,
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			containerCfg.Windows.SecurityContext.HostProcess = tc.containerHostProcess
			sandboxCfg.Windows.SecurityContext = &runtime.WindowsSandboxSecurityContext{
				HostProcess: tc.sandboxHostProcess,
			}

			_, err := svc.buildContainerSpec(
				currentPlatform, id, sandboxID, sandboxPid, "", containerName, testImageName,
				containerCfg, sandboxCfg, imageCfg, nil, runtimeCfg,
			)
			if tc.expectError {
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
		})
	}
}
// TestEntrypointAndCmdForArgsEscaped checks the process Args and CommandLine
// of the generated spec for images whose entrypoint/cmd are in shell form
// (ArgsEscaped set) or exec form (ArgsEscaped unset), with and without a
// command and args supplied by the container config.
func TestEntrypointAndCmdForArgsEscaped(t *testing.T) {
	testID := "test-id"
	testSandboxID := "sandbox-id"
	testContainerName := "container-name"
	testPid := uint32(1234)
	nsPath := "test-ns"
	c := newTestCRIService()

	for name, test := range map[string]struct {
		imgEntrypoint       []string
		imgCmd              []string
		command             []string
		args                []string
		expectedArgs        []string
		expectedCommandLine string
		ArgsEscaped         bool
	}{
		// override image entrypoint and cmd in shell form with container args and verify expected runtime spec
		"TestShellFormImgEntrypointCmdWithCtrArgs": {
			imgEntrypoint:       []string{`"C:\My Folder\MyProcess.exe" -arg1 "test value"`},
			imgCmd:              []string{`cmd -args "hello world"`},
			command:             nil,
			args:                []string{`cmd -args "additional args"`},
			expectedArgs:        nil,
			expectedCommandLine: `"C:\My Folder\MyProcess.exe" -arg1 "test value" "cmd -args \"additional args\""`,
			ArgsEscaped:         true,
		},
		// check image entrypoint and cmd in shell form without overriding with container command and args and verify expected runtime spec
		"TestShellFormImgEntrypointCmdWithoutCtrArgs": {
			imgEntrypoint:       []string{`"C:\My Folder\MyProcess.exe" -arg1 "test value"`},
			imgCmd:              []string{`cmd -args "hello world"`},
			command:             nil,
			args:                nil,
			expectedArgs:        nil,
			expectedCommandLine: `"C:\My Folder\MyProcess.exe" -arg1 "test value" "cmd -args \"hello world\""`,
			ArgsEscaped:         true,
		},
		// override image entrypoint and cmd by container command and args in shell form and verify expected runtime spec
		"TestShellFormImgEntrypointCmdWithCtrEntrypointAndArgs": {
			imgEntrypoint:       []string{`"C:\My Folder\MyProcess.exe" -arg1 "test value"`},
			imgCmd:              []string{`cmd -args "hello world"`},
			command:             []string{`C:\My Folder\MyProcess.exe`, "-arg1", "additional test value"},
			args:                []string{"cmd", "-args", "additional args"},
			expectedArgs:        nil,
			expectedCommandLine: `"C:\My Folder\MyProcess.exe" -arg1 "additional test value" cmd -args "additional args"`,
			ArgsEscaped:         true,
		},
		// override image cmd by container args in exec form and verify expected runtime spec
		"TestExecFormImgEntrypointCmdWithCtrArgs": {
			imgEntrypoint:       []string{`C:\My Folder\MyProcess.exe`, "-arg1", "test value"},
			imgCmd:              []string{"cmd", "-args", "hello world"},
			command:             nil,
			args:                []string{"additional", "args"},
			expectedArgs:        []string{`C:\My Folder\MyProcess.exe`, "-arg1", "test value", "additional", "args"},
			expectedCommandLine: "",
			ArgsEscaped:         false,
		},
		// check image entrypoint and cmd in exec form without overriding with container command and args and verify expected runtime spec
		"TestExecFormImgEntrypointCmdWithoutCtrArgs": {
			imgEntrypoint:       []string{`C:\My Folder\MyProcess.exe`, "-arg1", "test value"},
			imgCmd:              []string{"cmd", "-args", "hello world"},
			command:             nil,
			args:                nil,
			expectedArgs:        []string{`C:\My Folder\MyProcess.exe`, "-arg1", "test value", "cmd", "-args", "hello world"},
			expectedCommandLine: "",
			ArgsEscaped:         false,
		},
	} {
		t.Run(name, func(t *testing.T) {
			imageConfig := &imagespec.ImageConfig{
				Entrypoint:  test.imgEntrypoint,
				Cmd:         test.imgCmd,
				ArgsEscaped: test.ArgsEscaped,
			}
			sandboxConfig := getSandboxConfig()
			containerConfig := &runtime.ContainerConfig{
				Metadata: &runtime.ContainerMetadata{
					Name:    "test-name",
					Attempt: 1,
				},
				Image: &runtime.ImageSpec{
					Image: testImageName,
				},
				Command: test.command,
				Args:    test.args,
				Windows: &runtime.WindowsContainerConfig{},
			}
			runtimeSpec, err := c.buildContainerSpec(currentPlatform, testID, testSandboxID, testPid, nsPath, testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
			// runtimeSpec.Process is dereferenced below; stop on failure
			// instead of risking a nil dereference.
			require.NoError(t, err)
			require.NotNil(t, runtimeSpec)

			// check the runtime spec for expected commandline and args
			actualCommandLine := runtimeSpec.Process.CommandLine
			actualArgs := runtimeSpec.Process.Args

			// testify takes (expected, actual).
			require.Equal(t, test.expectedArgs, actualArgs)
			require.Equal(t, test.expectedCommandLine, actualCommandLine)
		})
	}
}

View File

@@ -1,33 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// GetContainerEvents subscribes to the service's container event queue and
// forwards every received event to the stream s. It returns nil once the
// subscription channel is closed, or the first error reported by s.Send.
func (c *criService) GetContainerEvents(r *runtime.GetEventsRequest, s runtime.RuntimeService_GetContainerEventsServer) error {
	events, subscription := c.containerEventsQ.Subscribe()
	// Drop the subscription when the stream handler returns.
	defer subscription.Close()

	for {
		ev, open := <-events
		if !open {
			return nil
		}
		if sendErr := s.Send(&ev); sendErr != nil {
			return sendErr
		}
	}
}

View File

@@ -1,37 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// Exec prepares a streaming endpoint for executing a command in the container
// and returns the address of that endpoint. The container must exist in the
// container store and be in the running state.
func (c *criService) Exec(ctx context.Context, r *runtime.ExecRequest) (*runtime.ExecResponse, error) {
	containerID := r.GetContainerId()

	container, err := c.containerStore.Get(containerID)
	if err != nil {
		return nil, fmt.Errorf("failed to find container %q in store: %w", containerID, err)
	}

	// Only a running container can be exec'ed into.
	if current := container.Status.Get().State(); current != runtime.ContainerState_CONTAINER_RUNNING {
		return nil, fmt.Errorf("container is in %s state", criContainerStateToString(current))
	}

	return c.streamServer.GetExec(r)
}

View File

@@ -1,310 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"bytes"
"context"
"fmt"
"io"
"syscall"
"time"
containerd "github.com/containerd/containerd/v2/client"
containerdio "github.com/containerd/containerd/v2/pkg/cio"
"github.com/containerd/containerd/v2/pkg/oci"
"github.com/containerd/errdefs"
"github.com/containerd/log"
"k8s.io/client-go/tools/remotecommand"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
cio "github.com/containerd/containerd/v2/pkg/cri/io"
"github.com/containerd/containerd/v2/pkg/cri/util"
cioutil "github.com/containerd/containerd/v2/pkg/ioutil"
)
// cappedWriter forwards at most `remain` more bytes to the wrapped writer and
// silently discards anything written beyond that budget.
type cappedWriter struct {
	w      io.WriteCloser
	remain int
}

// Write passes as much of p as the remaining budget allows to the wrapped
// writer. Unless the wrapped writer fails, it reports len(p) so that callers
// never see a short write, even when part (or all) of p was dropped.
func (cw *cappedWriter) Write(p []byte) (int, error) {
	if cw.isFull() {
		// Budget exhausted: pretend the whole payload was consumed.
		return len(p), nil
	}

	chunk := p
	if len(chunk) > cw.remain {
		chunk = chunk[:cw.remain]
	}

	n, err := cw.w.Write(chunk)
	cw.remain -= n
	if err != nil {
		return n, err
	}
	return len(p), nil
}

// Close closes the wrapped writer.
func (cw *cappedWriter) Close() error {
	return cw.w.Close()
}

// isFull reports whether the byte budget has been used up.
func (cw *cappedWriter) isFull() bool {
	return cw.remain <= 0
}
// ExecSync runs a command in the container synchronously and returns the
// captured stdout, stderr and exit code.
// An error is returned when the command could not be executed in the
// container (see execInContainer).
func (c *criService) ExecSync(ctx context.Context, r *runtime.ExecSyncRequest) (*runtime.ExecSyncResponse, error) {
	const maxStreamSize = 1024 * 1024 * 16

	var (
		outBuf bytes.Buffer
		errBuf bytes.Buffer
	)

	// Each stream is captured through a cappedWriter, which drops output past
	// maxStreamSize. A response that large would hit the CRI plugin's gRPC
	// response limit anyway, so callers outside of the containerd process
	// (e.g. Kubelet) never see the truncated output.
	opts := execOptions{
		cmd:     r.GetCmd(),
		stdout:  &cappedWriter{w: cioutil.NewNopWriteCloser(&outBuf), remain: maxStreamSize},
		stderr:  &cappedWriter{w: cioutil.NewNopWriteCloser(&errBuf), remain: maxStreamSize},
		timeout: time.Duration(r.GetTimeout()) * time.Second,
	}

	code, err := c.execInContainer(ctx, r.GetContainerId(), opts)
	if err != nil {
		return nil, fmt.Errorf("failed to exec in container: %w", err)
	}

	resp := &runtime.ExecSyncResponse{
		Stdout:   outBuf.Bytes(),
		Stderr:   errBuf.Bytes(),
		ExitCode: int32(*code),
	}
	return resp, nil
}
// execOptions specifies how to execute command in container.
type execOptions struct {
	// cmd is the command line of the exec process; it becomes the process
	// spec's Args.
	cmd []string
	// stdin is the optional input stream. A non-nil value also tells the exec
	// IO setup that stdin is in use.
	stdin io.Reader
	// stdout and stderr receive the process output. When nil they are
	// replaced with a discard logger before the process is created.
	stdout io.WriteCloser
	stderr io.WriteCloser
	// tty requests a terminal for the exec process; when set, TERM=xterm is
	// added to the environment.
	tty bool
	// resize carries terminal size changes that are handed to handleResizing
	// and applied with process.Resize.
	resize <-chan remotecommand.TerminalSize
	// timeout bounds how long the exec process may run; a value <= 0 means no
	// timeout is applied.
	timeout time.Duration
}
// execInternal runs opts.cmd as an exec process inside the task of the given
// container, wires up its IO, and blocks until the process exits or the exec
// context is done.
//
// id is the container ID used for logging and for locating the container's
// volatile root directory. On a normal exit it returns a pointer to the exit
// code. When the exec context is done first (opts.timeout elapsed, or ctx was
// cancelled), the process is killed with SIGKILL and an error wrapping the
// context error is returned.
func (c *criService) execInternal(ctx context.Context, container containerd.Container, id string, opts execOptions) (*uint32, error) {
	// Cancel the context before returning to ensure goroutines are stopped.
	// This is important, because if `Start` returns error, `Wait` will hang
	// forever unless we cancel the context.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	// Zero means "no drain timeout"; it is only overridden when the config
	// value is set.
	var drainExecSyncIOTimeout time.Duration
	var err error
	if c.config.DrainExecSyncIOTimeout != "" {
		drainExecSyncIOTimeout, err = time.ParseDuration(c.config.DrainExecSyncIOTimeout)
		if err != nil {
			return nil, fmt.Errorf("failed to parse drain_exec_sync_io_timeout %q: %w",
				c.config.DrainExecSyncIOTimeout, err)
		}
	}
	spec, err := container.Spec(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get container spec: %w", err)
	}
	task, err := container.Task(ctx, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to load task: %w", err)
	}
	// The exec process spec starts from the container's own process spec and
	// is then adjusted for this exec.
	pspec := spec.Process
	pspec.Terminal = opts.tty
	if opts.tty {
		// NOTE(review): WithEnv is applied to spec, not pspec; presumably
		// pspec shares the Process with spec so the change is visible here —
		// confirm.
		if err := oci.WithEnv([]string{"TERM=xterm"})(ctx, nil, nil, spec); err != nil {
			return nil, fmt.Errorf("add TERM env var to spec: %w", err)
		}
	}
	pspec.Args = opts.cmd
	// CommandLine may already be set on the container's spec, but we want to only use Args here.
	pspec.CommandLine = ""
	// Missing output streams are replaced with discard loggers so that Attach
	// always has a writer.
	if opts.stdout == nil {
		opts.stdout = cio.NewDiscardLogger()
	}
	if opts.stderr == nil {
		opts.stderr = cio.NewDiscardLogger()
	}
	execID := util.GenerateID()
	log.G(ctx).Debugf("Generated exec id %q for container %q", execID, id)
	volatileRootDir := c.getVolatileContainerRootDir(id)
	// execIO is assigned inside the IO creator callback passed to task.Exec
	// and used afterwards to attach the caller's streams.
	var execIO *cio.ExecIO
	process, err := task.Exec(ctx, execID, pspec,
		func(id string) (containerdio.IO, error) {
			var err error
			execIO, err = cio.NewExecIO(id, volatileRootDir, opts.tty, opts.stdin != nil)
			return execIO, err
		},
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create exec %q: %w", execID, err)
	}
	// Always delete (and kill, if needed) the exec process on the way out.
	// A separate deferred context is used because ctx is cancelled on return.
	defer func() {
		deferCtx, deferCancel := util.DeferContext()
		defer deferCancel()
		if _, err := process.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
			log.G(ctx).WithError(err).Errorf("Failed to delete exec process %q for container %q", execID, id)
		}
	}()
	// Register the wait before starting the process so the exit status is
	// not missed.
	exitCh, err := process.Wait(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to wait for process %q: %w", execID, err)
	}
	if err := process.Start(ctx); err != nil {
		return nil, fmt.Errorf("failed to start exec %q: %w", execID, err)
	}
	// Resize failures are logged only; they do not abort the exec.
	handleResizing(ctx, opts.resize, func(size remotecommand.TerminalSize) {
		if err := process.Resize(ctx, uint32(size.Width), uint32(size.Height)); err != nil {
			log.G(ctx).WithError(err).Errorf("Failed to resize process %q console for container %q", execID, id)
		}
	})
	// attachDone is passed to drainExecSyncIO below to learn when the attached
	// streams have finished.
	attachDone := execIO.Attach(cio.AttachOptions{
		Stdin:     opts.stdin,
		Stdout:    opts.stdout,
		Stderr:    opts.stderr,
		Tty:       opts.tty,
		StdinOnce: true,
		CloseStdin: func() error {
			return process.CloseIO(ctx, containerd.WithStdinCloser)
		},
	})
	// execCtx is ctx itself unless a positive timeout was requested, in which
	// case it additionally expires after opts.timeout.
	execCtx := ctx
	if opts.timeout > 0 {
		var execCtxCancel context.CancelFunc
		execCtx, execCtxCancel = context.WithTimeout(ctx, opts.timeout)
		defer execCtxCancel()
	}
	select {
	case <-execCtx.Done():
		// Reached on timeout and also when the parent ctx is cancelled, since
		// execCtx is ctx or derived from it.
		// Ignore the not found error because the process may exit itself before killing.
		if err := process.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) {
			return nil, fmt.Errorf("failed to kill exec %q: %w", execID, err)
		}
		// Wait for the process to be killed.
		exitRes := <-exitCh
		log.G(ctx).Debugf("Timeout received while waiting for exec process kill %q code %d and error %v",
			execID, exitRes.ExitCode(), exitRes.Error())
		// On this path a drain failure is only logged; the timeout error
		// below is what the caller receives.
		if err := drainExecSyncIO(ctx, process, drainExecSyncIOTimeout, attachDone); err != nil {
			log.G(ctx).WithError(err).Warnf("failed to drain exec process %q io", execID)
		}
		return nil, fmt.Errorf("timeout %v exceeded: %w", opts.timeout, execCtx.Err())
	case exitRes := <-exitCh:
		code, _, err := exitRes.Result()
		log.G(ctx).Debugf("Exec process %q exits with exit code %d and error %v", execID, code, err)
		if err != nil {
			return nil, fmt.Errorf("failed while waiting for exec %q: %w", execID, err)
		}
		// On a normal exit a drain failure is returned to the caller.
		if err := drainExecSyncIO(ctx, process, drainExecSyncIOTimeout, attachDone); err != nil {
			return nil, fmt.Errorf("failed to drain exec process %q io: %w", execID, err)
		}
		return &code, nil
	}
}
// execInContainer synchronously executes a command inside the container
// identified by id and redirects the stdio streams as described by opts.
//
// The function returns only once the exec process has exited, which means:
//  1. While the exec process is running, a goroutine in the cri plugin stays
//     alive waiting for its exit code;
//  2. `kubectl exec -it` hangs until the exec process exits, even after io
//     has been detached. dockershim behaves differently: it leaves the exec
//     process running in the background after io is detached.
//     https://github.com/kubernetes/kubernetes/blob/v1.15.0/pkg/kubelet/dockershim/exec.go#L127
//
// For example, when the `kubectl exec -it` process is killed, IO is closed.
// The CRI plugin then still has a goroutine waiting for the exec process to
// exit and logging the exit code, whereas dockershim does not.
func (c *criService) execInContainer(ctx context.Context, id string, opts execOptions) (*uint32, error) {
	// Look the container up in our container store.
	container, err := c.containerStore.Get(id)
	if err != nil {
		return nil, fmt.Errorf("failed to find container %q in store: %w", id, err)
	}
	// From here on use the ID recorded in the store entry.
	id = container.ID

	if current := container.Status.Get().State(); current != runtime.ContainerState_CONTAINER_RUNNING {
		return nil, fmt.Errorf("container is in %s state", criContainerStateToString(current))
	}

	return c.execInternal(ctx, container.Container, id, opts)
}
// drainExecSyncIO waits, with an optional timeout, for the exec process IO to
// finish after the exec init process has exited.
//
// By default, child processes spawned by the exec process inherit its
// standard io file descriptors. The shim server creates a pipe as the data
// channel; the exec process and its children all write into the write end of
// that pipe and the shim server reads from it. As long as the write end is
// still open, the shim server keeps waiting for data.
//
// With an exec command such as `bash -c "sleep 365d &"`, the exec process is
// bash, which quits right after creating `sleep 365d`. The `sleep 365d`
// process then holds the write end of the pipe for a year! It doesn't make
// sense for the CRI plugin to wait for it.
//
// It returns nil when attachDone fires first. When the timeout fires first,
// the exec process is deleted to release the io and an error is returned. A
// zero drainExecIOTimeout disables the timeout.
func drainExecSyncIO(ctx context.Context, execProcess containerd.Process, drainExecIOTimeout time.Duration, attachDone <-chan struct{}) error {
	// A nil channel never becomes ready, so without a timeout the select
	// below waits on attachDone alone.
	var expired <-chan time.Time
	if drainExecIOTimeout != 0 {
		drainTimer := time.NewTimer(drainExecIOTimeout)
		defer drainTimer.Stop()
		expired = drainTimer.C
	}

	select {
	case <-attachDone:
		log.G(ctx).Tracef("Stream pipe for exec process %q done", execProcess.ID())
		return nil
	case <-expired:
	}

	log.G(ctx).Debugf("Exec process %q exits but the io is still held by other processes. Trying to delete exec process to release io", execProcess.ID())

	// A not-found error means the process is already gone; it is not
	// reported as a deletion failure.
	if _, err := execProcess.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
		return fmt.Errorf("failed to release exec io by deleting exec process %q: %w",
			execProcess.ID(), err)
	}

	return fmt.Errorf("failed to drain exec process %q io in %s because io is still held by other processes",
		execProcess.ID(), drainExecIOTimeout)
}

View File

@@ -1,150 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"bytes"
"context"
"os"
"syscall"
"testing"
"time"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/pkg/cio"
cioutil "github.com/containerd/containerd/v2/pkg/ioutil"
"github.com/stretchr/testify/assert"
)
// TestCWWrite checks that cappedWriter forwards data up to its cap, drops the
// rest, and never reports a short write or an error when the cap is hit.
func TestCWWrite(t *testing.T) {
	var sink bytes.Buffer
	writer := &cappedWriter{w: cioutil.NewNopWriteCloser(&sink), remain: 10}

	// Fits entirely within the cap.
	written, err := writer.Write([]byte("hello"))
	assert.NoError(t, err)
	assert.Equal(t, 5, written)

	// Crosses the cap: only the first five bytes reach the sink.
	written, err = writer.Write([]byte("helloworld"))
	assert.NoError(t, err, "no errors even it hits the cap")
	assert.Equal(t, 10, written, "no indication of partial write")
	assert.True(t, writer.isFull())
	assert.Equal(t, []byte("hellohello"), sink.Bytes(), "the underlying writer is capped")

	// Already full: the write is accepted and discarded.
	_, err = writer.Write([]byte("world"))
	assert.NoError(t, err)
	assert.True(t, writer.isFull())
	assert.Equal(t, []byte("hellohello"), sink.Bytes(), "the underlying writer is capped")
}
// TestCWClose checks that closing a cappedWriter over a no-op closer succeeds.
func TestCWClose(t *testing.T) {
	var sink bytes.Buffer
	writer := &cappedWriter{w: cioutil.NewNopWriteCloser(&sink), remain: 5}

	assert.NoError(t, writer.Close())
}
// TestDrainExecSyncIO exercises drainExecSyncIO both without a timeout (the
// attach channel closes first) and with a timeout that fires before the
// attach channel is closed.
func TestDrainExecSyncIO(t *testing.T) {
	ctx := context.TODO()

	t.Run("NoTimeout", func(t *testing.T) {
		proc := &fakeExecProcess{
			id:  t.Name(),
			pid: uint32(os.Getpid()),
		}

		// The attach channel closes after two seconds; with no timeout the
		// drain must simply wait for it and touch nothing on the process.
		done := make(chan struct{})
		time.AfterFunc(2*time.Second, func() { close(done) })

		err := drainExecSyncIO(ctx, proc, 0, done)
		assert.NoError(t, err)
		assert.Equal(t, 0, len(proc.actionEvents))
	})

	t.Run("With3Seconds", func(t *testing.T) {
		proc := &fakeExecProcess{
			id:  t.Name(),
			pid: uint32(os.Getpid()),
		}

		// The attach channel closes long after the three second timeout, so
		// the drain must give up and delete the process.
		done := make(chan struct{})
		time.AfterFunc(100*time.Second, func() { close(done) })

		err := drainExecSyncIO(ctx, proc, 3*time.Second, done)
		assert.Error(t, err)
		assert.Equal(t, []string{"Delete"}, proc.actionEvents)
	})
}
// fakeExecProcess is a test double for an exec process. Every method other
// than ID and Pid appends its own name to actionEvents so tests can assert
// which operations were invoked.
type fakeExecProcess struct {
	id           string
	pid          uint32
	actionEvents []string
}

// record notes that the named action was invoked on the fake.
func (p *fakeExecProcess) record(action string) {
	p.actionEvents = append(p.actionEvents, action)
}

// ID returns the configured process id string.
func (p *fakeExecProcess) ID() string {
	return p.id
}

// Pid returns the configured system process id.
func (p *fakeExecProcess) Pid() uint32 {
	return p.pid
}

// Start records the call and reports success.
func (p *fakeExecProcess) Start(context.Context) error {
	p.record("Start")
	return nil
}

// Delete records the call and returns no exit status and no error.
func (p *fakeExecProcess) Delete(context.Context, ...containerd.ProcessDeleteOpts) (*containerd.ExitStatus, error) {
	p.record("Delete")
	return nil, nil
}

// Kill records the call and reports success.
func (p *fakeExecProcess) Kill(context.Context, syscall.Signal, ...containerd.KillOpts) error {
	p.record("Kill")
	return nil
}

// Wait records the call and returns a nil channel and no error.
func (p *fakeExecProcess) Wait(context.Context) (<-chan containerd.ExitStatus, error) {
	p.record("Wait")
	return nil, nil
}

// CloseIO records the call and reports success.
func (p *fakeExecProcess) CloseIO(context.Context, ...containerd.IOCloserOpts) error {
	p.record("CloseIO")
	return nil
}

// Resize records the call and reports success; the requested size is ignored.
func (p *fakeExecProcess) Resize(ctx context.Context, w, h uint32) error {
	p.record("Resize")
	return nil
}

// IO records the call and returns a nil io set.
func (p *fakeExecProcess) IO() cio.IO {
	p.record("IO")
	return nil
}

// Status records the call and returns a zero-value status.
func (p *fakeExecProcess) Status(context.Context) (containerd.Status, error) {
	p.record("Status")
	return containerd.Status{}, nil
}

View File

@@ -1,116 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"time"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
)
// ListContainers returns every container in the store that matches the
// request's filter, converted to its CRI representation. The time taken is
// reported to containerListTimer.
func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (*runtime.ListContainersResponse, error) {
	begin := time.Now()

	// Convert everything in the store first, then apply the filter.
	var converted []*runtime.Container
	for _, stored := range c.containerStore.List() {
		converted = append(converted, toCRIContainer(stored))
	}
	matched := c.filterCRIContainers(converted, r.GetFilter())

	containerListTimer.UpdateSince(begin)
	return &runtime.ListContainersResponse{Containers: matched}, nil
}
// toCRIContainer builds the CRI representation of an internal container
// object from its metadata, config and current status.
func toCRIContainer(container containerstore.Container) *runtime.Container {
	current := container.Status.Get()
	cfg := container.Config

	out := &runtime.Container{}
	out.Id = container.ID
	out.PodSandboxId = container.SandboxID
	out.Metadata = cfg.GetMetadata()
	out.Image = cfg.GetImage()
	out.ImageRef = container.ImageRef
	out.State = current.State()
	out.CreatedAt = current.CreatedAt
	out.Labels = cfg.GetLabels()
	out.Annotations = cfg.GetAnnotations()
	return out
}
// normalizeContainerFilter rewrites the container and sandbox IDs of filter,
// in place, to the IDs recorded in the respective stores. An ID whose lookup
// fails is left untouched.
func (c *criService) normalizeContainerFilter(filter *runtime.ContainerFilter) {
	container, err := c.containerStore.Get(filter.GetId())
	if err == nil {
		filter.Id = container.ID
	}

	sandbox, err := c.sandboxStore.Get(filter.GetPodSandboxId())
	if err == nil {
		filter.PodSandboxId = sandbox.ID
	}
}
// filterCRIContainers returns the containers that satisfy every criterion set
// in filter (id, sandbox id, state and label selector). A nil filter returns
// the input slice unchanged; otherwise a new, possibly empty, slice is
// returned. The filter's IDs are normalized in place as a side effect.
func (c *criService) filterCRIContainers(containers []*runtime.Container, filter *runtime.ContainerFilter) []*runtime.Container {
	if filter == nil {
		return containers
	}

	// The containerd cri plugin supports short ids so long as there is only
	// one match, so a pod id given in the filter is looked up in the store
	// here.
	sandboxID := filter.GetPodSandboxId()
	if sandboxID != "" {
		if sandbox, err := c.sandboxStore.Get(sandboxID); err == nil {
			sandboxID = sandbox.ID
		}
	}

	c.normalizeContainerFilter(filter)

	// The filter is not modified past this point, so read its criteria once.
	wantID := filter.GetId()
	wantState := filter.GetState()
	wantLabels := filter.GetLabelSelector()

	matched := []*runtime.Container{}
nextContainer:
	for _, candidate := range containers {
		switch {
		case wantID != "" && wantID != candidate.Id:
			continue
		case sandboxID != "" && sandboxID != candidate.PodSandboxId:
			continue
		case wantState != nil && wantState.GetState() != candidate.State:
			continue
		}

		// Every selector entry must be present with an identical value.
		for key, want := range wantLabels {
			if got, ok := candidate.Labels[key]; !ok || got != want {
				continue nextContainer
			}
		}

		matched = append(matched, candidate)
	}
	return matched
}

View File

@@ -1,366 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
sandboxstore "github.com/containerd/containerd/v2/pkg/cri/store/sandbox"
)
// TestToCRIContainer verifies that toCRIContainer copies the ID, sandbox ID,
// image reference, config-derived fields, state and creation time of a store
// container into the CRI container.
func TestToCRIContainer(t *testing.T) {
	config := &runtime.ContainerConfig{
		Metadata: &runtime.ContainerMetadata{
			Name:    "test-name",
			Attempt: 1,
		},
		Image:       &runtime.ImageSpec{Image: "test-image"},
		Labels:      map[string]string{"a": "b"},
		Annotations: map[string]string{"c": "d"},
	}
	createdAt := time.Now().UnixNano()
	container, err := containerstore.NewContainer(
		containerstore.Metadata{
			ID:        "test-id",
			Name:      "test-name",
			SandboxID: "test-sandbox-id",
			Config:    config,
			ImageRef:  "test-image-ref",
		},
		containerstore.WithFakeStatus(
			containerstore.Status{
				Pid:        1234,
				CreatedAt:  createdAt,
				StartedAt:  time.Now().UnixNano(),
				FinishedAt: time.Now().UnixNano(),
				ExitCode:   1,
				Reason:     "test-reason",
				Message:    "test-message",
			},
		),
	)
	// Stop immediately if the fixture could not be built: converting an
	// invalid container would panic instead of producing a useful failure.
	require.NoError(t, err)

	// The fake status has both a start and a finish time, so the expected
	// CRI state is EXITED.
	expect := &runtime.Container{
		Id:           "test-id",
		PodSandboxId: "test-sandbox-id",
		Metadata:     config.GetMetadata(),
		Image:        config.GetImage(),
		ImageRef:     "test-image-ref",
		State:        runtime.ContainerState_CONTAINER_EXITED,
		CreatedAt:    createdAt,
		Labels:       config.GetLabels(),
		Annotations:  config.GetAnnotations(),
	}
	c := toCRIContainer(container)
	assert.Equal(t, expect, c)
}
// TestFilterContainers runs filterCRIContainers against a fixed set of three
// containers with id, state, label, sandbox and combined filters.
func TestFilterContainers(t *testing.T) {
	svc := newTestCRIService()

	first := &runtime.Container{
		Id:           "1",
		PodSandboxId: "s-1",
		Metadata:     &runtime.ContainerMetadata{Name: "name-1", Attempt: 1},
		State:        runtime.ContainerState_CONTAINER_RUNNING,
	}
	second := &runtime.Container{
		Id:           "2",
		PodSandboxId: "s-2",
		Metadata:     &runtime.ContainerMetadata{Name: "name-2", Attempt: 2},
		State:        runtime.ContainerState_CONTAINER_EXITED,
		Labels:       map[string]string{"a": "b"},
	}
	third := &runtime.Container{
		Id:           "3",
		PodSandboxId: "s-2",
		Metadata:     &runtime.ContainerMetadata{Name: "name-2", Attempt: 3},
		State:        runtime.ContainerState_CONTAINER_CREATED,
		Labels:       map[string]string{"c": "d"},
	}
	testContainers := []*runtime.Container{first, second, third}

	type filterCase struct {
		desc   string
		filter *runtime.ContainerFilter
		expect []*runtime.Container
	}
	cases := []filterCase{
		{
			desc:   "no filter",
			expect: testContainers,
		},
		{
			desc:   "id filter",
			filter: &runtime.ContainerFilter{Id: "2"},
			expect: []*runtime.Container{second},
		},
		{
			desc: "state filter",
			filter: &runtime.ContainerFilter{
				State: &runtime.ContainerStateValue{
					State: runtime.ContainerState_CONTAINER_EXITED,
				},
			},
			expect: []*runtime.Container{second},
		},
		{
			desc: "label filter",
			filter: &runtime.ContainerFilter{
				LabelSelector: map[string]string{"a": "b"},
			},
			expect: []*runtime.Container{second},
		},
		{
			desc:   "sandbox id filter",
			filter: &runtime.ContainerFilter{PodSandboxId: "s-2"},
			expect: []*runtime.Container{second, third},
		},
		{
			desc: "mixed filter not matched",
			filter: &runtime.ContainerFilter{
				Id:            "1",
				PodSandboxId:  "s-2",
				LabelSelector: map[string]string{"a": "b"},
			},
			expect: []*runtime.Container{},
		},
		{
			desc: "mixed filter matched",
			filter: &runtime.ContainerFilter{
				PodSandboxId: "s-2",
				State: &runtime.ContainerStateValue{
					State: runtime.ContainerState_CONTAINER_CREATED,
				},
				LabelSelector: map[string]string{"c": "d"},
			},
			expect: []*runtime.Container{third},
		},
	}

	for _, tc := range cases {
		// Capture the case for the subtest closure.
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			got := svc.filterCRIContainers(testContainers, tc.filter)
			assert.Equal(t, tc.expect, got, tc.desc)
		})
	}
}
// containerForTest pairs container metadata with a status so that tests can
// describe a store container declaratively.
type containerForTest struct {
	metadata containerstore.Metadata
	status   containerstore.Status
}

// toContainer builds a store container from the metadata, using the recorded
// status as a fake status.
func (c containerForTest) toContainer() (containerstore.Container, error) {
	withStatus := containerstore.WithFakeStatus(c.status)
	return containerstore.NewContainer(c.metadata, withStatus)
}
// TestListContainers populates the sandbox and container stores with fixed
// fixtures and checks that ListContainers returns the expected containers for
// filters by full and truncated container and sandbox IDs.
func TestListContainers(t *testing.T) {
	c := newTestCRIService()
	sandboxesInStore := []sandboxstore.Sandbox{
		sandboxstore.NewSandbox(
			sandboxstore.Metadata{
				ID:     "s-1abcdef1234",
				Name:   "sandboxname-1",
				Config: &runtime.PodSandboxConfig{Metadata: &runtime.PodSandboxMetadata{Name: "podname-1"}},
			},
			sandboxstore.Status{
				State: sandboxstore.StateReady,
			},
		),
		sandboxstore.NewSandbox(
			sandboxstore.Metadata{
				ID:     "s-2abcdef1234",
				Name:   "sandboxname-2",
				Config: &runtime.PodSandboxConfig{Metadata: &runtime.PodSandboxMetadata{Name: "podname-2"}},
			},
			sandboxstore.Status{
				State: sandboxstore.StateNotReady,
			},
		),
	}
	createdAt := time.Now().UnixNano()
	startedAt := time.Now().UnixNano()
	finishedAt := time.Now().UnixNano()
	// Which timestamps are set determines the state expected below: created
	// only -> CREATED, started -> RUNNING, finished -> EXITED.
	containersInStore := []containerForTest{
		{
			metadata: containerstore.Metadata{
				ID:        "c-1container",
				Name:      "name-1",
				SandboxID: "s-1abcdef1234",
				Config:    &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-1"}},
			},
			status: containerstore.Status{CreatedAt: createdAt},
		},
		{
			metadata: containerstore.Metadata{
				ID:        "c-2container",
				Name:      "name-2",
				SandboxID: "s-1abcdef1234",
				Config:    &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-2"}},
			},
			status: containerstore.Status{
				CreatedAt: createdAt,
				StartedAt: startedAt,
			},
		},
		{
			metadata: containerstore.Metadata{
				ID:        "c-3container",
				Name:      "name-3",
				SandboxID: "s-1abcdef1234",
				Config:    &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-3"}},
			},
			status: containerstore.Status{
				CreatedAt:  createdAt,
				StartedAt:  startedAt,
				FinishedAt: finishedAt,
			},
		},
		{
			metadata: containerstore.Metadata{
				ID:        "c-4container",
				Name:      "name-4",
				SandboxID: "s-2abcdef1234",
				Config:    &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-4"}},
			},
			status: containerstore.Status{
				CreatedAt: createdAt,
			},
		},
	}

	expectedContainers := []*runtime.Container{
		{
			Id:           "c-1container",
			PodSandboxId: "s-1abcdef1234",
			Metadata:     &runtime.ContainerMetadata{Name: "name-1"},
			State:        runtime.ContainerState_CONTAINER_CREATED,
			CreatedAt:    createdAt,
		},
		{
			Id:           "c-2container",
			PodSandboxId: "s-1abcdef1234",
			Metadata:     &runtime.ContainerMetadata{Name: "name-2"},
			State:        runtime.ContainerState_CONTAINER_RUNNING,
			CreatedAt:    createdAt,
		},
		{
			Id:           "c-3container",
			PodSandboxId: "s-1abcdef1234",
			Metadata:     &runtime.ContainerMetadata{Name: "name-3"},
			State:        runtime.ContainerState_CONTAINER_EXITED,
			CreatedAt:    createdAt,
		},
		{
			Id:           "c-4container",
			PodSandboxId: "s-2abcdef1234",
			Metadata:     &runtime.ContainerMetadata{Name: "name-4"},
			State:        runtime.ContainerState_CONTAINER_CREATED,
			CreatedAt:    createdAt,
		},
	}

	// Inject test sandbox metadata. Fixture setup uses require so that a
	// broken fixture aborts the test instead of producing misleading
	// failures in the subtests below.
	for _, sb := range sandboxesInStore {
		require.NoError(t, c.sandboxStore.Add(sb))
	}

	// Inject test container metadata
	for _, cntr := range containersInStore {
		container, err := cntr.toContainer()
		require.NoError(t, err)
		require.NoError(t, c.containerStore.Add(container))
	}

	for _, testdata := range []struct {
		desc   string
		filter *runtime.ContainerFilter
		expect []*runtime.Container
	}{
		{
			desc:   "test without filter",
			filter: &runtime.ContainerFilter{},
			expect: expectedContainers,
		},
		{
			desc: "test filter by sandboxid",
			filter: &runtime.ContainerFilter{
				PodSandboxId: "s-1abcdef1234",
			},
			expect: expectedContainers[:3],
		},
		{
			desc: "test filter by truncated sandboxid",
			filter: &runtime.ContainerFilter{
				PodSandboxId: "s-1",
			},
			expect: expectedContainers[:3],
		},
		{
			desc: "test filter by containerid",
			filter: &runtime.ContainerFilter{
				Id: "c-1container",
			},
			expect: expectedContainers[:1],
		},
		{
			desc: "test filter by truncated containerid",
			filter: &runtime.ContainerFilter{
				Id: "c-1",
			},
			expect: expectedContainers[:1],
		},
		{
			desc: "test filter by containerid and sandboxid",
			filter: &runtime.ContainerFilter{
				Id:           "c-1container",
				PodSandboxId: "s-1abcdef1234",
			},
			expect: expectedContainers[:1],
		},
		{
			desc: "test filter by truncated containerid and truncated sandboxid",
			filter: &runtime.ContainerFilter{
				Id:           "c-1",
				PodSandboxId: "s-1",
			},
			expect: expectedContainers[:1],
		},
	} {
		testdata := testdata
		t.Run(testdata.desc, func(t *testing.T) {
			resp, err := c.ListContainers(context.Background(), &runtime.ListContainersRequest{Filter: testdata.filter})
			assert.NoError(t, err)
			require.NotNil(t, resp)
			containers := resp.GetContainers()
			// The result order is not asserted: only the size and the
			// membership of every expected container are checked.
			assert.Len(t, containers, len(testdata.expect))
			for _, cntr := range testdata.expect {
				assert.Contains(t, containers, cntr)
			}
		})
	}
}

View File

@@ -1,52 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"fmt"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// ReopenContainerLog asks the cri plugin to reopen the stdout/stderr log file
// of a running container. This is often called after the log file has been
// rotated. New loggers are created for the container's log path and swapped
// in for the existing "log" output; the previous loggers are then closed.
func (c *criService) ReopenContainerLog(ctx context.Context, r *runtime.ReopenContainerLogRequest) (*runtime.ReopenContainerLogResponse, error) {
	containerID := r.GetContainerId()

	cntr, err := c.containerStore.Get(containerID)
	if err != nil {
		return nil, fmt.Errorf("an error occurred when try to find container %q: %w", containerID, err)
	}

	if current := cntr.Status.Get().State(); current != runtime.ContainerState_CONTAINER_RUNNING {
		return nil, errors.New("container is not running")
	}

	// Create the replacement loggers first, then swap them in.
	newStdout, newStderr, err := c.createContainerLoggers(cntr.LogPath, cntr.Config.GetTty())
	if err != nil {
		return nil, err
	}

	prevStdout, prevStderr := cntr.IO.AddOutput("log", newStdout, newStderr)
	if prevStdout != nil {
		prevStdout.Close()
	}
	if prevStderr != nil {
		prevStderr.Close()
	}

	return &runtime.ReopenContainerLogResponse{}, nil
}

View File

@@ -1,164 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"fmt"
"time"
containerd "github.com/containerd/containerd/v2/client"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
"github.com/containerd/errdefs"
"github.com/containerd/log"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// RemoveContainer removes the container identified by the request's container ID.
//
// A container that is missing from the CRI container store, or that containerd
// reports as not found, is answered with an empty response and a nil error.
// Containers in the running or unknown state are stopped first. While removal
// is in progress the container status carries the removing flag; the named
// result retErr is inspected by a deferred function that clears the flag again
// if removal fails. On success the containerd container, the checkpoint, both
// container root directories and the store/name-index entries are deleted and
// a CONTAINER_DELETED event is emitted.
func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) {
// start is consumed at the end to update containerRemoveTimer.
start := time.Now()
ctrID := r.GetContainerId()
container, err := c.containerStore.Get(ctrID)
if err != nil {
if !errdefs.IsNotFound(err) {
return nil, fmt.Errorf("an error occurred when try to find container %q: %w", ctrID, err)
}
// Do not return error if container metadata doesn't exist.
log.G(ctx).Tracef("RemoveContainer called for container %q that does not exist", ctrID)
return &runtime.RemoveContainerResponse{}, nil
}
id := container.ID
// The container info is fetched up front; its Runtime.Name labels the
// removal timer on the success path.
i, err := container.Container.Info(ctx)
if err != nil {
if !errdefs.IsNotFound(err) {
return nil, fmt.Errorf("get container info: %w", err)
}
// Since containerd doesn't see the container and criservice's container store does,
// we should try to recover from this state by removing entry for this container
// from the container store as well and return successfully.
log.G(ctx).WithError(err).Warn("get container info failed")
c.containerStore.Delete(ctrID)
c.containerNameIndex.ReleaseByKey(ctrID)
return &runtime.RemoveContainerResponse{}, nil
}
// Forcibly stop the containers if they are in running or unknown state
state := container.Status.Get().State()
if state == runtime.ContainerState_CONTAINER_RUNNING ||
state == runtime.ContainerState_CONTAINER_UNKNOWN {
log.L.Infof("Forcibly stopping container %q", id)
// NOTE(review): the trailing 0 is presumably the stop timeout (immediate
// kill) - confirm against stopContainer.
if err := c.stopContainer(ctx, container, 0); err != nil {
return nil, fmt.Errorf("failed to forcibly stop container %q: %w", id, err)
}
}
// Set removing state to prevent other start/remove operations against this container
// while it's being removed.
if err := setContainerRemoving(container); err != nil {
return nil, fmt.Errorf("failed to set removing state for container %q: %w", id, err)
}
defer func() {
if retErr != nil {
// Reset removing if remove failed.
if err := resetContainerRemoving(container); err != nil {
log.G(ctx).WithError(err).Errorf("failed to reset removing state for container %q", id)
}
}
}()
// Notify NRI about the removal. When the sandbox cannot be found in the
// sandbox store, NRI is called with a nil sandbox instead.
sandbox, err := c.sandboxStore.Get(container.SandboxID)
if err != nil {
err = c.nri.RemoveContainer(ctx, nil, &container)
} else {
err = c.nri.RemoveContainer(ctx, &sandbox, &container)
}
// An NRI failure is only logged; removal continues regardless.
if err != nil {
log.G(ctx).WithError(err).Error("NRI failed to remove container")
}
// NOTE(random-liu): Docker set container to "Dead" state when start removing the
// container so as to avoid start/restart the container again. However, for current
// kubelet implementation, we'll never start a container once we decide to remove it,
// so we don't need the "Dead" state for now.
// Delete containerd container. A not-found error is tolerated and only traced.
if err := container.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil {
if !errdefs.IsNotFound(err) {
return nil, fmt.Errorf("failed to delete containerd container %q: %w", id, err)
}
log.G(ctx).Tracef("Remove called for containerd container %q that does not exist", id)
}
// Delete container checkpoint.
if err := container.Delete(); err != nil {
return nil, fmt.Errorf("failed to delete container checkpoint for %q: %w", id, err)
}
// Remove the container root directory and then its volatile counterpart.
containerRootDir := c.getContainerRootDir(id)
if err := ensureRemoveAll(ctx, containerRootDir); err != nil {
return nil, fmt.Errorf("failed to remove container root directory %q: %w",
containerRootDir, err)
}
volatileContainerRootDir := c.getVolatileContainerRootDir(id)
if err := ensureRemoveAll(ctx, volatileContainerRootDir); err != nil {
return nil, fmt.Errorf("failed to remove volatile container root directory %q: %w",
volatileContainerRootDir, err)
}
// Drop the container from the CRI store and release its name reservation
// only after every on-disk artifact has been removed.
c.containerStore.Delete(id)
c.containerNameIndex.ReleaseByKey(id)
c.generateAndSendContainerEvent(ctx, id, container.SandboxID, runtime.ContainerEventType_CONTAINER_DELETED_EVENT)
containerRemoveTimer.WithValues(i.Runtime.Name).UpdateSince(start)
return &runtime.RemoveContainerResponse{}, nil
}
// setContainerRemoving flags the container as being removed by setting
// Removing in its status. The status is left unchanged and an error is
// returned when the container is running, in an unknown state, currently
// starting, or already flagged as removing.
func setContainerRemoving(container containerstore.Container) error {
	markRemoving := func(st containerstore.Status) (containerstore.Status, error) {
		// A running or unknown container has to be stopped before removal.
		switch st.State() {
		case runtime.ContainerState_CONTAINER_RUNNING:
			return st, errors.New("container is still running, to stop first")
		case runtime.ContainerState_CONTAINER_UNKNOWN:
			return st, errors.New("container state is unknown, to stop first")
		}
		// Start and remove operations exclude each other.
		switch {
		case st.Starting:
			return st, errors.New("container is in starting state, can't be removed")
		case st.Removing:
			return st, errors.New("container is already in removing state")
		}
		st.Removing = true
		return st, nil
	}
	return container.Status.Update(markRemoving)
}
// resetContainerRemoving clears the Removing flag in the container status.
// It is used after a failed removal so that the removal can be retried.
func resetContainerRemoving(container containerstore.Container) error {
	clearRemoving := func(st containerstore.Status) (containerstore.Status, error) {
		st.Removing = false
		return st, nil
	}
	return container.Status.Update(clearRemoving)
}

View File

@@ -1,92 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
)
// TestSetContainerRemoving verifies that setContainerRemoving refuses
// containers that are running, starting or already being removed without
// touching their status, and that the removing flag can be set and then reset
// on a container that has finished.
func TestSetContainerRemoving(t *testing.T) {
	const testID = "test-id"

	type testCase struct {
		desc      string
		status    containerstore.Status
		expectErr bool
	}
	cases := []testCase{
		{
			desc: "should return error when container is in running state",
			status: containerstore.Status{
				CreatedAt: time.Now().UnixNano(),
				StartedAt: time.Now().UnixNano(),
			},
			expectErr: true,
		},
		{
			desc: "should return error when container is in starting state",
			status: containerstore.Status{
				CreatedAt: time.Now().UnixNano(),
				Starting:  true,
			},
			expectErr: true,
		},
		{
			desc: "should return error when container is in removing state",
			status: containerstore.Status{
				CreatedAt:  time.Now().UnixNano(),
				StartedAt:  time.Now().UnixNano(),
				FinishedAt: time.Now().UnixNano(),
				Removing:   true,
			},
			expectErr: true,
		},
		{
			desc: "should not return error when container is not running and removing",
			status: containerstore.Status{
				CreatedAt:  time.Now().UnixNano(),
				StartedAt:  time.Now().UnixNano(),
				FinishedAt: time.Now().UnixNano(),
			},
			expectErr: false,
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			container, err := containerstore.NewContainer(
				containerstore.Metadata{ID: testID},
				containerstore.WithFakeStatus(tc.status),
			)
			assert.NoError(t, err)

			err = setContainerRemoving(container)
			if !tc.expectErr {
				// Success path: the flag must be set, and resettable.
				assert.NoError(t, err)
				assert.True(t, container.Status.Get().Removing, "removing should be set")
				assert.NoError(t, resetContainerRemoving(container))
				assert.False(t, container.Status.Get().Removing, "removing should be reset")
				return
			}
			// Failure path: the stored status must be exactly what we seeded.
			assert.Error(t, err)
			assert.Equal(t, tc.status, container.Status.Get(), "metadata should not be updated")
		})
	}
}

View File

@@ -1,252 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"fmt"
"io"
"time"
containerd "github.com/containerd/containerd/v2/client"
containerdio "github.com/containerd/containerd/v2/pkg/cio"
"github.com/containerd/errdefs"
"github.com/containerd/log"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
cio "github.com/containerd/containerd/v2/pkg/cri/io"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
sandboxstore "github.com/containerd/containerd/v2/pkg/cri/store/sandbox"
ctrdutil "github.com/containerd/containerd/v2/pkg/cri/util"
cioutil "github.com/containerd/containerd/v2/pkg/ioutil"
)
// StartContainer starts the container identified by the request's container ID.
//
// The container is flagged as starting for the duration of the call. A
// containerd task is created with the container log writers attached, NRI is
// notified, the task is started, the task PID and start time are stored in the
// container status, an exit monitor is registered and a CONTAINER_STARTED
// event is emitted. The named result retErr is read by the deferred functions:
// when it is non-nil they record the failure in the container status and,
// depending on how far the start got, delete the task and tell NRI the
// container stopped.
func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) {
// start is consumed at the end to update containerStartTimer.
start := time.Now()
cntr, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err)
}
// info.Runtime.Name labels the start timer on the success path.
info, err := cntr.Container.Info(ctx)
if err != nil {
return nil, fmt.Errorf("get container info: %w", err)
}
id := cntr.ID
meta := cntr.Metadata
container := cntr.Container
config := meta.Config
// Set starting state to prevent other start/remove operations against this container
// while it's being started.
if err := setContainerStarting(cntr); err != nil {
return nil, fmt.Errorf("failed to set starting state for container %q: %w", id, err)
}
defer func() {
if retErr != nil {
// Set container to exited if fail to start.
if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
status.Pid = 0
status.FinishedAt = time.Now().UnixNano()
status.ExitCode = errorStartExitCode
status.Reason = errorStartReason
status.Message = retErr.Error()
return status, nil
}); err != nil {
log.G(ctx).WithError(err).Errorf("failed to set start failure state for container %q", id)
}
}
// The starting flag is cleared on every return path, success included.
if err := resetContainerStarting(cntr); err != nil {
log.G(ctx).WithError(err).Errorf("failed to reset starting state for container %q", id)
}
}()
// Get sandbox config from sandbox store.
sandbox, err := c.sandboxStore.Get(meta.SandboxID)
if err != nil {
return nil, fmt.Errorf("sandbox %q not found: %w", meta.SandboxID, err)
}
sandboxID := meta.SandboxID
if sandbox.Status.Get().State != sandboxstore.StateReady {
return nil, fmt.Errorf("sandbox container %q is not running", sandboxID)
}
// Recheck target container validity in Linux namespace options.
if linux := config.GetLinux(); linux != nil {
nsOpts := linux.GetSecurityContext().GetNamespaceOptions()
if nsOpts.GetPid() == runtime.NamespaceMode_TARGET {
_, err := c.validateTargetContainer(sandboxID, nsOpts.TargetId)
if err != nil {
return nil, fmt.Errorf("invalid target container: %w", err)
}
}
}
// ioCreation is handed to NewTask below. It creates fresh log writers,
// registers them on the container IO under the "log" name, calls
// cntr.IO.Pipe() and returns the container IO.
ioCreation := func(id string) (_ containerdio.IO, err error) {
stdoutWC, stderrWC, err := c.createContainerLoggers(meta.LogPath, config.GetTty())
if err != nil {
return nil, fmt.Errorf("failed to create container loggers: %w", err)
}
cntr.IO.AddOutput("log", stdoutWC, stderrWC)
cntr.IO.Pipe()
return cntr.IO, nil
}
ociRuntime, err := c.config.GetSandboxRuntime(sandbox.Config, sandbox.Metadata.RuntimeHandler)
if err != nil {
return nil, fmt.Errorf("failed to get sandbox runtime: %w", err)
}
// A runtime path option is only passed when the runtime config sets one.
var taskOpts []containerd.NewTaskOpts
if ociRuntime.Path != "" {
taskOpts = append(taskOpts, containerd.WithRuntimePath(ociRuntime.Path))
}
task, err := container.NewTask(ctx, ioCreation, taskOpts...)
if err != nil {
return nil, fmt.Errorf("failed to create containerd task: %w", err)
}
// From here on a failed start must not leave the task behind.
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// It's possible that task is deleted by event monitor.
if _, err := task.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
log.G(ctx).WithError(err).Errorf("Failed to delete containerd task %q", id)
}
}
}()
// wait is a long running background request, no timeout needed.
exitCh, err := task.Wait(ctrdutil.NamespacedContext())
if err != nil {
return nil, fmt.Errorf("failed to wait for containerd task: %w", err)
}
// Registered before the NRI start call, so NRI receives a stop notification
// if that call or any later step fails.
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
err = c.nri.StopContainer(deferCtx, &sandbox, &cntr)
if err != nil {
log.G(ctx).WithError(err).Errorf("NRI stop failed for failed container %q", id)
}
}
}()
err = c.nri.StartContainer(ctx, &sandbox, &cntr)
if err != nil {
log.G(ctx).WithError(err).Errorf("NRI container start failed")
return nil, fmt.Errorf("NRI container start failed: %w", err)
}
// Start containerd task.
if err := task.Start(ctx); err != nil {
return nil, fmt.Errorf("failed to start containerd task %q: %w", id, err)
}
// Update container start timestamp.
if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
status.Pid = task.Pid()
status.StartedAt = time.Now().UnixNano()
return status, nil
}); err != nil {
return nil, fmt.Errorf("failed to update container %q state: %w", id, err)
}
// It handles the TaskExit event and update container state after this.
// NOTE(review): context.Background() is used instead of ctx, presumably so
// the monitor is not tied to this request's lifetime - confirm.
c.eventMonitor.startContainerExitMonitor(context.Background(), id, task.Pid(), exitCh)
c.generateAndSendContainerEvent(ctx, id, sandboxID, runtime.ContainerEventType_CONTAINER_STARTED_EVENT)
// A failed NRI post-start notification is only logged; the start still succeeds.
err = c.nri.PostStartContainer(ctx, &sandbox, &cntr)
if err != nil {
log.G(ctx).WithError(err).Errorf("NRI post-start notification failed")
}
containerStartTimer.WithValues(info.Runtime.Name).UpdateSince(start)
return &runtime.StartContainerResponse{}, nil
}
// setContainerStarting flags the container as being started by setting
// Starting in its status. The status is left unchanged and an error is
// returned when the container is not in the created state, is being removed,
// or is already flagged as starting.
func setContainerStarting(container containerstore.Container) error {
	markStarting := func(st containerstore.Status) (containerstore.Status, error) {
		state := st.State()
		switch {
		case state != runtime.ContainerState_CONTAINER_CREATED:
			// Only a created container can be started.
			return st, fmt.Errorf("container is in %s state", criContainerStateToString(state))
		case st.Removing:
			// A removal in progress wins over a start request.
			return st, errors.New("container is in removing state, can't be started")
		case st.Starting:
			return st, errors.New("container is already in starting state")
		}
		st.Starting = true
		return st, nil
	}
	return container.Status.Update(markStarting)
}
// resetContainerStarting clears the Starting flag in the container status so
// that later operations on the container are no longer blocked by it.
func resetContainerStarting(container containerstore.Container) error {
	clearStarting := func(st containerstore.Status) (containerstore.Status, error) {
		st.Starting = false
		return st, nil
	}
	return container.Status.Update(clearStarting)
}
// createContainerLoggers creates the stdout and stderr write closers used to
// record container output.
//
// When logPath is empty both writers are discard loggers. Otherwise the log
// file at logPath is opened and the loggers write to it through one shared
// serial write closer. When tty is true no stderr logger is created and the
// returned stderr is nil. The log file is closed by a background goroutine
// once every logger that was created has signalled on its completion channel.
//
// An error is returned only when the log file cannot be opened.
func (c *criService) createContainerLoggers(logPath string, tty bool) (stdout io.WriteCloser, stderr io.WriteCloser, err error) {
	if logPath == "" {
		// Only generate container log when log path is specified.
		return cio.NewDiscardLogger(), cio.NewDiscardLogger(), nil
	}

	f, err := openLogFile(logPath)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to create and open log file: %w", err)
	}
	// Nothing below can fail, so f needs no error-path cleanup here: the
	// goroutine at the end is the single owner responsible for closing it.

	var stdoutCh, stderrCh <-chan struct{}
	wc := cioutil.NewSerialWriteCloser(f)
	stdout, stdoutCh = cio.NewCRILogger(logPath, wc, cio.Stdout, c.config.MaxContainerLogLineSize)
	// Only redirect stderr when there is no tty.
	if !tty {
		stderr, stderrCh = cio.NewCRILogger(logPath, wc, cio.Stderr, c.config.MaxContainerLogLineSize)
	}

	go func() {
		// Wait for each logger that exists; a nil channel means that logger
		// was never created and must not be waited on.
		if stdoutCh != nil {
			<-stdoutCh
		}
		if stderrCh != nil {
			<-stderrCh
		}
		log.L.Debugf("Finish redirecting log file %q, closing it", logPath)
		f.Close()
	}()
	return stdout, stderr, nil
}

View File

@@ -1,106 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
)
// TestSetContainerStarting verifies that setContainerStarting only accepts a
// container in the created state that is neither starting nor being removed,
// that rejected containers keep their status untouched, and that the starting
// flag can be set and then reset.
func TestSetContainerStarting(t *testing.T) {
	const testID = "test-id"

	type testCase struct {
		desc      string
		status    containerstore.Status
		expectErr bool
	}
	cases := []testCase{
		{
			desc: "should not return error when container is in created state",
			status: containerstore.Status{
				CreatedAt: time.Now().UnixNano(),
			},
			expectErr: false,
		},
		{
			desc: "should return error when container is in running state",
			status: containerstore.Status{
				CreatedAt: time.Now().UnixNano(),
				StartedAt: time.Now().UnixNano(),
			},
			expectErr: true,
		},
		{
			desc: "should return error when container is in exited state",
			status: containerstore.Status{
				CreatedAt:  time.Now().UnixNano(),
				StartedAt:  time.Now().UnixNano(),
				FinishedAt: time.Now().UnixNano(),
			},
			expectErr: true,
		},
		{
			desc: "should return error when container is in unknown state",
			status: containerstore.Status{
				CreatedAt:  0,
				StartedAt:  0,
				FinishedAt: 0,
			},
			expectErr: true,
		},
		{
			desc: "should return error when container is in starting state",
			status: containerstore.Status{
				CreatedAt: time.Now().UnixNano(),
				Starting:  true,
			},
			expectErr: true,
		},
		{
			desc: "should return error when container is in removing state",
			status: containerstore.Status{
				CreatedAt: time.Now().UnixNano(),
				Removing:  true,
			},
			expectErr: true,
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			container, err := containerstore.NewContainer(
				containerstore.Metadata{ID: testID},
				containerstore.WithFakeStatus(tc.status),
			)
			assert.NoError(t, err)

			err = setContainerStarting(container)
			if !tc.expectErr {
				// Success path: the flag must be set, and resettable.
				assert.NoError(t, err)
				assert.True(t, container.Status.Get().Starting, "starting should be set")
				assert.NoError(t, resetContainerStarting(container))
				assert.False(t, container.Status.Get().Starting, "starting should be reset")
				return
			}
			// Failure path: the stored status must be exactly what we seeded.
			assert.Error(t, err)
			assert.Equal(t, tc.status, container.Status.Get(), "metadata should not be updated")
		})
	}
}

View File

@@ -1,53 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
"github.com/containerd/containerd/v2/api/services/tasks/v1"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// ContainerStats returns the stats of a single container. An error is
// returned when the container is not in the container store, when the task
// service does not answer with exactly one metric for it, or when the metric
// cannot be converted.
func (c *criService) ContainerStats(ctx context.Context, in *runtime.ContainerStatsRequest) (*runtime.ContainerStatsResponse, error) {
	cntr, err := c.containerStore.Get(in.GetContainerId())
	if err != nil {
		return nil, fmt.Errorf("failed to find container: %w", err)
	}

	// Ask the task service for the metrics of exactly this container.
	metricsReq := &tasks.MetricsRequest{
		Filters: []string{"id==" + cntr.ID},
	}
	metricsResp, err := c.client.TaskService().Metrics(ctx, metricsReq)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch metrics for task: %w", err)
	}
	if count := len(metricsResp.Metrics); count != 1 {
		return nil, fmt.Errorf("unexpected metrics response: %+v", metricsResp.Metrics)
	}

	// The conversion depends on the platform advertised by the sandbox.
	convert, err := c.getMetricsHandler(ctx, cntr.SandboxID)
	if err != nil {
		return nil, err
	}
	containerStats, err := convert(cntr.Metadata, metricsResp.Metrics[0])
	if err != nil {
		return nil, fmt.Errorf("failed to decode container metrics: %w", err)
	}
	return &runtime.ContainerStatsResponse{Stats: containerStats}, nil
}

View File

@@ -1,512 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"fmt"
"reflect"
"time"
wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats"
cg1 "github.com/containerd/cgroups/v3/cgroup1/stats"
cg2 "github.com/containerd/cgroups/v3/cgroup2/stats"
"github.com/containerd/log"
"github.com/containerd/typeurl/v2"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/api/services/tasks/v1"
"github.com/containerd/containerd/v2/api/types"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
"github.com/containerd/containerd/v2/pkg/cri/store/stats"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/errdefs"
)
// ListContainerStats returns the stats of the containers selected by the
// request filter, or of every container in the container store when the
// request carries no filter.
func (c *criService) ListContainerStats(
	ctx context.Context,
	in *runtime.ListContainerStatsRequest,
) (*runtime.ListContainerStatsResponse, error) {
	metricsReq, selected, err := c.buildTaskMetricsRequest(in)
	if err != nil {
		return nil, fmt.Errorf("failed to build metrics request: %w", err)
	}

	metricsResp, err := c.client.TaskService().Metrics(ctx, metricsReq)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch metrics for tasks: %w", err)
	}

	response, err := c.toCRIContainerStats(ctx, metricsResp.Metrics, selected)
	if err != nil {
		return nil, fmt.Errorf("failed to convert to cri containerd stats format: %w", err)
	}
	return response, nil
}
// metricsHandler converts a containerd task metric, together with the
// metadata of the container it belongs to, into CRI container stats.
type metricsHandler func(containerstore.Metadata, *types.Metric) (*runtime.ContainerStats, error)
// getMetricsHandler returns the function used to transform container metrics
// into the CRI format for containers of the given sandbox. The choice is made
// from the OS of the platform that the sandbox controller reports for the
// sandbox. For an OS other than "windows" or "linux" the returned error wraps
// [errdefs.ErrNotImplemented].
func (c *criService) getMetricsHandler(ctx context.Context, sandboxID string) (metricsHandler, error) {
	sb, err := c.sandboxStore.Get(sandboxID)
	if err != nil {
		return nil, fmt.Errorf("failed to find sandbox id %q: %w", sandboxID, err)
	}
	ctrl, err := c.sandboxService.SandboxController(sb.Config, sb.RuntimeHandler)
	if err != nil {
		return nil, fmt.Errorf("failed to get sandbox controller: %w", err)
	}

	// The sandbox may virtualize or emulate a platform different from the
	// host's, in which case stats arrive in that platform's format. The
	// conversion therefore keys off what the sandbox advertises.
	platform, err := ctrl.Platform(ctx, sandboxID)
	if err != nil {
		return nil, err
	}

	ociRuntime, err := c.config.GetSandboxRuntime(sb.Config, sb.RuntimeHandler)
	if err != nil {
		return nil, fmt.Errorf("failed to get runtimeHandler %q: %w", sb.RuntimeHandler, err)
	}
	snapshotter := c.RuntimeSnapshotter(ctx, ociRuntime)

	// Pick the platform-specific converter, then bind the snapshotter to it.
	var convert func(containerstore.Metadata, *types.Metric, string) (*runtime.ContainerStats, error)
	switch platform.OS {
	case "windows":
		convert = c.windowsContainerMetrics
	case "linux":
		convert = c.linuxContainerMetrics
	default:
		return nil, fmt.Errorf("container metrics for platform %+v: %w", platform, errdefs.ErrNotImplemented)
	}
	return func(meta containerstore.Metadata, metric *types.Metric) (*runtime.ContainerStats, error) {
		return convert(meta, metric, snapshotter)
	}, nil
}
// toCRIContainerStats converts containerd task metrics into a CRI
// ListContainerStatsResponse for the given containers.
//
// stats is indexed by metric ID and matched to containers by container ID; a
// container without a metric is passed to its handler with a nil metric. A
// container is skipped when its sandbox is not found and the container itself
// is no longer in the container store. Any other failure to obtain a metrics
// handler aborts the conversion with an error that wraps the handler lookup
// error. When CPU usage is reported, UsageNanoCores is filled in from
// getUsageNanoCores.
func (c *criService) toCRIContainerStats(
	ctx context.Context,
	stats []*types.Metric,
	containers []containerstore.Container,
) (*runtime.ListContainerStatsResponse, error) {
	statsMap := make(map[string]*types.Metric)
	for _, stat := range stats {
		statsMap[stat.ID] = stat
	}
	containerStats := new(runtime.ListContainerStatsResponse)

	// Unfortunately if no filter was passed we're asking for every containers stats which
	// generally belong to multiple different pods, who all might have different platforms.
	// To avoid recalculating the right metricsHandler to invoke, if we've already calculated
	// the platform and handler for a given sandbox just pull it from our map here.
	sandboxToMetricsHandler := make(map[string]metricsHandler)
	for _, cntr := range containers {
		handler, ok := sandboxToMetricsHandler[cntr.SandboxID]
		if !ok {
			var err error
			handler, err = c.getMetricsHandler(ctx, cntr.SandboxID)
			if err != nil {
				// If the sandbox is not found, it may have been removed. Check
				// whether the container still exists. The lookup error is kept
				// in its own variable so that the handler error reported below
				// is never overwritten (previously it could become nil).
				if errdefs.IsNotFound(err) {
					if _, getErr := c.containerStore.Get(cntr.ID); getErr != nil && errdefs.IsNotFound(getErr) {
						log.G(ctx).Warnf("container %q is not found, skip it", cntr.ID)
						continue
					}
				}
				return nil, fmt.Errorf("failed to get metrics handler for container %q: %w", cntr.ID, err)
			}
			sandboxToMetricsHandler[cntr.SandboxID] = handler
		}

		cs, err := handler(cntr.Metadata, statsMap[cntr.ID])
		if err != nil {
			return nil, fmt.Errorf("failed to decode container metrics for %q: %w", cntr.ID, err)
		}

		if cs.Cpu != nil && cs.Cpu.UsageCoreNanoSeconds != nil {
			// this is a calculated value and should be computed for all OSes
			nanoUsage, err := c.getUsageNanoCores(cntr.Metadata.ID, false, cs.Cpu.UsageCoreNanoSeconds.Value, time.Unix(0, cs.Cpu.Timestamp))
			if err != nil {
				return nil, fmt.Errorf("failed to get usage nano cores, containerID: %s: %w", cntr.Metadata.ID, err)
			}
			cs.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoUsage}
		}
		containerStats.Stats = append(containerStats.Stats, cs)
	}
	return containerStats, nil
}
// getUsageNanoCores derives a CPU usage rate from the current cumulative CPU
// usage and the previous sample stored for the sandbox or container.
//
// The previous sample is read from the sandbox store when isSandbox is true
// and from the container store otherwise. With no previous sample, the
// current one is stored and 0 is returned. With a zero or negative interval
// between the samples, 0 is returned and nothing is stored. Otherwise the
// usage difference divided by the elapsed nanoseconds, scaled by the number of
// nanoseconds in a second, is returned and the current sample is stored.
func (c *criService) getUsageNanoCores(containerID string, isSandbox bool, currentUsageCoreNanoSeconds uint64, currentTimestamp time.Time) (uint64, error) {
	// Look up the previously recorded sample, if any.
	var previous *stats.ContainerStats
	if isSandbox {
		sb, err := c.sandboxStore.Get(containerID)
		if err != nil {
			return 0, fmt.Errorf("failed to get sandbox container: %s: %w", containerID, err)
		}
		previous = sb.Stats
	} else {
		cntr, err := c.containerStore.Get(containerID)
		if err != nil {
			return 0, fmt.Errorf("failed to get container ID: %s: %w", containerID, err)
		}
		previous = cntr.Stats
	}

	// First sample: there is nothing to compare against yet, just record it.
	if previous == nil {
		first := &stats.ContainerStats{
			UsageCoreNanoSeconds: currentUsageCoreNanoSeconds,
			Timestamp:            currentTimestamp,
		}
		if isSandbox {
			if err := c.sandboxStore.UpdateContainerStats(containerID, first); err != nil {
				return 0, fmt.Errorf("failed to update sandbox stats container ID: %s: %w", containerID, err)
			}
			return 0, nil
		}
		if err := c.containerStore.UpdateContainerStats(containerID, first); err != nil {
			return 0, fmt.Errorf("failed to update container stats ID: %s: %w", containerID, err)
		}
		return 0, nil
	}

	elapsed := currentTimestamp.UnixNano() - previous.Timestamp.UnixNano()
	if elapsed <= 0 {
		// zero or negative interval
		return 0, nil
	}

	usageDelta := currentUsageCoreNanoSeconds - previous.UsageCoreNanoSeconds
	usageNanoCores := uint64(float64(usageDelta) /
		float64(elapsed) * float64(time.Second/time.Nanosecond))

	// Record the current sample as the baseline for the next call.
	latest := &stats.ContainerStats{
		UsageCoreNanoSeconds: currentUsageCoreNanoSeconds,
		Timestamp:            currentTimestamp,
	}
	if isSandbox {
		if err := c.sandboxStore.UpdateContainerStats(containerID, latest); err != nil {
			return 0, fmt.Errorf("failed to update sandbox container stats: %s: %w", containerID, err)
		}
		return usageNanoCores, nil
	}
	if err := c.containerStore.UpdateContainerStats(containerID, latest); err != nil {
		return 0, fmt.Errorf("failed to update container stats ID: %s: %w", containerID, err)
	}
	return usageNanoCores, nil
}
// normalizeContainerStatsFilter rewrites, in place, the container ID and the
// pod sandbox ID of filter with the IDs of the entries that the container and
// sandbox stores return for them. A field whose lookup fails is left as is.
func (c *criService) normalizeContainerStatsFilter(filter *runtime.ContainerStatsFilter) {
	cntr, cntrErr := c.containerStore.Get(filter.GetId())
	if cntrErr == nil {
		filter.Id = cntr.ID
	}

	sb, sbErr := c.sandboxStore.Get(filter.GetPodSandboxId())
	if sbErr == nil {
		filter.PodSandboxId = sb.ID
	}
}
// buildTaskMetricsRequest builds the tasks.MetricsRequest for a stats request
// and returns it together with the containers the request selects.
//
// Without a filter the request carries no filters and every container in the
// container store is returned. With a filter, the filter is normalized first;
// then each container that matches the requested ID, pod sandbox ID and label
// selector (each only checked when set) is returned and contributes one
// "id==<container ID>" filter to the request.
func (c *criService) buildTaskMetricsRequest(
	r *runtime.ListContainerStatsRequest,
) (*tasks.MetricsRequest, []containerstore.Container, error) {
	req := &tasks.MetricsRequest{}
	filter := r.GetFilter()
	if filter == nil {
		return req, c.containerStore.List(), nil
	}
	c.normalizeContainerStatsFilter(filter)

	wantID := filter.GetId()
	wantSandboxID := filter.GetPodSandboxId()
	selector := filter.GetLabelSelector()

	var selected []containerstore.Container
	for _, cntr := range c.containerStore.List() {
		switch {
		case wantID != "" && cntr.ID != wantID:
			continue
		case wantSandboxID != "" && cntr.SandboxID != wantSandboxID:
			continue
		case selector != nil && !matchLabelSelector(selector, cntr.Config.GetLabels()):
			continue
		}
		selected = append(selected, cntr)
		req.Filters = append(req.Filters, "id=="+cntr.ID)
	}
	return req, selected, nil
}
// matchLabelSelector reports whether every key in selector is present in
// labels with an identical value. An empty or nil selector matches any labels.
func matchLabelSelector(selector, labels map[string]string) bool {
	for key, want := range selector {
		got, present := labels[key]
		if !present || got != want {
			return false
		}
	}
	return true
}
// windowsContainerMetrics builds CRI container stats for a Windows container.
//
// The writable layer usage comes from the snapshot that GetSnapshot returns
// for meta.ID and snapshotter; when that lookup fails, used bytes and inodes
// are reported as zero. Attributes are copied from the container config in
// meta. When stats is non-nil its payload is decoded and the CPU and memory
// sections are filled in from the Windows statistics that are present.
//
// An error is returned when the payload cannot be unmarshalled, when it is not
// a *wstats.Statistics (previously this case panicked on an unchecked type
// assertion), or when it carries no Windows statistics.
func (c *criService) windowsContainerMetrics(
	meta containerstore.Metadata,
	stats *types.Metric,
	snapshotter string,
) (*runtime.ContainerStats, error) {
	var cs runtime.ContainerStats
	var usedBytes, inodesUsed uint64
	sn, err := c.GetSnapshot(meta.ID, snapshotter)
	// If snapshotstore doesn't have cached snapshot information
	// set WritableLayer usage to zero
	if err == nil {
		usedBytes = sn.Size
		inodesUsed = sn.Inodes
	}
	cs.WritableLayer = &runtime.FilesystemUsage{
		// NOTE(review): on a failed lookup sn is whatever GetSnapshot returned
		// alongside the error, presumably a zero value - confirm.
		Timestamp: sn.Timestamp,
		FsId: &runtime.FilesystemIdentifier{
			Mountpoint: c.imageFSPaths[snapshotter],
		},
		UsedBytes:  &runtime.UInt64Value{Value: usedBytes},
		InodesUsed: &runtime.UInt64Value{Value: inodesUsed},
	}
	cs.Attributes = &runtime.ContainerAttributes{
		Id:          meta.ID,
		Metadata:    meta.Config.GetMetadata(),
		Labels:      meta.Config.GetLabels(),
		Annotations: meta.Config.GetAnnotations(),
	}

	if stats != nil {
		s, err := typeurl.UnmarshalAny(stats.Data)
		if err != nil {
			return nil, fmt.Errorf("failed to extract container metrics: %w", err)
		}
		// Use the comma-ok form so an unexpected payload type becomes an
		// error instead of a panic.
		hcsStats, ok := s.(*wstats.Statistics)
		if !ok {
			return nil, fmt.Errorf("unexpected container metrics type %T, expected windows statistics", s)
		}
		winStats := hcsStats.GetWindows()
		if winStats == nil {
			return nil, errors.New("windows stats is empty")
		}
		if winStats.Processor != nil {
			cs.Cpu = &runtime.CpuUsage{
				Timestamp:            (protobuf.FromTimestamp(winStats.Timestamp)).UnixNano(),
				UsageCoreNanoSeconds: &runtime.UInt64Value{Value: winStats.Processor.TotalRuntimeNS},
			}
		}
		if winStats.Memory != nil {
			cs.Memory = &runtime.MemoryUsage{
				Timestamp: (protobuf.FromTimestamp(winStats.Timestamp)).UnixNano(),
				WorkingSetBytes: &runtime.UInt64Value{
					Value: winStats.Memory.MemoryUsagePrivateWorkingSetBytes,
				},
			}
		}
	}
	return &cs, nil
}
// linuxContainerMetrics builds CRI container stats for a Linux container.
//
// The writable layer usage comes from the snapshot that GetSnapshot returns
// for meta.ID and snapshotter; when that lookup fails, used bytes and inodes
// are reported as zero. Attributes are copied from the container config in
// meta. When stats is non-nil its payload must be cgroup v1 metrics, cgroup v2
// metrics or Windows statistics; it is decoded and passed to cpuContainerStats
// and memoryContainerStats to fill in the CPU and memory sections.
func (c *criService) linuxContainerMetrics(
	meta containerstore.Metadata,
	stats *types.Metric,
	snapshotter string,
) (*runtime.ContainerStats, error) {
	var (
		result                runtime.ContainerStats
		usedBytes, inodesUsed uint64
	)

	// If snapshotstore doesn't have cached snapshot information
	// the WritableLayer usage stays at zero.
	snapshot, snapErr := c.GetSnapshot(meta.ID, snapshotter)
	if snapErr == nil {
		usedBytes, inodesUsed = snapshot.Size, snapshot.Inodes
	}

	result.WritableLayer = &runtime.FilesystemUsage{
		Timestamp: snapshot.Timestamp,
		FsId: &runtime.FilesystemIdentifier{
			Mountpoint: c.imageFSPaths[snapshotter],
		},
		UsedBytes:  &runtime.UInt64Value{Value: usedBytes},
		InodesUsed: &runtime.UInt64Value{Value: inodesUsed},
	}
	result.Attributes = &runtime.ContainerAttributes{
		Id:          meta.ID,
		Metadata:    meta.Config.GetMetadata(),
		Labels:      meta.Config.GetLabels(),
		Annotations: meta.Config.GetAnnotations(),
	}

	// Without a task metric only the filesystem and attribute parts are set.
	if stats == nil {
		return &result, nil
	}

	// Pick the concrete message type matching the payload before decoding.
	var data interface{}
	switch {
	case typeurl.Is(stats.Data, (*cg1.Metrics)(nil)):
		data = &cg1.Metrics{}
	case typeurl.Is(stats.Data, (*cg2.Metrics)(nil)):
		data = &cg2.Metrics{}
	case typeurl.Is(stats.Data, (*wstats.Statistics)(nil)):
		data = &wstats.Statistics{}
	default:
		return nil, errors.New("cannot convert metric data to cgroups.Metrics or windows.Statistics")
	}
	if err := typeurl.UnmarshalTo(stats.Data, data); err != nil {
		return nil, fmt.Errorf("failed to extract container metrics: %w", err)
	}

	sampledAt := protobuf.FromTimestamp(stats.Timestamp)

	cpuUsage, err := c.cpuContainerStats(meta.ID, false /* isSandbox */, data, sampledAt)
	if err != nil {
		return nil, fmt.Errorf("failed to obtain cpu stats: %w", err)
	}
	result.Cpu = cpuUsage

	memoryUsage, err := c.memoryContainerStats(meta.ID, data, sampledAt)
	if err != nil {
		return nil, fmt.Errorf("failed to obtain memory stats: %w", err)
	}
	result.Memory = memoryUsage

	return &result, nil
}
// getWorkingSet derives the working set size from cgroup v1 memory stats as
//
//	workingset = usage - total_inactive_file
//
// clamped at zero. It returns 0 when memory.Usage is unset. The caller must
// pass a non-nil memory.
func getWorkingSet(memory *cg1.MemoryStat) uint64 {
	usage := memory.Usage
	if usage == nil || usage.Usage <= memory.TotalInactiveFile {
		return 0
	}
	return usage.Usage - memory.TotalInactiveFile
}
// getWorkingSetV2 derives the working set size from cgroup v2 memory stats as
//
//	workingset = usage - inactive_file
//
// clamped at zero. The caller must pass a non-nil memory.
func getWorkingSetV2(memory *cg2.MemoryStat) uint64 {
	if memory.Usage <= memory.InactiveFile {
		return 0
	}
	return memory.Usage - memory.InactiveFile
}
// isMemoryUnlimited reports whether v is large enough to be treated as
// "no memory limit", i.e. strictly greater than 1 << 62.
func isMemoryUnlimited(v uint64) bool {
	// Threshold above which memory is considered "unlimited". It is not
	// MaxInt64 because the kernel rounds the value.
	// TODO: k8s or cadvisor should export this https://github.com/google/cadvisor/blob/2b6fbacac7598e0140b5bc8428e3bdd7d86cf5b9/metrics/prometheus.go#L1969-L1971
	const threshold uint64 = 1 << 62
	return threshold < v
}
// getAvailableBytes returns the memory still available under the cgroup v1
// limit, computed as memory limit - working set bytes.
//
// It returns 0 when memory.Usage is unset, when the limit is considered
// unlimited, or when the working set already meets or exceeds the limit, so
// the unsigned subtraction can never wrap around to a huge value.
// The caller must pass a non-nil memory.
//
// See https://github.com/kubernetes/kubernetes/blob/b47f8263e18c7b13dba33fba23187e5e0477cdbd/pkg/kubelet/stats/helper.go#L68-L71
func getAvailableBytes(memory *cg1.MemoryStat, workingSetBytes uint64) uint64 {
	usage := memory.Usage
	if usage == nil || isMemoryUnlimited(usage.Limit) {
		return 0
	}
	// Saturate at zero instead of underflowing.
	if workingSetBytes >= usage.Limit {
		return 0
	}
	return usage.Limit - workingSetBytes
}
// getAvailableBytesV2 returns the memory still available under the cgroup v2
// limit (memory.max), computed as memory limit - working set bytes.
//
// It returns 0 when the limit is considered unlimited or when the working set
// already meets or exceeds the limit, so the unsigned subtraction can never
// wrap around to a huge value. The caller must pass a non-nil memory.
func getAvailableBytesV2(memory *cg2.MemoryStat, workingSetBytes uint64) uint64 {
	limit := memory.UsageLimit
	if isMemoryUnlimited(limit) {
		return 0
	}
	// Saturate at zero instead of underflowing.
	if workingSetBytes >= limit {
		return 0
	}
	return limit - workingSetBytes
}
// cpuContainerStats converts decoded cgroup metrics into CRI CPU usage.
//
// stats must be a *cg1.Metrics or a *cg2.Metrics; any other dynamic type,
// including a nil interface, yields an error. For cgroup v1 the cumulative
// usage is CPU.Usage.Total; for cgroup v2 CPU.UsageUsec is multiplied by 1000
// to convert it to nanoseconds. When the relevant CPU section is absent the
// function returns (nil, nil). timestamp is recorded on the result as Unix
// nanoseconds. ID and isSandbox are not consulted by this function.
func (c *criService) cpuContainerStats(ID string, isSandbox bool, stats interface{}, timestamp time.Time) (*runtime.CpuUsage, error) {
	switch metrics := stats.(type) {
	case *cg1.Metrics:
		// The getters cover a missing CPU or Usage section in a single check.
		if usage := metrics.GetCPU().GetUsage(); usage != nil {
			return &runtime.CpuUsage{
				Timestamp:            timestamp.UnixNano(),
				UsageCoreNanoSeconds: &runtime.UInt64Value{Value: usage.Total},
			}, nil
		}
	case *cg2.Metrics:
		if metrics.CPU != nil {
			// convert to nano seconds
			usageCoreNanoSeconds := metrics.CPU.UsageUsec * 1000
			return &runtime.CpuUsage{
				Timestamp:            timestamp.UnixNano(),
				UsageCoreNanoSeconds: &runtime.UInt64Value{Value: usageCoreNanoSeconds},
			}, nil
		}
	default:
		// reflect.TypeOf returns nil for a nil interface and Elem panics on
		// non-pointer kinds, so resolve the package path defensively.
		var pkgPath string
		if t := reflect.TypeOf(metrics); t != nil {
			if t.Kind() == reflect.Ptr {
				t = t.Elem()
			}
			pkgPath = t.PkgPath()
		}
		return nil, fmt.Errorf("unexpected metrics type: %T from %s", metrics, pkgPath)
	}
	return nil, nil
}
// memoryContainerStats converts decoded cgroup metrics into CRI memory usage.
//
// stats must be a *cg1.Metrics or a *cg2.Metrics; any other dynamic type,
// including a nil interface, yields an error. The working set and available
// bytes are derived with getWorkingSet/getAvailableBytes (cgroup v1) or their
// V2 counterparts (cgroup v2). When the relevant memory section is absent the
// function returns (nil, nil). timestamp is recorded on the result as Unix
// nanoseconds. ID is not consulted by this function.
func (c *criService) memoryContainerStats(ID string, stats interface{}, timestamp time.Time) (*runtime.MemoryUsage, error) {
	switch metrics := stats.(type) {
	case *cg1.Metrics:
		if metrics.Memory != nil && metrics.Memory.Usage != nil {
			workingSetBytes := getWorkingSet(metrics.Memory)
			return &runtime.MemoryUsage{
				Timestamp: timestamp.UnixNano(),
				WorkingSetBytes: &runtime.UInt64Value{
					Value: workingSetBytes,
				},
				AvailableBytes:  &runtime.UInt64Value{Value: getAvailableBytes(metrics.Memory, workingSetBytes)},
				UsageBytes:      &runtime.UInt64Value{Value: metrics.Memory.Usage.Usage},
				RssBytes:        &runtime.UInt64Value{Value: metrics.Memory.TotalRSS},
				PageFaults:      &runtime.UInt64Value{Value: metrics.Memory.TotalPgFault},
				MajorPageFaults: &runtime.UInt64Value{Value: metrics.Memory.TotalPgMajFault},
			}, nil
		}
	case *cg2.Metrics:
		if metrics.Memory != nil {
			workingSetBytes := getWorkingSetV2(metrics.Memory)
			return &runtime.MemoryUsage{
				Timestamp: timestamp.UnixNano(),
				WorkingSetBytes: &runtime.UInt64Value{
					Value: workingSetBytes,
				},
				AvailableBytes: &runtime.UInt64Value{Value: getAvailableBytesV2(metrics.Memory, workingSetBytes)},
				UsageBytes:     &runtime.UInt64Value{Value: metrics.Memory.Usage},
				// Use Anon memory for RSS as cAdvisor on cgroupv2
				// see https://github.com/google/cadvisor/blob/a9858972e75642c2b1914c8d5428e33e6392c08a/container/libcontainer/handler.go#L799
				RssBytes:        &runtime.UInt64Value{Value: metrics.Memory.Anon},
				PageFaults:      &runtime.UInt64Value{Value: metrics.Memory.Pgfault},
				MajorPageFaults: &runtime.UInt64Value{Value: metrics.Memory.Pgmajfault},
			}, nil
		}
	default:
		// reflect.TypeOf returns nil for a nil interface and Elem panics on
		// non-pointer kinds, so resolve the package path defensively.
		var pkgPath string
		if t := reflect.TypeOf(metrics); t != nil {
			if t.Kind() == reflect.Ptr {
				t = t.Elem()
			}
			pkgPath = t.PkgPath()
		}
		return nil, fmt.Errorf("unexpected metrics type: %T from %s", metrics, pkgPath)
	}
	return nil, nil
}

View File

@@ -1,437 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"math"
"reflect"
"testing"
"time"
v1 "github.com/containerd/cgroups/v3/cgroup1/stats"
v2 "github.com/containerd/cgroups/v3/cgroup2/stats"
"github.com/containerd/containerd/v2/api/types"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
sandboxstore "github.com/containerd/containerd/v2/pkg/cri/store/sandbox"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// TestContainerMetricsCPUNanoCoreUsage feeds getUsageNanoCores two cumulative
// CPU samples taken one second apart and checks the reported nano core usage
// for each, as well as that the container gains stored Stats after sampling.
func TestContainerMetricsCPUNanoCoreUsage(t *testing.T) {
	const id = "ID"

	svc := newTestCRIService()
	firstSample := time.Now()
	secondSample := firstSample.Add(time.Second)

	type testCase struct {
		desc                        string
		firstCPUValue               uint64
		secondCPUValue              uint64
		expectedNanoCoreUsageFirst  uint64
		expectedNanoCoreUsageSecond uint64
	}
	cases := []testCase{
		{
			desc:                        "metrics",
			firstCPUValue:               50,
			secondCPUValue:              500,
			expectedNanoCoreUsageFirst:  0,
			expectedNanoCoreUsageSecond: 450,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.desc, func(t *testing.T) {
			ctr, err := containerstore.NewContainer(containerstore.Metadata{ID: id})
			assert.NoError(t, err)
			assert.Nil(t, ctr.Stats)
			assert.NoError(t, svc.containerStore.Add(ctr))

			// First sample: there is nothing to diff against yet.
			usage, err := svc.getUsageNanoCores(id, false, tc.firstCPUValue, firstSample)
			assert.NoError(t, err)
			ctr, err = svc.containerStore.Get(id)
			assert.NoError(t, err)
			assert.NotNil(t, ctr.Stats)
			assert.Equal(t, tc.expectedNanoCoreUsageFirst, usage)

			// Second sample, one second later.
			usage, err = svc.getUsageNanoCores(id, false, tc.secondCPUValue, secondSample)
			assert.NoError(t, err)
			assert.Equal(t, tc.expectedNanoCoreUsageSecond, usage)
			ctr, err = svc.containerStore.Get(id)
			assert.NoError(t, err)
			assert.NotNil(t, ctr.Stats)
		})
	}
}
// TestGetWorkingSet exercises getWorkingSet with a missing usage entry and
// with usage above and below the inactive file total.
func TestGetWorkingSet(t *testing.T) {
	type testCase struct {
		desc     string
		memory   *v1.MemoryStat
		expected uint64
	}
	cases := []testCase{
		{
			desc:     "nil memory usage",
			memory:   &v1.MemoryStat{},
			expected: 0,
		},
		{
			desc: "memory usage higher than inactive_total_file",
			memory: &v1.MemoryStat{
				TotalInactiveFile: 1000,
				Usage:             &v1.MemoryEntry{Usage: 2000},
			},
			expected: 1000,
		},
		{
			desc: "memory usage lower than inactive_total_file",
			memory: &v1.MemoryStat{
				TotalInactiveFile: 2000,
				Usage:             &v1.MemoryEntry{Usage: 1000},
			},
			expected: 0,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.desc, func(t *testing.T) {
			assert.Equal(t, tc.expected, getWorkingSet(tc.memory))
		})
	}
}
// TestGetWorkingSetV2 exercises getWorkingSetV2 with zeroed stats and with
// usage above and below the inactive file amount.
func TestGetWorkingSetV2(t *testing.T) {
	type testCase struct {
		desc     string
		memory   *v2.MemoryStat
		expected uint64
	}
	cases := []testCase{
		{
			desc:     "nil memory usage",
			memory:   &v2.MemoryStat{},
			expected: 0,
		},
		{
			desc:     "memory usage higher than inactive_total_file",
			memory:   &v2.MemoryStat{InactiveFile: 1000, Usage: 2000},
			expected: 1000,
		},
		{
			desc:     "memory usage lower than inactive_total_file",
			memory:   &v2.MemoryStat{InactiveFile: 2000, Usage: 1000},
			expected: 0,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.desc, func(t *testing.T) {
			assert.Equal(t, tc.expected, getWorkingSetV2(tc.memory))
		})
	}
}
// TestGetAvailableBytes checks getAvailableBytes for a cgroup v1 memory entry
// without a limit and for one with a finite limit.
func TestGetAvailableBytes(t *testing.T) {
	// statWithLimit builds a MemoryStat using 1000 bytes under the given limit.
	statWithLimit := func(limit uint64) *v1.MemoryStat {
		return &v1.MemoryStat{
			Usage: &v1.MemoryEntry{
				Limit: limit,
				Usage: 1000,
			},
		}
	}

	cases := []struct {
		desc            string
		memory          *v1.MemoryStat
		workingSetBytes uint64
		expected        uint64
	}{
		{
			desc:            "no limit",
			memory:          statWithLimit(math.MaxUint64), // no limit
			workingSetBytes: 500,
			expected:        0,
		},
		{
			desc:            "with limit",
			memory:          statWithLimit(5000),
			workingSetBytes: 500,
			expected:        5000 - 500,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.desc, func(t *testing.T) {
			assert.Equal(t, tc.expected, getAvailableBytes(tc.memory, tc.workingSetBytes))
		})
	}
}
// TestGetAvailableBytesV2 checks getAvailableBytesV2 for cgroup v2 memory
// stats without a limit and with a finite limit.
func TestGetAvailableBytesV2(t *testing.T) {
	// statWithLimit builds a MemoryStat using 1000 bytes under the given limit.
	statWithLimit := func(limit uint64) *v2.MemoryStat {
		return &v2.MemoryStat{
			UsageLimit: limit,
			Usage:      1000,
		}
	}

	cases := []struct {
		desc            string
		memory          *v2.MemoryStat
		workingSetBytes uint64
		expected        uint64
	}{
		{
			desc:            "no limit",
			memory:          statWithLimit(math.MaxUint64), // no limit
			workingSetBytes: 500,
			expected:        0,
		},
		{
			desc:            "with limit",
			memory:          statWithLimit(5000),
			workingSetBytes: 500,
			expected:        5000 - 500,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.desc, func(t *testing.T) {
			assert.Equal(t, tc.expected, getAvailableBytesV2(tc.memory, tc.workingSetBytes))
		})
	}
}
// TestContainerMetricsMemory verifies the MemoryUsage produced by
// memoryContainerStats for cgroup v1 and v2 metrics, with and without a
// memory limit.
func TestContainerMetricsMemory(t *testing.T) {
	svc := newTestCRIService()
	now := time.Now()

	// u64 wraps a raw value in the CRI UInt64Value message.
	u64 := func(v uint64) *runtime.UInt64Value {
		return &runtime.UInt64Value{Value: v}
	}

	type testCase struct {
		desc     string
		metrics  interface{}
		expected *runtime.MemoryUsage
	}
	cases := []testCase{
		{
			desc: "v1 metrics - no memory limit",
			metrics: &v1.Metrics{
				Memory: &v1.MemoryStat{
					Usage: &v1.MemoryEntry{
						Limit: math.MaxUint64, // no limit
						Usage: 1000,
					},
					TotalRSS:          10,
					TotalPgFault:      11,
					TotalPgMajFault:   12,
					TotalInactiveFile: 500,
				},
			},
			expected: &runtime.MemoryUsage{
				Timestamp:       now.UnixNano(),
				WorkingSetBytes: u64(500),
				AvailableBytes:  u64(0),
				UsageBytes:      u64(1000),
				RssBytes:        u64(10),
				PageFaults:      u64(11),
				MajorPageFaults: u64(12),
			},
		},
		{
			desc: "v1 metrics - memory limit",
			metrics: &v1.Metrics{
				Memory: &v1.MemoryStat{
					Usage: &v1.MemoryEntry{
						Limit: 5000,
						Usage: 1000,
					},
					TotalRSS:          10,
					TotalPgFault:      11,
					TotalPgMajFault:   12,
					TotalInactiveFile: 500,
				},
			},
			expected: &runtime.MemoryUsage{
				Timestamp:       now.UnixNano(),
				WorkingSetBytes: u64(500),
				AvailableBytes:  u64(4500),
				UsageBytes:      u64(1000),
				RssBytes:        u64(10),
				PageFaults:      u64(11),
				MajorPageFaults: u64(12),
			},
		},
		{
			desc: "v2 metrics - memory limit",
			metrics: &v2.Metrics{
				Memory: &v2.MemoryStat{
					Usage:        1000,
					UsageLimit:   5000,
					InactiveFile: 0,
					Pgfault:      11,
					Pgmajfault:   12,
				},
			},
			expected: &runtime.MemoryUsage{
				Timestamp:       now.UnixNano(),
				WorkingSetBytes: u64(1000),
				AvailableBytes:  u64(4000),
				UsageBytes:      u64(1000),
				RssBytes:        u64(0),
				PageFaults:      u64(11),
				MajorPageFaults: u64(12),
			},
		},
		{
			desc: "v2 metrics - no memory limit",
			metrics: &v2.Metrics{
				Memory: &v2.MemoryStat{
					Usage:        1000,
					UsageLimit:   math.MaxUint64, // no limit
					InactiveFile: 0,
					Pgfault:      11,
					Pgmajfault:   12,
				},
			},
			expected: &runtime.MemoryUsage{
				Timestamp:       now.UnixNano(),
				WorkingSetBytes: u64(1000),
				AvailableBytes:  u64(0),
				UsageBytes:      u64(1000),
				RssBytes:        u64(0),
				PageFaults:      u64(11),
				MajorPageFaults: u64(12),
			},
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.desc, func(t *testing.T) {
			got, err := svc.memoryContainerStats("ID", tc.metrics, now)
			assert.NoError(t, err)
			assert.Equal(t, tc.expected, got)
		})
	}
}
// TestListContainerStats drives toCRIContainerStats with metrics for
// containers that are, or are not, present in the container store. The
// subtests share one criService, so state added by a "before" hook remains
// visible to later subtests.
func TestListContainerStats(t *testing.T) {
	svc := newTestCRIService()

	type testCase struct {
		name       string
		ctx        context.Context
		stats      []*types.Metric
		containers []containerstore.Container
		before     func() // optional setup, run right before the call
		after      func() // optional teardown, run right after the call
		want       *runtime.ListContainerStatsResponse
		wantErr    bool
	}
	cases := []testCase{
		{
			name:  "args containers having c1,but containerStore not found c1, so filter c1",
			ctx:   context.Background(),
			stats: []*types.Metric{{ID: "c1"}},
			containers: []containerstore.Container{
				{Metadata: containerstore.Metadata{ID: "c1", SandboxID: "s1"}},
			},
			want: &runtime.ListContainerStatsResponse{},
		},
		{
			name:  "args containers having c1,c2, but containerStore not found c1, so filter c1",
			ctx:   context.Background(),
			stats: []*types.Metric{{ID: "c1"}, {ID: "c2"}},
			containers: []containerstore.Container{
				{Metadata: containerstore.Metadata{ID: "c1", SandboxID: "s1"}},
				{Metadata: containerstore.Metadata{ID: "c2", SandboxID: "s2"}},
			},
			before: func() {
				svc.containerStore.Add(containerstore.Container{
					Metadata: containerstore.Metadata{ID: "c2"},
				})
				svc.sandboxStore.Add(sandboxstore.Sandbox{
					Metadata: sandboxstore.Metadata{ID: "s2"},
				})
			},
			wantErr: true,
			want:    nil,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if tc.before != nil {
				tc.before()
			}
			got, err := svc.toCRIContainerStats(tc.ctx, tc.stats, tc.containers)
			if tc.after != nil {
				tc.after()
			}

			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("ListContainerStats() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if reflect.DeepEqual(got, tc.want) {
				return
			}
			t.Errorf("ListContainerStats() = %v, want %v", got, tc.want)
		})
	}
}

View File

@@ -1,185 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"encoding/json"
"fmt"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
"github.com/containerd/containerd/v2/pkg/cri/util"
"github.com/containerd/errdefs"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// ContainerStatus looks up the requested container and returns its CRI status,
// plus verbose info when the request asks for it.
//
// An error is returned when the container is not in the store, when the image
// lookup fails for a reason other than "not found", when the creation time has
// to be fetched from containerd and that fails, or when the verbose info
// cannot be built.
func (c *criService) ContainerStatus(ctx context.Context, r *runtime.ContainerStatusRequest) (*runtime.ContainerStatusResponse, error) {
	id := r.GetContainerId()
	container, err := c.containerStore.Get(id)
	if err != nil {
		return nil, fmt.Errorf("an error occurred when try to find container %q: %w", id, err)
	}

	// TODO(random-liu): Clean up the following logic in CRI.
	// Current assumption:
	// * ImageSpec in container config is image ID.
	// * ImageSpec in container status is image tag.
	// * ImageRef in container status is repo digest.
	spec := container.Config.GetImage()
	imageRef := container.ImageRef
	image, err := c.GetImage(imageRef)
	switch {
	case err == nil:
		repoTags, repoDigests := util.ParseImageReferences(image.References)
		if len(repoTags) > 0 {
			// Based on current behavior of dockershim, this field should be
			// image tag.
			spec = &runtime.ImageSpec{Image: repoTags[0]}
		}
		if len(repoDigests) > 0 {
			// Based on the CRI definition, this field will be consumed by user.
			imageRef = repoDigests[0]
		}
	case !errdefs.IsNotFound(err):
		return nil, fmt.Errorf("failed to get image %q: %w", imageRef, err)
	}
	// A missing image is tolerated: spec and imageRef keep the stored values.

	status := toCRIContainerStatus(container, spec, imageRef)
	if status.GetCreatedAt() == 0 {
		// CRI doesn't allow CreatedAt == 0.
		ctrInfo, infoErr := container.Container.Info(ctx)
		if infoErr != nil {
			return nil, fmt.Errorf("failed to get CreatedAt in %q state: %w", status.State, infoErr)
		}
		status.CreatedAt = ctrInfo.CreatedAt.UnixNano()
	}

	verboseInfo, err := toCRIContainerInfo(ctx, container, r.GetVerbose())
	if err != nil {
		return nil, fmt.Errorf("failed to get verbose container info: %w", err)
	}

	resp := &runtime.ContainerStatusResponse{
		Status: status,
		Info:   verboseInfo,
	}
	return resp, nil
}
// toCRIContainerStatus builds the CRI ContainerStatus for an internal
// container, using the supplied image spec and image reference.
//
// An exited container without a stored reason gets a default reason derived
// from its exit code. StartedAt is only reported for running, exited and
// unknown containers, and FinishedAt only for exited and unknown ones.
func toCRIContainerStatus(container containerstore.Container, spec *runtime.ImageSpec, imageRef string) *runtime.ContainerStatus {
	meta := container.Metadata
	status := container.Status.Get()
	state := status.State()

	exited := state == runtime.ContainerState_CONTAINER_EXITED
	unknown := state == runtime.ContainerState_CONTAINER_UNKNOWN
	running := state == runtime.ContainerState_CONTAINER_RUNNING

	reason := status.Reason
	if exited && reason == "" {
		reason = errorExitReason
		if status.ExitCode == 0 {
			reason = completeExitReason
		}
	}

	// A container in the created state reports neither timestamp.
	var startedAt, finishedAt int64
	if running || exited || unknown {
		startedAt = status.StartedAt
	}
	if exited || unknown {
		finishedAt = status.FinishedAt
	}

	config := meta.Config
	return &runtime.ContainerStatus{
		Id:          meta.ID,
		Metadata:    config.GetMetadata(),
		State:       state,
		CreatedAt:   status.CreatedAt,
		StartedAt:   startedAt,
		FinishedAt:  finishedAt,
		ExitCode:    status.ExitCode,
		Image:       spec,
		ImageRef:    imageRef,
		Reason:      reason,
		Message:     status.Message,
		Labels:      config.GetLabels(),
		Annotations: config.GetAnnotations(),
		Mounts:      config.GetMounts(),
		LogPath:     meta.LogPath,
		Resources:   status.Resources,
	}
}
// ContainerInfo is extra information for a container.
// toCRIContainerInfo fills it in and returns it JSON-encoded under the "info"
// key of the verbose container status response.
type ContainerInfo struct {
// TODO(random-liu): Add sandboxID in CRI container status.
// SandboxID is copied from the container's SandboxID.
SandboxID string `json:"sandboxID"`
// Pid is taken from the container status.
Pid uint32 `json:"pid"`
// Removing is taken from the container status.
Removing bool `json:"removing"`
// SnapshotKey and Snapshotter are copied from the containerd container info.
SnapshotKey string `json:"snapshotKey"`
Snapshotter string `json:"snapshotter"`
// RuntimeType is the runtime name from the containerd container info.
RuntimeType string `json:"runtimeType"`
// RuntimeOptions holds the result of getRuntimeOptions for the container.
RuntimeOptions interface{} `json:"runtimeOptions"`
// Config is the container config stored in the container metadata.
Config *runtime.ContainerConfig `json:"config"`
// RuntimeSpec is the spec returned by the containerd container.
RuntimeSpec *runtimespec.Spec `json:"runtimeSpec"`
}
// toCRIContainerInfo produces the info map of a CRI container status response.
//
// It returns (nil, nil) unless verbose is set. Otherwise it gathers the
// runtime spec, the containerd container info and the runtime options, and
// returns a ContainerInfo JSON-encoded under the "info" key. Any failure while
// gathering or encoding is returned as an error.
func toCRIContainerInfo(ctx context.Context, container containerstore.Container, verbose bool) (map[string]string, error) {
	if !verbose {
		return nil, nil
	}

	status := container.Status.Get()

	runtimeSpec, err := container.Container.Spec(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get container runtime spec: %w", err)
	}

	ctrInfo, err := container.Container.Info(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get container info: %w", err)
	}

	runtimeOptions, err := getRuntimeOptions(ctrInfo)
	if err != nil {
		return nil, fmt.Errorf("failed to get runtime options: %w", err)
	}

	// TODO(random-liu): Change CRI status info to use array instead of map.
	ci := &ContainerInfo{
		SandboxID:      container.SandboxID,
		Pid:            status.Pid,
		Removing:       status.Removing,
		SnapshotKey:    ctrInfo.SnapshotKey,
		Snapshotter:    ctrInfo.Snapshotter,
		RuntimeType:    ctrInfo.Runtime.Name,
		RuntimeOptions: runtimeOptions,
		Config:         container.Metadata.Config,
		RuntimeSpec:    runtimeSpec,
	}

	encoded, err := json.Marshal(ci)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal info %v: %w", ci, err)
	}
	return map[string]string{"info": string(encoded)}, nil
}

View File

@@ -1,304 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"testing"
"time"
criconfig "github.com/containerd/containerd/v2/pkg/cri/config"
snapshotstore "github.com/containerd/containerd/v2/pkg/cri/store/snapshot"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
imagestore "github.com/containerd/containerd/v2/pkg/cri/store/image"
)
// getContainerStatusTestData returns a fresh set of fixtures for the container
// status tests: container metadata, a created-state container status, the
// image the container refers to, and the CRI status expected for them.
// Callers adjust the returned values to fit each test case.
func getContainerStatusTestData() (*containerstore.Metadata, *containerstore.Status,
	*imagestore.Image, *runtime.ContainerStatus) {
	const (
		imageID    = "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
		testID     = "test-id"
		logPath    = "test-log-path"
		repoTag    = "gcr.io/library/busybox:latest"
		repoDigest = "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"
	)

	config := &runtime.ContainerConfig{
		Metadata: &runtime.ContainerMetadata{
			Name:    "test-name",
			Attempt: 1,
		},
		Image: &runtime.ImageSpec{Image: "test-image"},
		Mounts: []*runtime.Mount{{
			ContainerPath: "test-container-path",
			HostPath:      "test-host-path",
		}},
		Labels:      map[string]string{"a": "b"},
		Annotations: map[string]string{"c": "d"},
	}
	createdAt := time.Now().UnixNano()

	metadata := &containerstore.Metadata{
		ID:        testID,
		Name:      "test-long-name",
		SandboxID: "test-sandbox-id",
		Config:    config,
		ImageRef:  imageID,
		LogPath:   logPath,
	}
	status := &containerstore.Status{
		Pid:       1234,
		CreatedAt: createdAt,
	}
	image := &imagestore.Image{
		ID:         imageID,
		References: []string{repoTag, repoDigest},
	}
	expected := &runtime.ContainerStatus{
		Id:          testID,
		Metadata:    config.GetMetadata(),
		State:       runtime.ContainerState_CONTAINER_CREATED,
		CreatedAt:   createdAt,
		Image:       &runtime.ImageSpec{Image: repoTag},
		ImageRef:    repoDigest,
		Reason:      completeExitReason,
		Labels:      config.GetLabels(),
		Annotations: config.GetAnnotations(),
		Mounts:      config.GetMounts(),
		LogPath:     logPath,
	}
	return metadata, status, image, expected
}
// TestToCRIContainerStatus checks the status produced by toCRIContainerStatus
// for created, running and exited containers, including the default exit
// reasons chosen when the stored reason is empty.
func TestToCRIContainerStatus(t *testing.T) {
	type testCase struct {
		desc           string
		startedAt      int64
		finishedAt     int64
		exitCode       int32
		reason         string
		message        string
		expectedState  runtime.ContainerState
		expectedReason string
	}
	cases := []testCase{
		{
			desc:          "container created",
			expectedState: runtime.ContainerState_CONTAINER_CREATED,
		},
		{
			desc:          "container running",
			startedAt:     time.Now().UnixNano(),
			expectedState: runtime.ContainerState_CONTAINER_RUNNING,
		},
		{
			desc:           "container exited with reason",
			startedAt:      time.Now().UnixNano(),
			finishedAt:     time.Now().UnixNano(),
			exitCode:       1,
			reason:         "test-reason",
			message:        "test-message",
			expectedState:  runtime.ContainerState_CONTAINER_EXITED,
			expectedReason: "test-reason",
		},
		{
			desc:           "container exited with exit code 0 without reason",
			startedAt:      time.Now().UnixNano(),
			finishedAt:     time.Now().UnixNano(),
			exitCode:       0,
			message:        "test-message",
			expectedState:  runtime.ContainerState_CONTAINER_EXITED,
			expectedReason: completeExitReason,
		},
		{
			desc:           "container exited with non-zero exit code without reason",
			startedAt:      time.Now().UnixNano(),
			finishedAt:     time.Now().UnixNano(),
			exitCode:       1,
			message:        "test-message",
			expectedState:  runtime.ContainerState_CONTAINER_EXITED,
			expectedReason: errorExitReason,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.desc, func(t *testing.T) {
			metadata, status, _, want := getContainerStatusTestData()

			// Shape the stored status after the test case.
			status.StartedAt, status.FinishedAt = tc.startedAt, tc.finishedAt
			status.ExitCode = tc.exitCode
			status.Reason, status.Message = tc.reason, tc.message
			container, err := containerstore.NewContainer(
				*metadata,
				containerstore.WithFakeStatus(*status),
			)
			assert.NoError(t, err)

			// Shape the expectation after the test case.
			want.Reason = tc.expectedReason
			want.StartedAt, want.FinishedAt = tc.startedAt, tc.finishedAt
			want.ExitCode = tc.exitCode
			want.Message = tc.message
			patchExceptedWithState(want, tc.expectedState)

			got := toCRIContainerStatus(container, want.Image, want.ImageRef)
			assert.Equal(t, want, got, tc.desc)
		})
	}
}
// TestToCRIContainerInfo checks that toCRIContainerInfo returns no info and no
// error when verbose is false.
//
// TODO(mikebrow): add a fake containerd container.Container.Spec client api so we can test verbose is true option
func TestToCRIContainerInfo(t *testing.T) {
	metadata, status, _, _ := getContainerStatusTestData()
	fakeStatus := containerstore.WithFakeStatus(*status)
	container, err := containerstore.NewContainer(*metadata, fakeStatus)
	assert.NoError(t, err)

	const verbose = false
	info, err := toCRIContainerInfo(context.Background(), container, verbose)
	assert.NoError(t, err)
	assert.Nil(t, info)
}
// TestContainerStatus runs the ContainerStatus RPC against a test service for
// containers in different states and for containers missing from the store.
//
// NOTE(review): the "image not exist" case also sets exist to false, so the
// error it observes comes from the container lookup rather than from the
// image lookup — confirm whether that is intended.
func TestContainerStatus(t *testing.T) {
	type testCase struct {
		desc          string
		exist         bool
		imageExist    bool
		startedAt     int64
		finishedAt    int64
		reason        string
		expectedState runtime.ContainerState
		expectErr     bool
	}
	cases := []testCase{
		{
			desc:          "container created",
			exist:         true,
			imageExist:    true,
			expectedState: runtime.ContainerState_CONTAINER_CREATED,
		},
		{
			desc:          "container running",
			exist:         true,
			imageExist:    true,
			startedAt:     time.Now().UnixNano(),
			expectedState: runtime.ContainerState_CONTAINER_RUNNING,
		},
		{
			desc:          "container exited",
			exist:         true,
			imageExist:    true,
			startedAt:     time.Now().UnixNano(),
			finishedAt:    time.Now().UnixNano(),
			reason:        "test-reason",
			expectedState: runtime.ContainerState_CONTAINER_EXITED,
		},
		{
			desc:       "container not exist",
			exist:      false,
			imageExist: true,
			expectErr:  true,
		},
		{
			desc:       "image not exist",
			exist:      false,
			imageExist: false,
			expectErr:  true,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.desc, func(t *testing.T) {
			svc := newTestCRIService()
			metadata, status, image, want := getContainerStatusTestData()

			// Shape the stored status after the test case.
			status.StartedAt, status.FinishedAt = tc.startedAt, tc.finishedAt
			status.Reason = tc.reason
			container, err := containerstore.NewContainer(
				*metadata,
				containerstore.WithFakeStatus(*status),
			)
			assert.NoError(t, err)

			if tc.exist {
				assert.NoError(t, svc.containerStore.Add(container))
			}
			if tc.imageExist {
				imageStore, err := imagestore.NewFakeStore([]imagestore.Image{*image})
				assert.NoError(t, err)
				svc.ImageService = &fakeImageService{imageStore: imageStore}
			}

			req := &runtime.ContainerStatusRequest{ContainerId: container.ID}
			resp, err := svc.ContainerStatus(context.Background(), req)
			if tc.expectErr {
				assert.Error(t, err)
				assert.Nil(t, resp)
				return
			}

			// Shape the expectation after the test case.
			want.StartedAt, want.FinishedAt = tc.startedAt, tc.finishedAt
			want.Reason = tc.reason
			patchExceptedWithState(want, tc.expectedState)
			assert.Equal(t, want, resp.GetStatus())
		})
	}
}
// fakeImageService is a test double for the image service used by the CRI
// service. Only GetImage is backed by real data (the wrapped image store);
// every other method is a stub.
type fakeImageService struct {
	imageStore *imagestore.Store
}

// RuntimeSnapshotter always returns an empty snapshotter name.
func (s *fakeImageService) RuntimeSnapshotter(ctx context.Context, ociRuntime criconfig.Runtime) string { return "" }

// UpdateImage does nothing and reports success.
func (s *fakeImageService) UpdateImage(ctx context.Context, r string) error {
	return nil
}

// CheckImages does nothing and reports success.
func (s *fakeImageService) CheckImages(ctx context.Context) error {
	return nil
}

// GetImage looks the image up in the wrapped image store.
func (s *fakeImageService) GetImage(id string) (imagestore.Image, error) {
	return s.imageStore.Get(id)
}

// GetSnapshot is not implemented and always returns an error.
func (s *fakeImageService) GetSnapshot(key, snapshotter string) (snapshotstore.Snapshot, error) {
	var none snapshotstore.Snapshot
	return none, errors.New("not implemented")
}

// LocalResolve is not implemented and always returns an error.
func (s *fakeImageService) LocalResolve(refOrID string) (imagestore.Image, error) {
	var none imagestore.Image
	return none, errors.New("not implemented")
}

// ImageFSPaths returns a new, empty map on every call.
func (s *fakeImageService) ImageFSPaths() map[string]string {
	return map[string]string{}
}

// PullImage is not implemented and always returns an error.
func (s *fakeImageService) PullImage(context.Context, string, func(string) (string, string, error), *runtime.PodSandboxConfig) (string, error) {
	return "", errors.New("not implemented")
}
// patchExceptedWithState sets the state on an expected ContainerStatus and
// clears the timestamps that state does not report: both StartedAt and
// FinishedAt for a created container, only FinishedAt for a running one.
func patchExceptedWithState(expected *runtime.ContainerStatus, state runtime.ContainerState) {
	expected.State = state

	created := state == runtime.ContainerState_CONTAINER_CREATED
	running := state == runtime.ContainerState_CONTAINER_RUNNING
	if created {
		expected.StartedAt = 0
	}
	if created || running {
		expected.FinishedAt = 0
	}
}

View File

@@ -1,219 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
"sync/atomic"
"syscall"
"time"
eventtypes "github.com/containerd/containerd/v2/api/events"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
ctrdutil "github.com/containerd/containerd/v2/pkg/cri/util"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/errdefs"
"github.com/containerd/log"
"github.com/moby/sys/signal"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// StopContainer stops a running container, giving it the grace period (in
// seconds) carried by the request as its timeout.
//
// After the container has been stopped, NRI is notified; an NRI failure is
// only logged. The elapsed time is recorded in the container stop timer under
// the container's runtime name. An error is returned when the container is
// not in the store, when stopping it fails, or when its containerd info
// cannot be read afterwards.
func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
	begin := time.Now()

	// Get container config from container store.
	id := r.GetContainerId()
	container, err := c.containerStore.Get(id)
	if err != nil {
		return nil, fmt.Errorf("an error occurred when try to find container %q: %w", id, err)
	}

	gracePeriod := time.Duration(r.GetTimeout()) * time.Second
	if stopErr := c.stopContainer(ctx, container, gracePeriod); stopErr != nil {
		return nil, stopErr
	}

	// Notify NRI, passing the sandbox only when it can still be found.
	var nriErr error
	if sandbox, getErr := c.sandboxStore.Get(container.SandboxID); getErr == nil {
		nriErr = c.nri.StopContainer(ctx, &sandbox, &container)
	} else {
		nriErr = c.nri.StopContainer(ctx, nil, &container)
	}
	if nriErr != nil {
		log.G(ctx).WithError(nriErr).Error("NRI failed to stop container")
	}

	info, err := container.Container.Info(ctx)
	if err != nil {
		return nil, fmt.Errorf("get container info: %w", err)
	}
	containerStopTimer.WithValues(info.Runtime.Name).UpdateSince(begin)

	return &runtime.StopContainerResponse{}, nil
}
// stopContainer stops a container based on the container metadata.
//
// It is a no-op (returns nil) unless the container is in the running or
// unknown state. When timeout is greater than zero the container first
// receives its stop signal (the container's StopSignal, else the image's
// configured stop signal, else SIGTERM) and is given up to timeout to stop;
// if it is still not stopped, or when timeout is zero or negative, SIGKILL
// is sent and the function waits on ctx for the stop to be observed.
//
// An error is returned when the task cannot be loaded, a signal cannot be
// parsed or delivered, or ctx ends before the container is seen as stopped.
func (c *criService) stopContainer(ctx context.Context, container containerstore.Container, timeout time.Duration) error {
id := container.ID
sandboxID := container.SandboxID
// Return without error if container is not running. This makes sure that
// stop only takes real action after the container is started.
state := container.Status.Get().State()
if state != runtime.ContainerState_CONTAINER_RUNNING &&
state != runtime.ContainerState_CONTAINER_UNKNOWN {
log.G(ctx).Infof("Container to stop %q must be in running or unknown state, current state %q",
id, criContainerStateToString(state))
return nil
}
task, err := container.Container.Task(ctx, nil)
if err != nil {
// Only a "not found" task is tolerated; any other failure is reported.
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to get task for container %q: %w", id, err)
}
// Don't return for unknown state, some cleanup needs to be done.
if state == runtime.ContainerState_CONTAINER_UNKNOWN {
return cleanupUnknownContainer(ctx, id, container, sandboxID, c)
}
return nil
}
// Handle unknown state.
if state == runtime.ContainerState_CONTAINER_UNKNOWN {
// Start an exit handler for containers in unknown state.
waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext())
defer waitCancel()
exitCh, err := task.Wait(waitCtx)
if err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to wait for task for %q: %w", id, err)
}
// The task disappeared between Task and Wait: clean up directly.
return cleanupUnknownContainer(ctx, id, container, sandboxID, c)
}
exitCtx, exitCancel := context.WithCancel(context.Background())
stopCh := c.eventMonitor.startContainerExitMonitor(exitCtx, id, task.Pid(), exitCh)
// Deferred calls run in reverse order, so this one runs before the
// deferred waitCancel registered above.
defer func() {
exitCancel()
// This ensures that exit monitor is stopped before
// `Wait` is cancelled, so no exit event is generated
// because of the `Wait` cancellation.
<-stopCh
}()
}
// We only need to kill the task. The event handler will Delete the
// task from containerd after it handles the Exited event.
if timeout > 0 {
stopSignal := "SIGTERM"
if container.StopSignal != "" {
stopSignal = container.StopSignal
} else {
// The image may have been deleted, and the `StopSignal` field is
// just introduced to handle that.
// However, for containers created before the `StopSignal` field is
// introduced, still try to get the stop signal from the image config.
// If the image has been deleted, logging an error and using the
// default SIGTERM is still better than returning error and leaving
// the container unstoppable. (See issue #990)
// TODO(random-liu): Remove this logic when containerd 1.2 is deprecated.
image, err := c.GetImage(container.ImageRef)
if err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to get image %q: %w", container.ImageRef, err)
}
log.G(ctx).Warningf("Image %q not found, stop container with signal %q", container.ImageRef, stopSignal)
} else {
if image.ImageSpec.Config.StopSignal != "" {
stopSignal = image.ImageSpec.Config.StopSignal
}
}
}
sig, err := signal.ParseSignal(stopSignal)
if err != nil {
return fmt.Errorf("failed to parse stop signal %q: %w", stopSignal, err)
}
// sswt reports whether this call should send the stop signal. The
// compare-and-swap flips the flag from 0 to 1 exactly once, so only the
// first stop request with a timeout delivers the signal. Without the
// flag the signal is always sent.
var sswt bool
if container.IsStopSignaledWithTimeout == nil {
log.G(ctx).Infof("unable to ensure stop signal %v was not sent twice to container %v", sig, id)
sswt = true
} else {
sswt = atomic.CompareAndSwapUint32(container.IsStopSignaledWithTimeout, 0, 1)
}
if sswt {
log.G(ctx).Infof("Stop container %q with signal %v", id, sig)
// A "not found" task is not an error here.
if err = task.Kill(ctx, sig); err != nil && !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to stop container %q: %w", id, err)
}
} else {
log.G(ctx).Infof("Skipping the sending of signal %v to container %q because a prior stop with timeout>0 request already sent the signal", sig, id)
}
// Bound the graceful wait by timeout, still honoring the parent ctx.
sigTermCtx, sigTermCtxCancel := context.WithTimeout(ctx, timeout)
defer sigTermCtxCancel()
err = c.waitContainerStop(sigTermCtx, container)
if err == nil {
// Container stopped on first signal no need for SIGKILL
return nil
}
// If the parent context was cancelled or exceeded return immediately
if ctx.Err() != nil {
return ctx.Err()
}
// sigTermCtx was exceeded. Send SIGKILL
log.G(ctx).Debugf("Stop container %q with signal %v timed out", id, sig)
}
log.G(ctx).Infof("Kill container %q", id)
if err = task.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to kill container %q: %w", id, err)
}
// Wait until container stop is observed by event monitor; the wait is
// bounded only by ctx.
err = c.waitContainerStop(ctx, container)
if err != nil {
return fmt.Errorf("an error occurs during waiting for container %q to be killed: %w", id, err)
}
return nil
}
// waitContainerStop blocks until the container's Stopped channel fires or
// ctx is done, whichever comes first. It returns nil when the container
// stopped, and ctx.Err() wrapped with the container ID otherwise.
func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container) error {
	stopped := container.Stopped()
	select {
	case <-stopped:
		return nil
	case <-ctx.Done():
		cause := ctx.Err()
		return fmt.Errorf("wait container %q: %w", container.ID, cause)
	}
}
// cleanupUnknownContainer cleans up a stopped container that is in the
// unknown state by feeding a synthesized TaskExit event (pid 0, unknown exit
// code, exit time "now") to handleContainerExit.
func cleanupUnknownContainer(ctx context.Context, id string, cntr containerstore.Container, sandboxID string, c *criService) error {
	// No real exit event is available, so fabricate one and reuse the
	// regular exit handling path for the cleanup.
	exitEvent := &eventtypes.TaskExit{
		ContainerID: id,
		ID:          id,
		Pid:         0,
		ExitStatus:  unknownExitCode,
		ExitedAt:    protobuf.ToTimestamp(time.Now()),
	}
	return handleContainerExit(ctx, exitEvent, cntr, sandboxID, c)
}

View File

@@ -1,92 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"testing"
"time"
"github.com/stretchr/testify/assert"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
)
// TestWaitContainerStop checks that waitContainerStop returns an error when
// the context times out or is cancelled before the container stops, and
// returns nil when the container is already stopped.
func TestWaitContainerStop(t *testing.T) {
	const containerID = "test-id"

	type testCase struct {
		desc      string
		status    *containerstore.Status
		cancel    bool
		timeout   time.Duration
		expectErr bool
	}

	cases := []testCase{
		{
			desc: "should return error if timeout exceeds",
			status: &containerstore.Status{
				CreatedAt: time.Now().UnixNano(),
				StartedAt: time.Now().UnixNano(),
			},
			timeout:   200 * time.Millisecond,
			expectErr: true,
		},
		{
			desc: "should return error if context is cancelled",
			status: &containerstore.Status{
				CreatedAt: time.Now().UnixNano(),
				StartedAt: time.Now().UnixNano(),
			},
			timeout:   time.Hour,
			cancel:    true,
			expectErr: true,
		},
		{
			desc: "should not return error if container is stopped before timeout",
			status: &containerstore.Status{
				CreatedAt:  time.Now().UnixNano(),
				StartedAt:  time.Now().UnixNano(),
				FinishedAt: time.Now().UnixNano(),
			},
			timeout:   time.Hour,
			expectErr: false,
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			svc := newTestCRIService()
			cntr, err := containerstore.NewContainer(
				containerstore.Metadata{ID: containerID},
				containerstore.WithFakeStatus(*tc.status),
			)
			assert.NoError(t, err)
			assert.NoError(t, svc.containerStore.Add(cntr))

			// Build the context: optionally pre-cancelled, then optionally
			// bounded by the case's timeout.
			ctx := context.Background()
			if tc.cancel {
				var cancel context.CancelFunc
				ctx, cancel = context.WithCancel(ctx)
				cancel()
			}
			if tc.timeout > 0 {
				var cancel context.CancelFunc
				ctx, cancel = context.WithTimeout(ctx, tc.timeout)
				defer cancel()
			}

			err = svc.waitContainerStop(ctx, cntr)
			assert.Equal(t, tc.expectErr, err != nil, tc.desc)
		})
	}
}

View File

@@ -1,157 +0,0 @@
//go:build !darwin && !freebsd
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
gocontext "context"
"fmt"
"github.com/containerd/typeurl/v2"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/errdefs"
"github.com/containerd/log"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
ctrdutil "github.com/containerd/containerd/v2/pkg/cri/util"
)
// UpdateContainerResources updates ContainerConfig of the container.
//
// The request's Linux resources are first offered to NRI, which may return an
// adjusted set; the (possibly adjusted) resources are then applied inside a
// synchronous container status update. A failure of the NRI post-update
// notification is only logged and does not fail the request.
//
// It returns an error when the container or its sandbox cannot be found, when
// the NRI update fails, or when the resources cannot be applied.
func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (retRes *runtime.UpdateContainerResourcesResponse, retErr error) {
	container, err := c.containerStore.Get(r.GetContainerId())
	if err != nil {
		return nil, fmt.Errorf("failed to find container: %w", err)
	}

	sandbox, err := c.sandboxStore.Get(container.SandboxID)
	if err != nil {
		return nil, err
	}

	resources := r.GetLinux()
	updated, err := c.nri.UpdateContainerResources(ctx, &sandbox, &container, resources)
	if err != nil {
		return nil, fmt.Errorf("NRI container update failed: %w", err)
	}
	if updated != nil {
		if resources != nil {
			// Overwrite in place so the request keeps pointing at the same
			// resources object.
			*resources = *updated
		} else if r != nil {
			// The request carried no Linux resources; writing through the nil
			// pointer would panic, so attach the NRI result to the request.
			r.Linux = updated
		}
	}

	// Update resources in status update transaction, so that:
	// 1) There won't be race condition with container start.
	// 2) There won't be concurrent resource update to the same container.
	if err := container.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
		return c.updateContainerResources(ctx, container, r, status)
	}); err != nil {
		return nil, fmt.Errorf("failed to update resources: %w", err)
	}

	// The update has already been applied; a failed notification is logged
	// only.
	err = c.nri.PostUpdateContainerResources(ctx, &sandbox, &container)
	if err != nil {
		log.G(ctx).WithError(err).Errorf("NRI post-update notification failed")
	}

	return &runtime.UpdateContainerResourcesResponse{}, nil
}
// updateContainerResources applies the resources requested in r to cntr and
// returns the container status to store.
//
// The container spec is always rewritten with the new resources; when the
// container is running, the live task is updated as well. On error the spec
// is rolled back to its previous value; on success the returned status is
// produced by copyResourcesToStatus from the new spec and the incoming status.
// The named results are required: the deferred function inspects retErr and
// overwrites newStatus.
func (c *criService) updateContainerResources(ctx context.Context,
cntr containerstore.Container,
r *runtime.UpdateContainerResourcesRequest,
status containerstore.Status) (newStatus containerstore.Status, retErr error) {
newStatus = status
id := cntr.ID
// Do not update the container when there is a removal in progress.
if status.Removing {
return newStatus, fmt.Errorf("container %q is in removing state", id)
}
// Update container spec. If the container is not started yet, updating
// spec makes sure that the resource limits are correct when start;
// if the container is already started, updating spec is still required,
// the spec will become our source of truth for resource limits.
oldSpec, err := cntr.Container.Spec(ctx)
if err != nil {
return newStatus, fmt.Errorf("failed to get container spec: %w", err)
}
newSpec, err := updateOCIResource(ctx, oldSpec, r, c.config)
if err != nil {
return newStatus, fmt.Errorf("failed to update resource in spec: %w", err)
}
if err := updateContainerSpec(ctx, cntr.Container, newSpec); err != nil {
return newStatus, err
}
// Registered only after the spec was written, so every return below either
// rolls the spec back (error) or publishes the new resources (success).
defer func() {
if retErr != nil {
// Use a context obtained from DeferContext rather than ctx for the
// rollback.
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Reset spec on error.
if err := updateContainerSpec(deferCtx, cntr.Container, oldSpec); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to update spec %+v for container %q", oldSpec, id)
}
} else {
// Update container status only when the spec is updated
newStatus = copyResourcesToStatus(newSpec, status)
}
}()
// If container is not running, only update spec is enough, new resource
// limit will be applied when container start.
if status.State() != runtime.ContainerState_CONTAINER_RUNNING {
return newStatus, nil
}
task, err := cntr.Container.Task(ctx, nil)
if err != nil {
if errdefs.IsNotFound(err) {
// Task exited already.
return newStatus, nil
}
return newStatus, fmt.Errorf("failed to get task: %w", err)
}
// newSpec.Linux / newSpec.Windows won't be nil
if err := task.Update(ctx, containerd.WithResources(getResources(newSpec))); err != nil {
if errdefs.IsNotFound(err) {
// Task exited already.
return newStatus, nil
}
return newStatus, fmt.Errorf("failed to update resources: %w", err)
}
return newStatus, nil
}
// updateContainerSpec marshals spec and stores it as the Spec of the given
// containerd container. It returns an error if the spec cannot be marshaled
// or the container update fails.
func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error {
	encoded, err := typeurl.MarshalAny(spec)
	if err != nil {
		return fmt.Errorf("failed to marshal spec %+v: %w", spec, err)
	}

	// Update option that swaps in the freshly encoded spec.
	setSpec := func(_ gocontext.Context, _ *containerd.Client, ctr *containers.Container) error {
		ctr.Spec = encoded
		return nil
	}
	if err = cntr.Update(ctx, setSpec); err != nil {
		return fmt.Errorf("failed to update container spec: %w", err)
	}
	return nil
}

View File

@@ -1,51 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
criconfig "github.com/containerd/containerd/v2/pkg/cri/config"
"github.com/containerd/containerd/v2/pkg/cri/opts"
"github.com/containerd/containerd/v2/pkg/cri/util"
)
// updateOCIResource returns a deep copy of spec with the Linux resources from
// r applied to it. The spec passed in is not modified. An error is returned
// if the copy fails or the resources cannot be applied.
func updateOCIResource(ctx context.Context, spec *runtimespec.Spec, r *runtime.UpdateContainerResourcesRequest,
	config criconfig.Config) (*runtimespec.Spec, error) {
	// Work on a private copy so the caller's spec stays untouched.
	specCopy := runtimespec.Spec{}
	if err := util.DeepCopy(&specCopy, spec); err != nil {
		return nil, fmt.Errorf("failed to deep copy: %w", err)
	}
	if specCopy.Linux == nil {
		specCopy.Linux = new(runtimespec.Linux)
	}

	applyResources := opts.WithResources(r.GetLinux(), config.TolerateMissingHugetlbController, config.DisableHugetlbController)
	if err := applyResources(ctx, nil, nil, &specCopy); err != nil {
		return nil, fmt.Errorf("unable to set linux container resources: %w", err)
	}
	return &specCopy, nil
}
// getResources returns the Resources field of spec.Linux. spec.Linux must be
// non-nil.
func getResources(spec *runtimespec.Spec) interface{} {
	linux := spec.Linux
	return linux.Resources
}

View File

@@ -1,256 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"testing"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
"google.golang.org/protobuf/proto"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
criconfig "github.com/containerd/containerd/v2/pkg/cri/config"
criopts "github.com/containerd/containerd/v2/pkg/cri/opts"
)
// TestUpdateOCILinuxResource is a table-driven test for updateOCIResource.
// Each case supplies an initial spec and an update request and compares the
// returned spec against the expected one.
func TestUpdateOCILinuxResource(t *testing.T) {
oomscoreadj := new(int)
*oomscoreadj = -500
// expectedSwap yields the expected memory swap value: a pointer to swap when
// criopts.SwapControllerAvailable reports true, nil otherwise.
expectedSwap := func(swap int64) *int64 {
if criopts.SwapControllerAvailable() {
return &swap
}
return nil
}
for _, test := range []struct {
desc string
spec *runtimespec.Spec
request *runtime.UpdateContainerResourcesRequest
expected *runtimespec.Spec
expectErr bool
}{
{
// Every resource field is present in both the spec and the request.
desc: "should be able to update each resource",
spec: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(1111),
Quota: proto.Int64(2222),
Period: proto.Uint64(3333),
Cpus: "0-1",
Mems: "2-3",
},
Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"},
},
},
},
request: &runtime.UpdateContainerResourcesRequest{
Linux: &runtime.LinuxContainerResources{
CpuPeriod: 6666,
CpuQuota: 5555,
CpuShares: 4444,
MemoryLimitInBytes: 54321,
OomScoreAdj: 500,
CpusetCpus: "4-5",
CpusetMems: "6-7",
Unified: map[string]string{"memory.min": "1507328", "memory.swap.max": "0"},
},
},
expected: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{
Limit: proto.Int64(54321),
Swap: expectedSwap(54321),
},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(4444),
Quota: proto.Int64(5555),
Period: proto.Uint64(6666),
Cpus: "4-5",
Mems: "6-7",
},
Unified: map[string]string{"memory.min": "1507328", "memory.swap.max": "0"},
},
},
},
},
{
// The request omits CpuPeriod, CpusetCpus and Unified; the expected spec
// keeps the original values for those.
desc: "should skip empty fields",
spec: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(1111),
Quota: proto.Int64(2222),
Period: proto.Uint64(3333),
Cpus: "0-1",
Mems: "2-3",
},
Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"},
},
},
},
request: &runtime.UpdateContainerResourcesRequest{
Linux: &runtime.LinuxContainerResources{
CpuQuota: 5555,
CpuShares: 4444,
MemoryLimitInBytes: 54321,
OomScoreAdj: 500,
CpusetMems: "6-7",
},
},
expected: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{
Limit: proto.Int64(54321),
Swap: expectedSwap(54321),
},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(4444),
Quota: proto.Int64(5555),
Period: proto.Uint64(3333),
Cpus: "0-1",
Mems: "6-7",
},
Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"},
},
},
},
},
{
// The initial spec only has a memory limit; CPU and Unified come from the
// request.
desc: "should be able to fill empty fields",
spec: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)},
},
},
},
request: &runtime.UpdateContainerResourcesRequest{
Linux: &runtime.LinuxContainerResources{
CpuPeriod: 6666,
CpuQuota: 5555,
CpuShares: 4444,
MemoryLimitInBytes: 54321,
OomScoreAdj: 500,
CpusetCpus: "4-5",
CpusetMems: "6-7",
Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"},
},
},
expected: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{
Limit: proto.Int64(54321),
Swap: expectedSwap(54321),
},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(4444),
Quota: proto.Int64(5555),
Period: proto.Uint64(6666),
Cpus: "4-5",
Mems: "6-7",
},
Unified: map[string]string{"memory.min": "65536", "memory.swap.max": "1024"},
},
},
},
},
{
// Unified keys are merged: "memory.max" exists only in the spec and is kept,
// "memory.min" is overwritten, "memory.swap.max" is added.
desc: "should be able to patch the unified map",
spec: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(1111),
Quota: proto.Int64(2222),
Period: proto.Uint64(3333),
Cpus: "0-1",
Mems: "2-3",
},
Unified: map[string]string{"memory.min": "65536", "memory.max": "1507328"},
},
},
},
request: &runtime.UpdateContainerResourcesRequest{
Linux: &runtime.LinuxContainerResources{
CpuPeriod: 6666,
CpuQuota: 5555,
CpuShares: 4444,
MemoryLimitInBytes: 54321,
OomScoreAdj: 500,
CpusetCpus: "4-5",
CpusetMems: "6-7",
Unified: map[string]string{"memory.min": "1507328", "memory.swap.max": "1024"},
},
},
expected: &runtimespec.Spec{
Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
Linux: &runtimespec.Linux{
Resources: &runtimespec.LinuxResources{
Memory: &runtimespec.LinuxMemory{
Limit: proto.Int64(54321),
Swap: expectedSwap(54321),
},
CPU: &runtimespec.LinuxCPU{
Shares: proto.Uint64(4444),
Quota: proto.Int64(5555),
Period: proto.Uint64(6666),
Cpus: "4-5",
Mems: "6-7",
},
Unified: map[string]string{"memory.min": "1507328", "memory.max": "1507328", "memory.swap.max": "1024"},
},
},
},
},
} {
// Capture the range variable for the subtest closure.
test := test
t.Run(test.desc, func(t *testing.T) {
config := criconfig.Config{
RuntimeConfig: criconfig.RuntimeConfig{
TolerateMissingHugetlbController: true,
DisableHugetlbController: false,
},
}
got, err := updateOCIResource(context.Background(), test.spec, test.request, config)
if test.expectErr {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
assert.Equal(t, test.expected, got)
})
}
}

View File

@@ -1,45 +0,0 @@
//go:build !windows && !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
)
// UpdateContainerResources updates ContainerConfig of the container.
//
// In this build variant no resources are changed: the container is looked up
// and its status goes through an update that returns it unmodified.
func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (retRes *runtime.UpdateContainerResourcesResponse, retErr error) {
	cntr, err := c.containerStore.Get(r.GetContainerId())
	if err != nil {
		return nil, fmt.Errorf("failed to find container: %w", err)
	}

	// Update resources in status update transaction, so that:
	// 1) There won't be race condition with container start.
	// 2) There won't be concurrent resource update to the same container.
	keepStatus := func(status containerstore.Status) (containerstore.Status, error) {
		return status, nil
	}
	if err = cntr.Status.Update(keepStatus); err != nil {
		return nil, fmt.Errorf("failed to update resources: %w", err)
	}
	return &runtime.UpdateContainerResourcesResponse{}, nil
}

View File

@@ -1,51 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
criconfig "github.com/containerd/containerd/v2/pkg/cri/config"
"github.com/containerd/containerd/v2/pkg/cri/opts"
"github.com/containerd/containerd/v2/pkg/cri/util"
)
// updateOCIResource returns a deep copy of spec with the Windows resources
// from r applied to it. The spec passed in is not modified. An error is
// returned if the copy fails or the resources cannot be applied.
func updateOCIResource(ctx context.Context, spec *runtimespec.Spec, r *runtime.UpdateContainerResourcesRequest,
	config criconfig.Config) (*runtimespec.Spec, error) {
	// Work on a private copy so the caller's spec stays untouched.
	specCopy := runtimespec.Spec{}
	if err := util.DeepCopy(&specCopy, spec); err != nil {
		return nil, fmt.Errorf("failed to deep copy: %w", err)
	}
	if specCopy.Windows == nil {
		specCopy.Windows = new(runtimespec.Windows)
	}

	applyResources := opts.WithWindowsResources(r.GetWindows())
	if err := applyResources(ctx, nil, nil, &specCopy); err != nil {
		return nil, fmt.Errorf("unable to set windows container resources: %w", err)
	}
	return &specCopy, nil
}
// getResources returns the Resources field of spec.Windows. spec.Windows must
// be non-nil.
func getResources(spec *runtimespec.Spec) interface{} {
	windows := spec.Windows
	return windows.Resources
}

View File

@@ -1,591 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"errors"
"fmt"
"sync"
"time"
"github.com/containerd/log"
"github.com/containerd/typeurl/v2"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/utils/clock"
eventtypes "github.com/containerd/containerd/v2/api/events"
apitasks "github.com/containerd/containerd/v2/api/services/tasks/v1"
containerd "github.com/containerd/containerd/v2/client"
containerdio "github.com/containerd/containerd/v2/pkg/cio"
"github.com/containerd/containerd/v2/pkg/cri/constants"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
sandboxstore "github.com/containerd/containerd/v2/pkg/cri/store/sandbox"
ctrdutil "github.com/containerd/containerd/v2/pkg/cri/util"
"github.com/containerd/containerd/v2/pkg/events"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/errdefs"
)
const (
// Backoff tuning values. NOTE(review): judging by the names these are the
// initial retry delay, the retry delay cap and the interval at which expired
// backoff queues are checked; their use is not visible here — confirm
// against newBackOff.
backOffInitDuration = 1 * time.Second
backOffMaxDuration = 5 * time.Minute
backOffExpireCheckDuration = 1 * time.Second
// handleEventTimeout is the timeout for handling 1 event. Event monitor
// handles events in serial, if one event blocks the event monitor, no
// other events can be handled.
// Add a timeout for each event handling, events that timeout will be requeued and
// handled again in the future.
handleEventTimeout = 10 * time.Second
)
// eventMonitor monitors containerd event and updates internal state correspondingly.
type eventMonitor struct {
// c is the CRI service whose container and sandbox stores are consulted
// when events are handled.
c *criService
// ch and errCh are the event and error channels obtained from the
// subscriber in subscribe; both must be set before start is called.
ch <-chan *events.Envelope
errCh <-chan error
// ctx is the context handed to the subscription; cancel cancels it and is
// invoked by stop.
ctx context.Context
cancel context.CancelFunc
// backOff holds events whose handling failed so that they can be retried.
backOff *backOff
}
// backOff keeps per-ID queues of events waiting to be retried.
// NOTE(review): the methods operating on this type are outside this chunk;
// the field notes below are derived from names and should be confirmed.
type backOff struct {
// queuePoolMu is mutex used to protect the queuePool map
queuePoolMu sync.Mutex
// queuePool maps an ID to its queue of pending events.
queuePool map[string]*backOffQueue
// tickerMu is mutex used to protect the ticker.
tickerMu sync.Mutex
ticker *time.Ticker
// minDuration, maxDuration and checkDuration are presumably the initial
// delay, the delay cap and the expiry check interval — TODO confirm.
minDuration time.Duration
maxDuration time.Duration
checkDuration time.Duration
// clock is the time source (an injectable clock.Clock).
clock clock.Clock
}
// backOffQueue is the set of events queued for a single ID while that ID is
// in backoff.
type backOffQueue struct {
// events are the queued events, replayed in order by the event monitor.
events []interface{}
// expireTime is presumably the time at which the queue becomes due for
// retry — TODO confirm against the backOff methods.
expireTime time.Time
// duration is the queue's current backoff duration; the event monitor
// passes it back to reBackOff when a retry fails.
duration time.Duration
// clock is the time source (an injectable clock.Clock).
clock clock.Clock
}
// newEventMonitor creates an event monitor bound to the given CRI service,
// with its own cancellable context and a fresh backoff. The monitor does not
// receive events until subscribe is called.
func newEventMonitor(c *criService) *eventMonitor {
	em := &eventMonitor{
		c:       c,
		backOff: newBackOff(),
	}
	em.ctx, em.cancel = context.WithCancel(context.Background())
	return em
}
// subscribe subscribes the monitor to containerd events (task OOM events and
// image events) and stores the resulting event and error channels.
func (em *eventMonitor) subscribe(subscriber events.Subscriber) {
	// note: filters are any match, if you want any match but not in namespace foo
	// then you have to manually filter namespace foo
	const (
		oomFilter   = `topic=="/tasks/oom"`
		imageFilter = `topic~="/images/"`
	)
	em.ch, em.errCh = subscriber.Subscribe(em.ctx, oomFilter, imageFilter)
}
// startSandboxExitMonitor starts a goroutine that waits for the exit status
// of sandbox id on exitCh and handles it, or gives up when ctx is done. If
// handling fails, the exit event is put into the backoff queue for a later
// retry. The returned channel is closed when the goroutine finishes.
func (em *eventMonitor) startSandboxExitMonitor(ctx context.Context, id string, exitCh <-chan containerd.ExitStatus) <-chan struct{} {
	done := make(chan struct{})
	go func() {
		defer close(done)

		var exitRes containerd.ExitStatus
		select {
		case <-ctx.Done():
			return
		case exitRes = <-exitCh:
		}

		code, at, err := exitRes.Result()
		if err != nil {
			log.L.WithError(err).Errorf("failed to get task exit status for %q", id)
			code = unknownExitCode
			at = time.Now()
		}

		evt := &eventtypes.SandboxExit{
			SandboxID:  id,
			ExitStatus: code,
			ExitedAt:   protobuf.ToTimestamp(at),
		}
		log.L.Debugf("received exit event %+v", evt)

		// handle processes the exit with a bounded, namespaced context. A
		// sandbox that is no longer in the store is not treated as an error.
		handle := func() error {
			hctx, cancel := context.WithTimeout(ctrdutil.NamespacedContext(), handleEventTimeout)
			defer cancel()

			sb, err := em.c.sandboxStore.Get(evt.GetSandboxID())
			if err != nil {
				if errdefs.IsNotFound(err) {
					return nil
				}
				return fmt.Errorf("failed to get sandbox %s: %w", evt.SandboxID, err)
			}
			return handleSandboxExit(hctx, sb, evt.ExitStatus, evt.ExitedAt.AsTime(), em.c)
		}
		if err := handle(); err != nil {
			log.L.WithError(err).Errorf("failed to handle sandbox TaskExit event %+v", evt)
			em.backOff.enBackOff(id, evt)
		}
	}()
	return done
}
// startContainerExitMonitor starts a goroutine that waits for the exit status
// of container id (task pid) on exitCh and handles it, or gives up when ctx
// is done. If handling fails, the exit event is put into the backoff queue
// for a later retry. The returned channel is closed when the goroutine
// finishes.
func (em *eventMonitor) startContainerExitMonitor(ctx context.Context, id string, pid uint32, exitCh <-chan containerd.ExitStatus) <-chan struct{} {
	done := make(chan struct{})
	go func() {
		defer close(done)

		var exitRes containerd.ExitStatus
		select {
		case <-ctx.Done():
			return
		case exitRes = <-exitCh:
		}

		code, at, err := exitRes.Result()
		if err != nil {
			log.L.WithError(err).Errorf("failed to get task exit status for %q", id)
			code = unknownExitCode
			at = time.Now()
		}

		evt := &eventtypes.TaskExit{
			ContainerID: id,
			ID:          id,
			Pid:         pid,
			ExitStatus:  code,
			ExitedAt:    protobuf.ToTimestamp(at),
		}
		log.L.Debugf("received exit event %+v", evt)

		// handle processes the exit with a bounded, namespaced context. A
		// container that is no longer in the store is not treated as an error.
		handle := func() error {
			hctx, cancel := context.WithTimeout(ctrdutil.NamespacedContext(), handleEventTimeout)
			defer cancel()

			cntr, err := em.c.containerStore.Get(evt.ID)
			if err != nil {
				if errdefs.IsNotFound(err) {
					return nil
				}
				return fmt.Errorf("failed to get container %s: %w", evt.ID, err)
			}
			return handleContainerExit(hctx, evt, cntr, cntr.SandboxID, em.c)
		}
		if err := handle(); err != nil {
			log.L.WithError(err).Errorf("failed to handle container TaskExit event %+v", evt)
			em.backOff.enBackOff(id, evt)
		}
	}()
	return done
}
// convertEvent unmarshals e and returns the ID the event refers to (container
// ID, sandbox ID or image name) together with the decoded event. It returns
// an error when e cannot be unmarshaled or is not one of the supported event
// types.
func convertEvent(e typeurl.Any) (string, interface{}, error) {
	evt, err := typeurl.UnmarshalAny(e)
	if err != nil {
		return "", nil, fmt.Errorf("failed to unmarshalany: %w", err)
	}

	switch typed := evt.(type) {
	case *eventtypes.TaskOOM:
		return typed.ContainerID, evt, nil
	case *eventtypes.SandboxExit:
		return typed.SandboxID, evt, nil
	case *eventtypes.ImageCreate:
		return typed.Name, evt, nil
	case *eventtypes.ImageUpdate:
		return typed.Name, evt, nil
	case *eventtypes.ImageDelete:
		return typed.Name, evt, nil
	}
	return "", nil, errors.New("unsupported event")
}
// start launches the event loop that handles subscribed events and retries
// events whose backoff has expired. The returned channel delivers the error
// that stopped the event stream, if any, and is closed when the loop exits.
//
// NOTE:
//  1. start must be called after subscribe; it panics otherwise.
//  2. Task exit events are handled first by the individual
//     startSandboxExitMonitor or startContainerExitMonitor goroutine. If that
//     goroutine fails, it puts the event into the backoff retry queue and
//     this loop handles it later.
func (em *eventMonitor) start() <-chan error {
	if em.ch == nil || em.errCh == nil {
		panic("event channel is nil")
	}
	stopErrCh := make(chan error)
	backOffTick := em.backOff.start()

	go func() {
		defer close(stopErrCh)
		for {
			select {
			case envelope := <-em.ch:
				log.L.Debugf("Received containerd event timestamp - %v, namespace - %q, topic - %q", envelope.Timestamp, envelope.Namespace, envelope.Topic)
				if envelope.Namespace != constants.K8sContainerdNamespace {
					log.L.Debugf("Ignoring events in namespace - %q", envelope.Namespace)
					continue
				}
				id, evt, err := convertEvent(envelope.Event)
				if err != nil {
					log.L.WithError(err).Errorf("Failed to convert event %+v", envelope)
					continue
				}
				// Events for an ID that is already backing off are queued
				// behind the pending ones instead of being handled now.
				if em.backOff.isInBackOff(id) {
					log.L.Infof("Events for %q is in backoff, enqueue event %+v", id, evt)
					em.backOff.enBackOff(id, evt)
					continue
				}
				if err := em.handleEvent(evt); err != nil {
					log.L.WithError(err).Errorf("Failed to handle event %+v for %s", evt, id)
					em.backOff.enBackOff(id, evt)
				}

			case streamErr := <-em.errCh:
				// Close the returned channel in defer directly if there is no error.
				if streamErr != nil {
					log.L.WithError(streamErr).Error("Failed to handle event stream")
					stopErrCh <- streamErr
				}
				return

			case <-backOffTick:
				for _, id := range em.backOff.getExpiredIDs() {
					queue := em.backOff.deBackOff(id)
					for i, evt := range queue.events {
						err := em.handleEvent(evt)
						if err == nil {
							continue
						}
						// Requeue the failed event and everything after it.
						log.L.WithError(err).Errorf("Failed to handle backOff event %+v for %s", evt, id)
						em.backOff.reBackOff(id, queue.events[i:], queue.duration)
						break
					}
				}
			}
		}
	}()
	return stopErrCh
}
// stop stops the event monitor: it stops the backoff and then cancels the
// monitor's context, which is the context the event subscription was created
// with.
// Once event monitor is stopped, it can't be started.
func (em *eventMonitor) stop() {
em.backOff.stop()
em.cancel()
}
// handleEvent handles a single containerd event under a namespaced context
// limited by handleEventTimeout. Exit events are dispatched to the matching
// container or sandbox, OOM events set the container's status reason, and
// image events refresh the image. Events that refer to an unknown container
// or sandbox, and unrecognized event types, are ignored (nil is returned).
func (em *eventMonitor) handleEvent(any interface{}) error {
	ctx, cancel := context.WithTimeout(ctrdutil.NamespacedContext(), handleEventTimeout)
	defer cancel()

	switch e := any.(type) {
	case *eventtypes.TaskExit:
		log.L.Infof("TaskExit event %+v", e)
		// Use ID instead of ContainerID to rule out TaskExit event for exec.
		cntr, err := em.c.containerStore.Get(e.ID)
		switch {
		case err == nil:
			if err := handleContainerExit(ctx, e, cntr, cntr.SandboxID, em.c); err != nil {
				return fmt.Errorf("failed to handle container TaskExit event: %w", err)
			}
			return nil
		case !errdefs.IsNotFound(err):
			return fmt.Errorf("can't find container for TaskExit event: %w", err)
		}

		// Not a known container; the ID may belong to a sandbox instead.
		sb, err := em.c.sandboxStore.Get(e.ID)
		switch {
		case err == nil:
			if err := handleSandboxExit(ctx, sb, e.ExitStatus, e.ExitedAt.AsTime(), em.c); err != nil {
				return fmt.Errorf("failed to handle sandbox TaskExit event: %w", err)
			}
			return nil
		case !errdefs.IsNotFound(err):
			return fmt.Errorf("can't find sandbox for TaskExit event: %w", err)
		}
		return nil

	case *eventtypes.SandboxExit:
		log.L.Infof("SandboxExit event %+v", e)
		sb, err := em.c.sandboxStore.Get(e.GetSandboxID())
		switch {
		case err == nil:
			if err := handleSandboxExit(ctx, sb, e.ExitStatus, e.ExitedAt.AsTime(), em.c); err != nil {
				return fmt.Errorf("failed to handle sandbox TaskExit event: %w", err)
			}
			return nil
		case !errdefs.IsNotFound(err):
			return fmt.Errorf("can't find sandbox for TaskExit event: %w", err)
		}
		return nil

	case *eventtypes.TaskOOM:
		log.L.Infof("TaskOOM event %+v", e)
		// For TaskOOM, we only care which container it belongs to.
		cntr, err := em.c.containerStore.Get(e.ContainerID)
		if err != nil {
			if errdefs.IsNotFound(err) {
				return nil
			}
			return fmt.Errorf("can't find container for TaskOOM event: %w", err)
		}
		markOOM := func(status containerstore.Status) (containerstore.Status, error) {
			status.Reason = oomExitReason
			return status, nil
		}
		if err = cntr.Status.UpdateSync(markOOM); err != nil {
			return fmt.Errorf("failed to update container status for TaskOOM event: %w", err)
		}

	// TODO: ImageService should handle these events directly
	case *eventtypes.ImageCreate:
		log.L.Infof("ImageCreate event %+v", e)
		return em.c.UpdateImage(ctx, e.Name)
	case *eventtypes.ImageUpdate:
		log.L.Infof("ImageUpdate event %+v", e)
		return em.c.UpdateImage(ctx, e.Name)
	case *eventtypes.ImageDelete:
		log.L.Infof("ImageDelete event %+v", e)
		return em.c.UpdateImage(ctx, e.Name)
	}

	return nil
}
// handleContainerExit handles a TaskExit event for a container.
//
// It loads the container's task (attaching the container IO), deletes the
// task, makes sure the task is also removed from the task service when a
// NotFound error was seen, records the exit in the container status, signals
// the container stop and finally emits a CONTAINER_STOPPED_EVENT.
//
// A non-nil error is returned when loading or deleting the task fails with
// anything other than the tolerated NotFound/Unavailable errors, when the
// task-service cleanup fails, or when the status update fails.
func handleContainerExit(ctx context.Context, e *eventtypes.TaskExit, cntr containerstore.Container, sandboxID string, c *criService) error {
// Attach container IO so that `Delete` could cleanup the stream properly.
task, err := cntr.Container.Task(ctx,
func(*containerdio.FIFOSet) (containerdio.IO, error) {
// We can't directly return cntr.IO here, because
// even if cntr.IO is nil, the cio.IO interface
// is not.
// See https://tour.golang.org/methods/12:
// Note that an interface value that holds a nil
// concrete value is itself non-nil.
if cntr.IO != nil {
return cntr.IO, nil
}
return nil, nil
},
)
if err != nil {
// NotFound and Unavailable are tolerated here; err is inspected again below.
if !errdefs.IsNotFound(err) && !errdefs.IsUnavailable(err) {
return fmt.Errorf("failed to load task for container: %w", err)
}
} else {
// TODO(random-liu): [P1] This may block the loop, we may want to spawn a worker
if _, err = task.Delete(ctx, c.nri.WithContainerExit(&cntr), containerd.WithProcessKill); err != nil {
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to stop container: %w", err)
}
// Move on to make sure container status is updated.
}
}
// NOTE: Both cntr.Container.Task and task.Delete always check the status
// of the target task. However, when these interfaces return ErrNotFound,
// that doesn't mean that the shim instance doesn't exist.
//
// There are two caches for task in containerd:
//
// 1. io.containerd.service.v1.tasks-service
// 2. io.containerd.runtime.v2.task
//
// The first one maintains the shim connection and shuts down the shim
// in the Delete API. The second one maintains the lifecycle of the
// task in the shim server.
//
// So, if the shim instance is running and the task has been deleted in the
// shim server, cntr.Container.Task and task.Delete will receive
// ErrNotFound. If we don't delete the shim instance in io.containerd.service.v1.tasks-service,
// the shim will leak.
//
// Based on the containerd/containerd#7496 issue, when the host is under IO
// pressure, the umount2 syscall can take more than 10 seconds, so
// the CRI plugin cancels this task.Delete call. However, the shim
// server isn't aware of this. After returning from the umount2 syscall, the
// shim server continues to delete the task record. Then the CRI plugin
// retries deleting the task, receives ErrNotFound and marks it as
// stopped. Therefore, the shim leaks.
//
// It's hard to handle the connection-lost or request-canceled cases in the
// shim server. We should call the Delete API of io.containerd.service.v1.tasks-service
// to ensure that the shim instance is shut down.
//
// REF:
// 1. https://github.com/containerd/containerd/issues/7496#issuecomment-1671100968
// 2. https://github.com/containerd/containerd/issues/8931
if errdefs.IsNotFound(err) {
_, err = c.client.TaskService().Delete(ctx, &apitasks.DeleteTaskRequest{ContainerID: cntr.Container.ID()})
if err != nil {
// Convert the gRPC error so that errdefs.IsNotFound can classify it.
err = errdefs.FromGRPC(err)
if !errdefs.IsNotFound(err) {
return fmt.Errorf("failed to cleanup container %s in task-service: %w", cntr.Container.ID(), err)
}
}
log.L.Infof("Ensure that container %s in task-service has been cleanup successfully", cntr.Container.ID())
}
err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
// Only record the exit details if no finish time has been recorded yet.
if status.FinishedAt == 0 {
status.Pid = 0
status.FinishedAt = protobuf.FromTimestamp(e.ExitedAt).UnixNano()
status.ExitCode = int32(e.ExitStatus)
}
// Unknown state can only transit to EXITED state, so we need
// to handle unknown state here.
if status.Unknown {
log.L.Debugf("Container %q transited from UNKNOWN to EXITED", cntr.ID)
status.Unknown = false
}
return status, nil
})
if err != nil {
return fmt.Errorf("failed to update container state: %w", err)
}
// Using channel to propagate the information of container stop
cntr.Stop()
c.generateAndSendContainerEvent(ctx, cntr.ID, sandboxID, runtime.ContainerEventType_CONTAINER_STOPPED_EVENT)
return nil
}
// handleSandboxExit handles a sandbox exit event: it marks the sandbox as
// not ready with the given exit status and exit time, signals the sandbox
// stop and emits a CONTAINER_STOPPED_EVENT for the sandbox ID.
func handleSandboxExit(ctx context.Context, sb sandboxstore.Sandbox, exitStatus uint32, exitTime time.Time, c *criService) error {
	// markExited rewrites the stored status to reflect the exit.
	markExited := func(st sandboxstore.Status) (sandboxstore.Status, error) {
		st.State = sandboxstore.StateNotReady
		st.Pid = 0
		st.ExitStatus = exitStatus
		st.ExitedAt = exitTime
		return st, nil
	}

	err := sb.Status.Update(markExited)
	if err != nil {
		return fmt.Errorf("failed to update sandbox state: %w", err)
	}

	// Using channel to propagate the information of sandbox stop
	sb.Stop()
	c.generateAndSendContainerEvent(ctx, sb.ID, sb.ID, runtime.ContainerEventType_CONTAINER_STOPPED_EVENT)
	return nil
}
// newBackOff creates a backOff with an empty queue pool, the package default
// durations and the real clock.
func newBackOff() *backOff {
	b := new(backOff)
	b.queuePool = make(map[string]*backOffQueue)
	b.minDuration = backOffInitDuration
	b.maxDuration = backOffMaxDuration
	b.checkDuration = backOffExpireCheckDuration
	b.clock = clock.RealClock{}
	return b
}
// getExpiredIDs returns the keys of all queues in the pool that have expired.
// The result is nil when no queue has expired.
func (b *backOff) getExpiredIDs() []string {
	b.queuePoolMu.Lock()
	defer b.queuePoolMu.Unlock()

	var expired []string
	for key, queue := range b.queuePool {
		if !queue.isExpire() {
			continue
		}
		expired = append(expired, key)
	}
	return expired
}
// isInBackOff reports whether a queue exists in the pool for the given key.
func (b *backOff) isInBackOff(key string) bool {
	b.queuePoolMu.Lock()
	defer b.queuePoolMu.Unlock()

	_, found := b.queuePool[key]
	return found
}
// enBackOff puts the event at the tail of the queue for key. When no queue
// exists for key yet, a new one holding just this event is created with the
// minimum backOff duration.
func (b *backOff) enBackOff(key string, evt interface{}) {
	b.queuePoolMu.Lock()
	defer b.queuePoolMu.Unlock()

	queue, exists := b.queuePool[key]
	if !exists {
		b.queuePool[key] = newBackOffQueue([]interface{}{evt}, b.minDuration, b.clock)
		return
	}
	queue.events = append(queue.events, evt)
}
// deBackOff removes the whole queue for key from the pool and returns it.
// It returns nil when there is no queue for key.
func (b *backOff) deBackOff(key string) *backOffQueue {
	b.queuePoolMu.Lock()
	defer b.queuePoolMu.Unlock()

	taken, present := b.queuePool[key]
	if present {
		delete(b.queuePool, key)
	}
	return taken
}
// reBackOff starts backing off again for key: the given events are stored in
// a fresh queue whose duration is twice oldDuration, capped at maxDuration.
func (b *backOff) reBackOff(key string, events []interface{}, oldDuration time.Duration) {
	b.queuePoolMu.Lock()
	defer b.queuePoolMu.Unlock()

	next := b.maxDuration
	if doubled := 2 * oldDuration; doubled <= b.maxDuration {
		next = doubled
	}
	b.queuePool[key] = newBackOffQueue(events, next, b.clock)
}
// start creates a new ticker firing every checkDuration, stores it on the
// backOff and returns the ticker's channel.
func (b *backOff) start() <-chan time.Time {
	b.tickerMu.Lock()
	defer b.tickerMu.Unlock()

	ticker := time.NewTicker(b.checkDuration)
	b.ticker = ticker
	return ticker.C
}
// stop stops the ticker if one has been created; otherwise it does nothing.
func (b *backOff) stop() {
	b.tickerMu.Lock()
	defer b.tickerMu.Unlock()

	if b.ticker == nil {
		return
	}
	b.ticker.Stop()
}
// newBackOffQueue creates a queue holding events that expires init after the
// current time reported by clock c.
func newBackOffQueue(events []interface{}, init time.Duration, c clock.Clock) *backOffQueue {
	queue := &backOffQueue{
		events:   events,
		duration: init,
		clock:    c,
	}
	queue.expireTime = c.Now().Add(init)
	return queue
}
// isExpire reports whether the queue's clock has reached or passed expireTime.
func (q *backOffQueue) isExpire() bool {
	now := q.clock.Now()
	// Equivalent to now >= expireTime.
	return !now.Before(q.expireTime)
}

View File

@@ -1,136 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
"time"
eventtypes "github.com/containerd/containerd/v2/api/events"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/typeurl/v2"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
testingclock "k8s.io/utils/clock/testing"
)
// TestBackOff tests the logic of backOff struct.
//
// Using a fake clock, it walks through the backOff lifecycle in order:
// enqueueing events, membership checks, expiry detection before and after
// advancing the clock, dequeueing, and re-enqueueing with a doubled duration.
func TestBackOff(t *testing.T) {
testStartTime := time.Now()
testClock := testingclock.NewFakeClock(testStartTime)
// Events to enqueue, grouped by container ID.
inputQueues := map[string]*backOffQueue{
"container1": {
events: []interface{}{
&eventtypes.TaskOOM{ContainerID: "container1"},
&eventtypes.TaskOOM{ContainerID: "container1"},
},
},
"container2": {
events: []interface{}{
&eventtypes.TaskOOM{ContainerID: "container2"},
&eventtypes.TaskOOM{ContainerID: "container2"},
},
},
}
// Expected queue pool after all input events have been enqueued.
expectedQueues := map[string]*backOffQueue{
"container2": {
events: []interface{}{
&eventtypes.TaskOOM{ContainerID: "container2"},
&eventtypes.TaskOOM{ContainerID: "container2"},
},
expireTime: testClock.Now().Add(backOffInitDuration),
duration: backOffInitDuration,
clock: testClock,
},
"container1": {
events: []interface{}{
&eventtypes.TaskOOM{ContainerID: "container1"},
&eventtypes.TaskOOM{ContainerID: "container1"},
},
expireTime: testClock.Now().Add(backOffInitDuration),
duration: backOffInitDuration,
clock: testClock,
},
}
t.Logf("Should be able to backOff a event")
actual := newBackOff()
// Swap in the fake clock so expiry is controlled by the test.
actual.clock = testClock
for k, queue := range inputQueues {
for _, event := range queue.events {
actual.enBackOff(k, event)
}
}
assert.Equal(t, actual.queuePool, expectedQueues)
t.Logf("Should be able to check if the container is in backOff state")
for k, queue := range inputQueues {
for _, e := range queue.events {
evt, err := typeurl.MarshalAny(e)
assert.NoError(t, err)
key, _, err := convertEvent(evt)
assert.NoError(t, err)
assert.Equal(t, k, key)
assert.Equal(t, actual.isInBackOff(key), true)
}
}
t.Logf("Should be able to check that a container isn't in backOff state")
notExistKey := "containerNotExist"
assert.Equal(t, actual.isInBackOff(notExistKey), false)
t.Logf("No containers should be expired")
assert.Empty(t, actual.getExpiredIDs())
t.Logf("Should be able to get all keys which are expired for backOff")
// Advance the fake clock by the initial duration so every queue expires.
testClock.Sleep(backOffInitDuration)
actKeyList := actual.getExpiredIDs()
assert.Equal(t, len(inputQueues), len(actKeyList))
for k := range inputQueues {
assert.Contains(t, actKeyList, k)
}
t.Logf("Should be able to get out all backOff events")
doneQueues := map[string]*backOffQueue{}
for k := range inputQueues {
actQueue := actual.deBackOff(k)
doneQueues[k] = actQueue
assert.True(t, cmp.Equal(actQueue.events, expectedQueues[k].events, protobuf.Compare))
}
t.Logf("Should not get out the event again after having got out the backOff event")
for k := range inputQueues {
// A second deBackOff for the same key is expected to yield a nil queue.
var expect *backOffQueue
actQueue := actual.deBackOff(k)
assert.Equal(t, actQueue, expect)
}
t.Logf("Should be able to reBackOff")
for k, queue := range doneQueues {
// Re-enqueue the events from index 1 onwards, as if the event at that index had failed.
failEventIndex := 1
events := queue.events[failEventIndex:]
actual.reBackOff(k, events, queue.duration)
actQueue := actual.deBackOff(k)
expQueue := &backOffQueue{
events: events,
expireTime: testClock.Now().Add(2 * queue.duration),
duration: 2 * queue.duration,
clock: testClock,
}
assert.Equal(t, actQueue, expQueue)
}
}

View File

@@ -1,33 +0,0 @@
//go:build gofuzz
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"fmt"
"github.com/containerd/containerd/v2/pkg/cri/store/sandbox"
)
// SandboxStore returns the sandbox store of cs. It returns an error when cs
// is not a *criService.
func SandboxStore(cs CRIService) (*sandbox.Store, error) {
	if svc, ok := cs.(*criService); ok {
		return svc.sandboxStore, nil
	}
	return nil, fmt.Errorf("%+v is not sbserver.criService", cs)
}

View File

@@ -1,630 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
"path"
"path/filepath"
"regexp"
goruntime "runtime"
"strconv"
"strings"
"time"
"github.com/containerd/typeurl/v2"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/containers"
crilabels "github.com/containerd/containerd/v2/pkg/cri/labels"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
imagestore "github.com/containerd/containerd/v2/pkg/cri/store/image"
clabels "github.com/containerd/containerd/v2/pkg/labels"
"github.com/containerd/errdefs"
"github.com/containerd/log"
)
// TODO: Move common helpers for sbserver and podsandbox to a dedicated package once basic services are functional.
const (
// errorStartReason is the exit reason when a container fails to start.
errorStartReason = "StartError"
// errorStartExitCode is the exit code when a container fails to start.
// 128 matches Docker's behavior.
// TODO(windows): Figure out what should be used for windows.
errorStartExitCode = 128
// completeExitReason is the exit reason when a container exits with code 0.
completeExitReason = "Completed"
// errorExitReason is the exit reason when a container exits with a non-zero code.
errorExitReason = "Error"
// oomExitReason is the exit reason when a process in the container is OOM killed.
oomExitReason = "OOMKilled"
// sandboxesDir contains all sandbox roots. A sandbox root is the running
// directory of the sandbox; all files created for the sandbox will be
// placed under this directory.
sandboxesDir = "sandboxes"
// containersDir contains all container roots.
containersDir = "containers"
// nameDelimiter is the delimiter used to construct container/sandbox names.
nameDelimiter = "_"
// defaultIfName is the default network interface for the pods.
defaultIfName = "eth0"
// devShm is the default path of /dev/shm.
devShm = "/dev/shm"
// etcHosts is the default path of the /etc/hosts file.
etcHosts = "/etc/hosts"
// etcHostname is the default path of the /etc/hostname file.
etcHostname = "/etc/hostname"
// resolvConfPath is the absolute path of resolv.conf on the host or in the container.
resolvConfPath = "/etc/resolv.conf"
)
// getSandboxRootDir returns the root directory for managing the files of
// sandbox id (e.g. hosts files), located under the configured RootDir.
func (c *criService) getSandboxRootDir(id string) string {
	root := c.config.RootDir
	return filepath.Join(root, sandboxesDir, id)
}
// getVolatileSandboxRootDir returns the root directory for managing the
// volatile files of sandbox id (e.g. named pipes), located under the
// configured StateDir.
func (c *criService) getVolatileSandboxRootDir(id string) string {
	state := c.config.StateDir
	return filepath.Join(state, sandboxesDir, id)
}
// getSandboxHostname returns the path of the "hostname" file inside the root
// directory of sandbox id.
func (c *criService) getSandboxHostname(id string) string {
	sandboxRoot := c.getSandboxRootDir(id)
	return filepath.Join(sandboxRoot, "hostname")
}
// getSandboxHosts returns the path of the "hosts" file inside the root
// directory of sandbox id.
func (c *criService) getSandboxHosts(id string) string {
	sandboxRoot := c.getSandboxRootDir(id)
	return filepath.Join(sandboxRoot, "hosts")
}
// getResolvPath returns the path of the "resolv.conf" file inside the root
// directory of sandbox id.
func (c *criService) getResolvPath(id string) string {
	sandboxRoot := c.getSandboxRootDir(id)
	return filepath.Join(sandboxRoot, "resolv.conf")
}
// getSandboxDevShm returns the "shm" path inside the volatile root directory
// of sandbox id.
func (c *criService) getSandboxDevShm(id string) string {
	volatileRoot := c.getVolatileSandboxRootDir(id)
	return filepath.Join(volatileRoot, "shm")
}
// makeSandboxName generates the sandbox name from the sandbox metadata by
// joining name, namespace, uid and attempt with nameDelimiter. The generated
// name is unique as long as the sandbox metadata is unique.
func makeSandboxName(s *runtime.PodSandboxMetadata) string {
	attempt := strconv.FormatUint(uint64(s.Attempt), 10)
	// Order: name, namespace, uid, attempt.
	parts := []string{s.Name, s.Namespace, s.Uid, attempt}
	return strings.Join(parts, nameDelimiter)
}
// makeContainerName generates the container name from the container and
// sandbox metadata by joining the container name, pod name, pod namespace,
// pod uid and container attempt with nameDelimiter. The generated name is
// unique as long as the sandbox/container combination is unique.
func makeContainerName(c *runtime.ContainerMetadata, s *runtime.PodSandboxMetadata) string {
	// attempt is the attempt number of creating the container.
	attempt := strconv.FormatUint(uint64(c.Attempt), 10)
	// Order: container name, pod name, pod namespace, pod uid, attempt.
	parts := []string{c.Name, s.Name, s.Namespace, s.Uid, attempt}
	return strings.Join(parts, nameDelimiter)
}
// getContainerRootDir returns the root directory for managing the files of
// container id (e.g. state checkpoint), located under the configured RootDir.
func (c *criService) getContainerRootDir(id string) string {
	root := c.config.RootDir
	return filepath.Join(root, containersDir, id)
}
// getVolatileContainerRootDir returns the root directory for managing the
// volatile files of container id (e.g. named pipes), located under the
// configured StateDir.
func (c *criService) getVolatileContainerRootDir(id string) string {
	state := c.config.StateDir
	return filepath.Join(state, containersDir, id)
}
// criContainerStateToString formats a CRI container state as a string by
// looking it up in runtime.ContainerState_name.
func criContainerStateToString(state runtime.ContainerState) string {
	key := int32(state)
	return runtime.ContainerState_name[key]
}
// toContainerdImage converts an image object from the image store into a
// containerd image handle, resolved through the image's first reference.
// It returns an error when the image has no reference at all.
func (c *criService) toContainerdImage(ctx context.Context, image imagestore.Image) (containerd.Image, error) {
	// image should always have at least one reference.
	if refs := image.References; len(refs) > 0 {
		return c.client.GetImage(ctx, refs[0])
	}
	return nil, fmt.Errorf("invalid image with no reference %q", image.ID)
}
// getUserFromImage gets the uid or the user name of the image user.
//
// Only the part before the first ':' is considered (the value may be given as
// user:group). If that part parses as a base-10 int64 it is returned as the
// uid with an empty name; otherwise it is returned as the user name with a
// nil uid. An empty user yields (nil, "").
func getUserFromImage(user string) (*int64, string) {
	// Nothing specified in the image.
	if user == "" {
		return nil, ""
	}

	// Drop the group part, if any.
	name := user
	if sep := strings.IndexByte(user, ':'); sep >= 0 {
		name = user[:sep]
	}

	// A numeric value is treated as a uid.
	if uid, err := strconv.ParseInt(name, 10, 64); err == nil {
		return &uid, ""
	}
	// Anything else is assumed to be a user name.
	return nil, name
}
// validateTargetContainer checks that a container is a valid target for a
// container using PID NamespaceMode_TARGET: the target must exist in the
// container store, belong to the given sandbox and be in the running state.
// On success the target container is returned for convenience; otherwise a
// zero Container and an error are returned.
func (c *criService) validateTargetContainer(sandboxID, targetContainerID string) (containerstore.Container, error) {
	var none containerstore.Container

	target, err := c.containerStore.Get(targetContainerID)
	if err != nil {
		return none, fmt.Errorf("container %q does not exist: %w", targetContainerID, err)
	}

	if owner := target.Metadata.SandboxID; owner != sandboxID {
		return none, fmt.Errorf("container %q (sandbox %s) does not belong to sandbox %s", targetContainerID, owner, sandboxID)
	}

	current := target.Status.Get()
	state := current.State()
	if state != runtime.ContainerState_CONTAINER_RUNNING {
		return none, fmt.Errorf("container %q is not running - in state %s", targetContainerID, state)
	}

	return target, nil
}
// isInCRIMounts reports whether dst is the container path of any mount in the
// CRI mount list. Paths are compared after filepath.Clean.
func isInCRIMounts(dst string, mounts []*runtime.Mount) bool {
	want := filepath.Clean(dst)
	for i := range mounts {
		if filepath.Clean(mounts[i].ContainerPath) == want {
			return true
		}
	}
	return false
}
// filterLabel returns a label filter expression for key k and value v.
// Both are formatted with `%q` because the containerd filter needs the extra
// quoting to work properly.
func filterLabel(k, v string) string {
	const format = "labels.%q==%q"
	return fmt.Sprintf(format, k, v)
}
// buildLabels builds the labels to be passed to containerd.
//
// Image config labels are added first; those that fail validation are skipped
// with a warning. Labels from the CRI request (configLabels) are applied next
// and override image labels with the same key. Finally the container kind
// label is set to containerType.
func buildLabels(configLabels, imageConfigLabels map[string]string, containerType string) map[string]string {
	merged := make(map[string]string)

	for key, value := range imageConfigLabels {
		err := clabels.Validate(key, value)
		if err != nil {
			// In case the image label is invalid, we output a warning and skip adding it to the
			// container.
			log.L.WithError(err).Warnf("unable to add image label with key %s to the container", key)
			continue
		}
		merged[key] = value
	}

	// labels from the CRI request (config) will override labels in the image config
	for key, value := range configLabels {
		merged[key] = value
	}

	merged[crilabels.ContainerKindLabel] = containerType
	return merged
}
// getRuntimeOptions gets the runtime options from the container metadata.
// It returns (nil, nil) when no options are set or their value is nil;
// otherwise the options are decoded with typeurl.UnmarshalAny.
func getRuntimeOptions(c containers.Container) (interface{}, error) {
	raw := c.Runtime.Options
	if raw == nil {
		return nil, nil
	}
	if raw.GetValue() == nil {
		return nil, nil
	}

	decoded, err := typeurl.UnmarshalAny(raw)
	if err != nil {
		return nil, err
	}
	return decoded, nil
}
const (
// unknownExitCode is the exit code used when the exit reason is unknown.
unknownExitCode = 255
// unknownExitReason is the exit reason used when the exit reason is unknown.
unknownExitReason = "Unknown"
)
// unknownContainerStatus returns the default container status used when the
// status is unknown: all timestamps are left at zero, the exit code and
// reason are the "unknown" values and the Unknown flag is set.
func unknownContainerStatus() containerstore.Status {
	// CreatedAt, StartedAt and FinishedAt keep their zero values.
	var st containerstore.Status
	st.ExitCode = unknownExitCode
	st.Reason = unknownExitReason
	st.Unknown = true
	return st
}
// getPassthroughAnnotations filters the requested pod annotations, keeping
// only those whose key matches at least one of the patterns permitted for the
// given runtime. The returned map is never nil.
func getPassthroughAnnotations(podAnnotations map[string]string,
	runtimePodAnnotations []string) (passthroughAnnotations map[string]string) {
	// permitted reports whether key matches any of the runtime's patterns.
	permitted := func(key string) bool {
		for _, pattern := range runtimePodAnnotations {
			// Use path.Match instead of filepath.Match here.
			// filepath.Match treated `\\` as path separator
			// on windows, which is not what we want.
			if matched, _ := path.Match(pattern, key); matched {
				return true
			}
		}
		return false
	}

	passthroughAnnotations = make(map[string]string)
	for key, value := range podAnnotations {
		if permitted(key) {
			passthroughAnnotations[key] = value
		}
	}
	return passthroughAnnotations
}
// copyResourcesToStatus copies the container resource constraints from the
// OCI spec into the container status and returns the updated status.
//
// status.Resources is always replaced with a fresh value. The Linux section
// is filled when spec.Linux is set and the Windows section when spec.Windows
// is set. Note that the function returns as soon as it finds a platform
// section without Resources.
// This will need updates when new fields are added to ContainerResources.
func copyResourcesToStatus(spec *runtimespec.Spec, status containerstore.Status) containerstore.Status {
	status.Resources = &runtime.ContainerResources{}

	if spec.Linux != nil {
		linux := &runtime.LinuxContainerResources{}
		status.Resources.Linux = linux

		if proc := spec.Process; proc != nil && proc.OOMScoreAdj != nil {
			linux.OomScoreAdj = int64(*proc.OOMScoreAdj)
		}

		res := spec.Linux.Resources
		if res == nil {
			return status
		}

		if cpu := res.CPU; cpu != nil {
			if cpu.Period != nil {
				linux.CpuPeriod = int64(*cpu.Period)
			}
			if cpu.Quota != nil {
				linux.CpuQuota = *cpu.Quota
			}
			if cpu.Shares != nil {
				linux.CpuShares = int64(*cpu.Shares)
			}
			linux.CpusetCpus = cpu.Cpus
			linux.CpusetMems = cpu.Mems
		}

		if mem := res.Memory; mem != nil {
			if mem.Limit != nil {
				linux.MemoryLimitInBytes = *mem.Limit
			}
			if mem.Swap != nil {
				linux.MemorySwapLimitInBytes = *mem.Swap
			}
		}

		if limits := res.HugepageLimits; limits != nil {
			converted := make([]*runtime.HugepageLimit, 0, len(limits))
			for _, l := range limits {
				converted = append(converted, &runtime.HugepageLimit{
					PageSize: l.Pagesize,
					Limit:    l.Limit,
				})
			}
			linux.HugepageLimits = converted
		}

		if res.Unified != nil {
			linux.Unified = res.Unified
		}
	}

	if spec.Windows != nil {
		windows := &runtime.WindowsContainerResources{}
		status.Resources.Windows = windows

		res := spec.Windows.Resources
		if res == nil {
			return status
		}

		if cpu := res.CPU; cpu != nil {
			if cpu.Shares != nil {
				windows.CpuShares = int64(*cpu.Shares)
			}
			if cpu.Count != nil {
				windows.CpuCount = int64(*cpu.Count)
			}
			if cpu.Maximum != nil {
				windows.CpuMaximum = int64(*cpu.Maximum)
			}
		}

		if mem := res.Memory; mem != nil {
			if mem.Limit != nil {
				windows.MemoryLimitInBytes = int64(*mem.Limit)
			}
		}
		// TODO: Figure out how to get RootfsSizeInBytes
	}

	return status
}
// generateAndSendContainerEvent builds a container event of the given type
// for containerID, carrying the current pod sandbox status and the statuses
// of the sandbox's containers, and sends it on the container events queue.
// Failures to collect either status are only logged; the event is sent anyway.
func (c *criService) generateAndSendContainerEvent(ctx context.Context, containerID string, sandboxID string, eventType runtime.ContainerEventType) {
	sandboxStatus, err := c.getPodSandboxStatus(ctx, sandboxID)
	if err != nil {
		log.G(ctx).Warnf("Failed to get podSandbox status for container event for sandboxID %q: %v. Sending the event with nil podSandboxStatus.", sandboxID, err)
		sandboxStatus = nil
	}

	statuses, err := c.getContainerStatuses(ctx, sandboxID)
	if err != nil {
		log.G(ctx).Errorf("Failed to get container statuses for container event for sandboxID %q: %v", sandboxID, err)
	}

	c.containerEventsQ.Send(runtime.ContainerEventResponse{
		ContainerId:        containerID,
		ContainerEventType: eventType,
		CreatedAt:          time.Now().UnixNano(),
		PodSandboxStatus:   sandboxStatus,
		ContainersStatuses: statuses,
	})
}
// getPodSandboxStatus returns the status of the pod sandbox with the given
// ID, as reported by PodSandboxStatus.
func (c *criService) getPodSandboxStatus(ctx context.Context, podSandboxID string) (*runtime.PodSandboxStatus, error) {
	resp, err := c.PodSandboxStatus(ctx, &runtime.PodSandboxStatusRequest{PodSandboxId: podSandboxID})
	if err != nil {
		return nil, err
	}
	return resp.GetStatus(), nil
}
// getContainerStatuses returns the statuses of all containers listed for the
// given pod sandbox. Containers whose status lookup reports NotFound are
// skipped; any other error aborts and is returned. The returned slice is
// non-nil on success, even when empty.
func (c *criService) getContainerStatuses(ctx context.Context, podSandboxID string) ([]*runtime.ContainerStatus, error) {
	filter := &runtime.ContainerFilter{PodSandboxId: podSandboxID}
	listed, err := c.ListContainers(ctx, &runtime.ListContainersRequest{Filter: filter})
	if err != nil {
		return nil, err
	}

	statuses := []*runtime.ContainerStatus{}
	for _, ctr := range listed.Containers {
		req := &runtime.ContainerStatusRequest{
			ContainerId: ctr.Id,
			Verbose:     false,
		}
		resp, err := c.ContainerStatus(ctx, req)
		switch {
		case err == nil:
			statuses = append(statuses, resp.GetStatus())
		case errdefs.IsNotFound(err):
			// Skip containers that are no longer found.
		default:
			return nil, err
		}
	}
	return statuses, nil
}
// hostNetwork reports whether host networking was requested for the sandbox.
// The answer depends on the OS the binary runs on (goruntime.GOOS).
func hostNetwork(config *runtime.PodSandboxConfig) bool {
	switch goruntime.GOOS {
	case "windows":
		// Windows HostProcess pods can only run on the host network
		return config.GetWindows().GetSecurityContext().GetHostProcess()
	case "darwin":
		// No CNI on Darwin yet.
		return true
	default:
		// Even on other platforms, the logic containerd uses is to check if NamespaceMode == NODE.
		// So this handles Linux, as well as any other platforms not governed by the cases above
		// that have special quirks.
		mode := config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork()
		return mode == runtime.NamespaceMode_NODE
	}
}
// getCgroupsPath generates the container cgroups path.
//
// When the last element of cgroupsParent ends in ".slice", the runc systemd
// cgroup path format "slice:prefix:name" is used; otherwise id is joined onto
// cgroupsParent as a file path.
func getCgroupsPath(cgroupsParent, id string) string {
	// For a.slice/b.slice/c.slice, leaf is c.slice.
	leaf := path.Base(cgroupsParent)
	if !strings.HasSuffix(leaf, ".slice") {
		return filepath.Join(cgroupsParent, id)
	}
	// runc systemd cgroup path format is "slice:prefix:name".
	segments := []string{leaf, "cri-containerd", id}
	return strings.Join(segments, ":")
}
// toLabel converts CRI SELinux options into "key:value" label strings, in the
// order user, role, type, level; empty fields are omitted.
// It returns (nil, nil) for nil options and an error when the level fails
// checkSelinuxLevel.
func toLabel(selinuxOptions *runtime.SELinuxOption) ([]string, error) {
	if selinuxOptions == nil {
		return nil, nil
	}
	if err := checkSelinuxLevel(selinuxOptions.Level); err != nil {
		return nil, err
	}

	fields := []struct{ prefix, value string }{
		{"user:", selinuxOptions.User},
		{"role:", selinuxOptions.Role},
		{"type:", selinuxOptions.Type},
		{"level:", selinuxOptions.Level},
	}

	var labels []string
	for _, f := range fields {
		if f.value != "" {
			labels = append(labels, f.prefix+f.value)
		}
	}
	return labels, nil
}
// selinuxLevelPattern is the accepted format of an SELinux level. It is
// compiled once at package initialization rather than on every call.
var selinuxLevelPattern = regexp.MustCompile(`^s\d(-s\d)??(:c\d{1,4}(\.c\d{1,4})?(,c\d{1,4}(\.c\d{1,4})?)*)?$`)

// checkSelinuxLevel validates the format of an SELinux level string.
// An empty level is accepted. It returns an error when the level does not
// match selinuxLevelPattern.
func checkSelinuxLevel(level string) error {
	if len(level) == 0 {
		return nil
	}
	if !selinuxLevelPattern.MatchString(level) {
		return fmt.Errorf("the format of 'level' %q is not correct", level)
	}
	return nil
}
// parseUsernsIDMap converts the CRI ID mappings into OCI runtime-spec ID
// mappings. At most one mapping line is accepted. An empty list or a nil
// entry yields a nil result; a mapping with length < 1 is an error.
func parseUsernsIDMap(runtimeIDMap []*runtime.IDMapping) ([]runtimespec.LinuxIDMapping, error) {
	switch count := len(runtimeIDMap); {
	case count == 0:
		return nil, nil
	case count > 1:
		// We only accept 1 line, because containerd.WithRemappedSnapshot() only supports that.
		return nil, fmt.Errorf("only one mapping line supported, got %v mapping lines", count)
	}

	// Exactly one entry from here on.
	entry := runtimeIDMap[0]
	if entry == nil {
		return nil, nil
	}
	if entry.Length < 1 {
		return nil, fmt.Errorf("invalid mapping length: %v", entry.Length)
	}

	mapping := runtimespec.LinuxIDMapping{
		ContainerID: entry.ContainerId,
		HostID:      entry.HostId,
		Size:        entry.Length,
	}
	return []runtimespec.LinuxIDMapping{mapping}, nil
}
// parseUsernsIDs parses the UID and GID mappings of a CRI user namespace
// config and validates them against the namespace mode: NODE mode must come
// without mappings, POD mode requires both UID and GID mappings, and any
// other mode is rejected. A nil userns yields (nil, nil, nil).
func parseUsernsIDs(userns *runtime.UserNamespace) (uids, gids []runtimespec.LinuxIDMapping, retErr error) {
	if userns == nil {
		// If userns is not set, the kubelet doesn't support this option
		// and we should just fallback to no userns. This is completely
		// valid.
		return nil, nil, nil
	}

	uidMaps, err := parseUsernsIDMap(userns.GetUids())
	if err != nil {
		return nil, nil, fmt.Errorf("UID mapping: %w", err)
	}
	gidMaps, err := parseUsernsIDMap(userns.GetGids())
	if err != nil {
		return nil, nil, fmt.Errorf("GID mapping: %w", err)
	}

	mode := userns.GetMode()
	hasUIDs, hasGIDs := len(uidMaps) != 0, len(gidMaps) != 0
	switch mode {
	case runtime.NamespaceMode_NODE:
		if hasUIDs || hasGIDs {
			return nil, nil, fmt.Errorf("can't use user namespace mode %q with mappings. Got %v UID mappings and %v GID mappings", mode, len(uidMaps), len(gidMaps))
		}
	case runtime.NamespaceMode_POD:
		// This is valid, we will handle it in WithPodNamespaces().
		if !hasUIDs || !hasGIDs {
			return nil, nil, fmt.Errorf("can't use user namespace mode %q without UID and GID mappings", mode)
		}
	default:
		return nil, nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
	}

	return uidMaps, gidMaps, nil
}
// sameUsernsConfig reports whether two userns configs are the same: both nil,
// or both non-nil with equal mode and equal UID and GID mappings. Configs
// that fail to parse are never considered the same. If the mappings on each
// config are the same but in a different order, it returns false.
// XXX: If the runtime.UserNamespace struct changes, we should update this
// function accordingly.
func sameUsernsConfig(a, b *runtime.UserNamespace) bool {
	if a == nil || b == nil {
		// Same only when both are nil.
		return a == nil && b == nil
	}

	// Neither a nor b is nil from here on.
	if a.GetMode() != b.GetMode() {
		return false
	}

	aUIDs, aGIDs, errA := parseUsernsIDs(a)
	if errA != nil {
		return false
	}
	bUIDs, bGIDs, errB := parseUsernsIDs(b)
	if errB != nil {
		return false
	}

	return sameMapping(aUIDs, bUIDs) && sameMapping(aGIDs, bGIDs)
}
// sameMapping reports whether two mapping lists are the same, comparing
// ContainerID, HostID and Size element by element. If the mappings are the
// same but in a different order, it returns false.
func sameMapping(a, b []runtimespec.LinuxIDMapping) bool {
	if len(a) != len(b) {
		return false
	}
	for i, left := range a {
		right := b[i]
		if left.ContainerID != right.ContainerID ||
			left.HostID != right.HostID ||
			left.Size != right.Size {
			return false
		}
	}
	return true
}

View File

@@ -1,209 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"syscall"
"time"
"github.com/containerd/cgroups/v3"
"github.com/moby/sys/mountinfo"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/mount"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/pkg/apparmor"
"github.com/containerd/containerd/v2/pkg/seccomp"
"github.com/containerd/containerd/v2/pkg/seutil"
"github.com/containerd/log"
)
// apparmorEnabled reports whether AppArmor should be used: it is false when
// the CRI config sets DisableApparmor, and otherwise whatever
// apparmor.HostSupports() reports for the host.
func (c *criService) apparmorEnabled() bool {
	// Short-circuit keeps the host probe from running when AppArmor is
	// disabled by configuration.
	return !c.config.DisableApparmor && apparmor.HostSupports()
}
// seccompEnabled reports whether seccomp is enabled, as determined by
// seccomp.IsEnabled. The receiver's configuration is not consulted.
func (c *criService) seccompEnabled() bool {
return seccomp.IsEnabled()
}
// openLogFile opens the container log file at path for appending, creating
// the file (mode 0640) and any missing parent directories (mode 0755) first.
func openLogFile(path string) (*os.File, error) {
	const (
		dirPerm  os.FileMode = 0755
		filePerm os.FileMode = 0640
		flags                = os.O_CREATE | os.O_APPEND | os.O_WRONLY
	)

	parent := filepath.Dir(path)
	if mkErr := os.MkdirAll(parent, dirPerm); mkErr != nil {
		return nil, mkErr
	}
	return os.OpenFile(path, flags, filePerm)
}
// unmountRecursive unmounts target and every mount below it, deepest mount
// first. Failures on submounts are only logged at debug level; only a failure
// on the last entry of the sorted list is returned to the caller.
func unmountRecursive(ctx context.Context, target string) error {
	canonical, err := mount.CanonicalizePath(target)
	if err != nil {
		return err
	}

	mounts, err := mountinfo.GetMounts(mountinfo.PrefixFilter(canonical))
	if err != nil {
		return err
	}

	// Longest mountpoint first, so nested mounts go before their parents.
	sort.Slice(mounts, func(a, b int) bool {
		return len(mounts[a].Mountpoint) > len(mounts[b].Mountpoint)
	})

	last := len(mounts) - 1
	for idx, entry := range mounts {
		unmountErr := mount.UnmountAll(entry.Mountpoint, unix.MNT_DETACH)
		if unmountErr == nil {
			continue
		}
		if idx == last {
			return unmountErr
		}
		// This is some submount, we can ignore this error for now, the final
		// unmount will fail if this is a real problem.
		log.G(ctx).WithError(unmountErr).Debugf("failed to unmount submount %s", entry.Mountpoint)
	}
	return nil
}
// ensureRemoveAll wraps `os.RemoveAll` to check for specific errors that can
// often be remedied.
// Only use `ensureRemoveAll` if you really want to make every effort to remove
// a directory.
//
// Because of the way `os.Remove` (and by extension `os.RemoveAll`) works, there
// can be a race between reading directory entries and then actually attempting
// to remove everything in the directory.
// These types of errors do not need to be returned since it's ok for the dir to
// be gone we can just retry the remove operation.
//
// This should not return a `os.ErrNotExist` kind of error under any circumstances
//
// NOTE(review): the loop below does return the not-exist error when the same
// path is reported missing a second time; confirm which behavior is intended.
//
// The removal is retried after a 100ms sleep each time a path reports EBUSY,
// up to maxRetry (50) times per path.
func ensureRemoveAll(ctx context.Context, dir string) error {
// notExistErr remembers paths already reported as missing once.
notExistErr := make(map[string]bool)
// track retries
exitOnErr := make(map[string]int)
maxRetry := 50
// Attempt to unmount anything beneath this dir first.
// A failure here is only logged; the removal loop is still attempted.
if err := unmountRecursive(ctx, dir); err != nil {
log.G(ctx).WithError(err).Debugf("failed to do initial unmount of %s", dir)
}
for {
err := os.RemoveAll(dir)
if err == nil {
return nil
}
// Only *os.PathError carries the offending path needed by the recovery
// steps below; any other error is returned as-is.
pe, ok := err.(*os.PathError)
if !ok {
return err
}
if os.IsNotExist(err) {
// Seeing the same missing path twice means retrying is not helping.
if notExistErr[pe.Path] {
return err
}
notExistErr[pe.Path] = true
// There is a race where some subdir can be removed but after the
// parent dir entries have been read.
// So the path could be from `os.Remove(subdir)`
// If the reported non-existent path is not the passed in `dir` we
// should just retry, but otherwise return with no error.
if pe.Path == dir {
return nil
}
continue
}
// Anything other than "busy" is not handled here.
if pe.Err != syscall.EBUSY {
return err
}
// Detach whatever is mounted at the busy path, then retry the removal.
if e := mount.Unmount(pe.Path, unix.MNT_DETACH); e != nil {
return fmt.Errorf("error while removing %s: %w", dir, e)
}
// Give up on a path once it has been retried maxRetry times.
if exitOnErr[pe.Path] == maxRetry {
return err
}
exitOnErr[pe.Path]++
time.Sleep(100 * time.Millisecond)
}
}
// vmbasedRuntimes lists the substrings that mark a runtime type as VM-based.
var vmbasedRuntimes = []string{
	"io.containerd.kata",
}

// isVMBasedRuntime reports whether runtimeType contains any of the markers in
// vmbasedRuntimes. The match is a plain substring test.
func isVMBasedRuntime(runtimeType string) bool {
	for _, marker := range vmbasedRuntimes {
		if !strings.Contains(runtimeType, marker) {
			continue
		}
		return true
	}
	return false
}
// modifyProcessLabel rewrites spec.Process.SelinuxLabel through
// seutil.ChangeToKVM when runtimeType is a VM-based runtime. For every other
// runtime the spec is left untouched and nil is returned.
func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
	vmBased := isVMBasedRuntime(runtimeType)
	if !vmBased {
		return nil
	}

	kvmLabel, err := seutil.ChangeToKVM(spec.Process.SelinuxLabel)
	if err != nil {
		return fmt.Errorf("failed to get selinux kvm label: %w", err)
	}

	// Only overwrite the label once the conversion has succeeded.
	spec.Process.SelinuxLabel = kvmLabel
	return nil
}
// isUnifiedCgroupsMode reports whether cgroups.Mode() is cgroups.Unified.
// TODO: add build constraints to cgroups package and remove this helper
func isUnifiedCgroupsMode() bool {
return cgroups.Mode() == cgroups.Unified
}
// snapshotterRemapOpts builds the snapshot options needed for the user
// namespace configuration in nsOpts.
//
// Without userns options, or for any mode other than POD, it returns an empty
// (non-nil) option list. For POD mode it adds containerd.WithRemapperLabels
// built from the first UID mapping and the first GID mapping only; the size of
// the first UID mapping is the single length passed along.
func snapshotterRemapOpts(nsOpts *runtime.NamespaceOption) ([]snapshots.Opt, error) {
	opts := []snapshots.Opt{}

	userns := nsOpts.GetUsernsOptions()
	if userns == nil {
		return opts, nil
	}

	uidMaps, gidMaps, err := parseUsernsIDs(userns)
	if err != nil {
		return nil, fmt.Errorf("user namespace configuration: %w", err)
	}

	if userns.GetMode() != runtime.NamespaceMode_POD {
		return opts, nil
	}

	// parseUsernsIDs rejects POD mode without UID and GID mappings, so
	// indexing the first entry is safe here.
	remap := containerd.WithRemapperLabels(0, uidMaps[0].HostID, 0, gidMaps[0].HostID, uidMaps[0].Size)
	return append(opts, remap), nil
}

View File

@@ -1,47 +0,0 @@
//go:build !windows && !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"os"
"github.com/opencontainers/runtime-spec/specs-go"
)
// openLogFile opens the container log file at path for appending, creating it
// with mode 0640 when it does not exist yet. Parent directories are not
// created by this variant.
func openLogFile(path string) (*os.File, error) {
	const flags = os.O_CREATE | os.O_APPEND | os.O_WRONLY

	logFile, err := os.OpenFile(path, flags, 0640)
	return logFile, err
}
// ensureRemoveAll removes dir and everything beneath it.
// In this build variant it is a plain call to `os.RemoveAll`: no specific
// errors are checked or retried, and ctx is not used.
func ensureRemoveAll(ctx context.Context, dir string) error {
return os.RemoveAll(dir)
}
// modifyProcessLabel is a no-op in this build variant: it ignores both
// arguments, leaves spec untouched and always returns nil.
func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
return nil
}
// isUnifiedCgroupsMode always reports false in this build variant.
func isUnifiedCgroupsMode() bool {
return false
}

View File

@@ -1,558 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"os"
goruntime "runtime"
"strings"
"testing"
"time"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/core/containers"
runcoptions "github.com/containerd/containerd/v2/core/runtime/v2/runc/options"
criconfig "github.com/containerd/containerd/v2/pkg/cri/config"
crilabels "github.com/containerd/containerd/v2/pkg/cri/labels"
containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
"github.com/containerd/containerd/v2/pkg/oci"
"github.com/containerd/containerd/v2/plugins"
"github.com/containerd/containerd/v2/protobuf/types"
"github.com/containerd/typeurl/v2"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pelletier/go-toml/v2"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestGetUserFromImage checks how getUserFromImage splits an image "user"
// string into either a numeric UID or a user name, ignoring anything after the
// first ':' separator.
func TestGetUserFromImage(t *testing.T) {
	i64Ptr := func(v int64) *int64 { return &v }

	type testCase struct {
		desc string
		user string
		uid  *int64
		name string
	}
	cases := []testCase{
		{desc: "no gid", user: "0", uid: i64Ptr(0)},
		{desc: "uid/gid", user: "0:1", uid: i64Ptr(0)},
		{desc: "empty user", user: ""},
		{desc: "multiple separators", user: "1:2:3", uid: i64Ptr(1)},
		{desc: "root username", user: "root:root", name: "root"},
		{desc: "username", user: "test:test", name: "test"},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			gotUID, gotName := getUserFromImage(tc.user)
			assert.Equal(t, tc.uid, gotUID)
			assert.Equal(t, tc.name, gotName)
		})
	}
}
// TestBuildLabels checks that buildLabels merges image and config labels with
// config labels winning, adds the container-kind label, drops the oversized
// image label, and returns a map independent of its inputs.
func TestBuildLabels(t *testing.T) {
	fromImage := map[string]string{
		"a":          "z",
		"d":          "y",
		"long-label": strings.Repeat("example", 10000),
	}
	fromConfig := map[string]string{
		"a": "b",
		"c": "d",
	}

	merged := buildLabels(fromConfig, fromImage, crilabels.ContainerKindSandbox)

	assert.Len(t, merged, 4)
	assert.Equal(t, "b", merged["a"])
	assert.Equal(t, "d", merged["c"])
	assert.Equal(t, "y", merged["d"])
	assert.Equal(t, crilabels.ContainerKindSandbox, merged[crilabels.ContainerKindLabel])
	assert.NotContains(t, merged, "long-label")

	// Mutating the result must not leak back into the config labels.
	merged["a"] = "e"
	assert.Empty(t, fromConfig[crilabels.ContainerKindLabel], "should not add new labels into original label")
	assert.Equal(t, "b", fromConfig["a"], "change in new labels should not affect original label")
}
// TestGenerateRuntimeOptions decodes two TOML CRI configurations and checks
// what criconfig.GenerateRuntimeOptions returns for the "runcv2" runtime of
// each: nil when the runtime has no options table, and a populated
// runcoptions.Options when it has one.
func TestGenerateRuntimeOptions(t *testing.T) {
// Configuration whose only runtime ("runcv2") has no options table.
nilOpts := `
systemd_cgroup = true
[containerd]
no_pivot = true
default_runtime_name = "default"
[containerd.runtimes.runcv2]
runtime_type = "` + plugins.RuntimeRuncV2 + `"
`
// Configuration with three runtimes; "runcv2" carries an options table.
nonNilOpts := `
systemd_cgroup = true
[containerd]
no_pivot = true
default_runtime_name = "default"
[containerd.runtimes.legacy.options]
Runtime = "legacy"
RuntimeRoot = "/legacy"
[containerd.runtimes.runc.options]
BinaryName = "runc"
Root = "/runc"
NoNewKeyring = true
[containerd.runtimes.runcv2]
runtime_type = "` + plugins.RuntimeRuncV2 + `"
[containerd.runtimes.runcv2.options]
BinaryName = "runc"
Root = "/runcv2"
NoNewKeyring = true
`
var nilOptsConfig, nonNilOptsConfig criconfig.Config
// Fail fast if the fixtures themselves do not decode as expected.
err := toml.Unmarshal([]byte(nilOpts), &nilOptsConfig)
require.NoError(t, err)
require.Len(t, nilOptsConfig.Runtimes, 1)
err = toml.Unmarshal([]byte(nonNilOpts), &nonNilOptsConfig)
require.NoError(t, err)
require.Len(t, nonNilOptsConfig.Runtimes, 3)
for _, test := range []struct {
desc string
r criconfig.Runtime
c criconfig.Config
expectedOptions interface{}
}{
{
desc: "when options is nil, should return nil option for io.containerd.runc.v2",
r: nilOptsConfig.Runtimes["runcv2"],
c: nilOptsConfig,
expectedOptions: nil,
},
{
desc: "when options is not nil, should be able to decode for io.containerd.runc.v2",
r: nonNilOptsConfig.Runtimes["runcv2"],
c: nonNilOptsConfig,
expectedOptions: &runcoptions.Options{
BinaryName: "runc",
Root: "/runcv2",
NoNewKeyring: true,
},
},
} {
// Shadow the loop variable so the subtest closure captures its own copy.
test := test
t.Run(test.desc, func(t *testing.T) {
opts, err := criconfig.GenerateRuntimeOptions(test.r)
assert.NoError(t, err)
assert.Equal(t, test.expectedOptions, opts)
})
}
}
// TestEnvDeduplication verifies that applying oci.WithEnv once per key/value
// pair leaves a single entry per variable in spec.Process.Env: a later value
// for an existing key replaces the earlier entry in place, and new keys are
// appended. Every case supplies at least one pair, so spec.Process is always
// populated by the time it is inspected.
func TestEnvDeduplication(t *testing.T) {
	for _, test := range []struct {
		desc     string
		existing []string
		kv       [][2]string
		expected []string
	}{
		{
			desc: "single env",
			kv: [][2]string{
				{"a", "b"},
			},
			expected: []string{"a=b"},
		},
		{
			desc: "multiple envs",
			kv: [][2]string{
				{"a", "b"},
				{"c", "d"},
				{"e", "f"},
			},
			expected: []string{
				"a=b",
				"c=d",
				"e=f",
			},
		},
		{
			desc: "env override",
			kv: [][2]string{
				{"k1", "v1"},
				{"k2", "v2"},
				{"k3", "v3"},
				{"k3", "v4"},
				{"k1", "v5"},
				{"k4", "v6"},
			},
			expected: []string{
				"k1=v5",
				"k2=v2",
				"k3=v4",
				"k4=v6",
			},
		},
		{
			desc: "existing env",
			existing: []string{
				"k1=v1",
				"k2=v2",
				"k3=v3",
			},
			kv: [][2]string{
				{"k3", "v4"},
				{"k2", "v5"},
				{"k4", "v6"},
			},
			expected: []string{
				"k1=v1",
				"k2=v5",
				"k3=v4",
				"k4=v6",
			},
		},
	} {
		test := test
		t.Run(test.desc, func(t *testing.T) {
			var spec runtimespec.Spec
			if len(test.existing) > 0 {
				spec.Process = &runtimespec.Process{
					Env: test.existing,
				}
			}
			for _, kv := range test.kv {
				// Surface a failing option immediately instead of letting it
				// show up later as a confusing Env mismatch.
				err := oci.WithEnv([]string{kv[0] + "=" + kv[1]})(context.Background(), nil, nil, &spec)
				require.NoError(t, err)
			}
			assert.Equal(t, test.expected, spec.Process.Env)
		})
	}
}
// TestPassThroughAnnotationsFilter checks which pod annotations
// getPassthroughAnnotations lets through for a given list of runtime
// annotation patterns, covering exact names, '*' wildcards at the start,
// middle and end of a pattern, and keys containing a '/' separator.
func TestPassThroughAnnotationsFilter(t *testing.T) {
	type testCase struct {
		desc                   string
		podAnnotations         map[string]string
		runtimePodAnnotations  []string
		passthroughAnnotations map[string]string
	}

	cases := []testCase{
		{
			desc:                   "should support direct match",
			podAnnotations:         map[string]string{"c": "d", "d": "e"},
			runtimePodAnnotations:  []string{"c"},
			passthroughAnnotations: map[string]string{"c": "d"},
		},
		{
			desc: "should support wildcard match",
			podAnnotations: map[string]string{
				"t.f":  "j",
				"z.g":  "o",
				"z":    "o",
				"y.ca": "b",
				"y":    "b",
			},
			runtimePodAnnotations: []string{"*.f", "z*g", "y.c*"},
			passthroughAnnotations: map[string]string{
				"t.f":  "j",
				"z.g":  "o",
				"y.ca": "b",
			},
		},
		{
			desc: "should support wildcard match all",
			podAnnotations: map[string]string{
				"t.f":  "j",
				"z.g":  "o",
				"z":    "o",
				"y.ca": "b",
				"y":    "b",
			},
			runtimePodAnnotations: []string{"*"},
			passthroughAnnotations: map[string]string{
				"t.f":  "j",
				"z.g":  "o",
				"z":    "o",
				"y.ca": "b",
				"y":    "b",
			},
		},
		{
			desc: "should support match including path separator",
			podAnnotations: map[string]string{
				"matchend.com/end":    "1",
				"matchend.com/end1":   "2",
				"matchend.com/1end":   "3",
				"matchmid.com/mid":    "4",
				"matchmid.com/mi1d":   "5",
				"matchmid.com/mid1":   "6",
				"matchhead.com/head":  "7",
				"matchhead.com/1head": "8",
				"matchhead.com/head1": "9",
				"matchall.com/abc":    "10",
				"matchall.com/def":    "11",
				"end/matchend":        "12",
				"end1/matchend":       "13",
				"1end/matchend":       "14",
				"mid/matchmid":        "15",
				"mi1d/matchmid":       "16",
				"mid1/matchmid":       "17",
				"head/matchhead":      "18",
				"1head/matchhead":     "19",
				"head1/matchhead":     "20",
				"abc/matchall":        "21",
				"def/matchall":        "22",
				"match1/match2":       "23",
				"nomatch/nomatch":     "24",
			},
			runtimePodAnnotations: []string{
				"matchend.com/end*",
				"matchmid.com/mi*d",
				"matchhead.com/*head",
				"matchall.com/*",
				"end*/matchend",
				"mi*d/matchmid",
				"*head/matchhead",
				"*/matchall",
				"match*/match*",
			},
			passthroughAnnotations: map[string]string{
				"matchend.com/end":    "1",
				"matchend.com/end1":   "2",
				"matchmid.com/mid":    "4",
				"matchmid.com/mi1d":   "5",
				"matchhead.com/head":  "7",
				"matchhead.com/1head": "8",
				"matchall.com/abc":    "10",
				"matchall.com/def":    "11",
				"end/matchend":        "12",
				"end1/matchend":       "13",
				"mid/matchmid":        "15",
				"mi1d/matchmid":       "16",
				"head/matchhead":      "18",
				"1head/matchhead":     "19",
				"abc/matchall":        "21",
				"def/matchall":        "22",
				"match1/match2":       "23",
			},
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			got := getPassthroughAnnotations(tc.podAnnotations, tc.runtimePodAnnotations)
			assert.Equal(t, tc.passthroughAnnotations, got)
		})
	}
}
// TestEnsureRemoveAllNotExist checks that ensureRemoveAll reports success for
// a path that does not exist.
func TestEnsureRemoveAllNotExist(t *testing.T) {
	const missing = "/non/existent/path"

	// should never return an error for a non-existent path
	err := ensureRemoveAll(context.Background(), missing)
	if err != nil {
		t.Fatal(err)
	}
}
// TestEnsureRemoveAllWithDir checks that ensureRemoveAll succeeds on an
// existing temporary directory.
func TestEnsureRemoveAllWithDir(t *testing.T) {
	target := t.TempDir()

	err := ensureRemoveAll(context.Background(), target)
	if err != nil {
		t.Fatal(err)
	}
}
// TestEnsureRemoveAllWithFile checks that ensureRemoveAll also succeeds when
// the target is a regular file rather than a directory.
func TestEnsureRemoveAllWithFile(t *testing.T) {
	file, createErr := os.CreateTemp("", "test-ensure-removeall-with-dir")
	if createErr != nil {
		t.Fatal(createErr)
	}
	// Only the path is needed from here on.
	file.Close()

	target := file.Name()
	if removeErr := ensureRemoveAll(context.Background(), target); removeErr != nil {
		t.Fatal(removeErr)
	}
}
// addContainer is a test helper for PID namespace targeting: it builds a
// container with the given IDs and a fake status (PID plus created/started/
// finished timestamps) and registers it in c's container store.
func addContainer(c *criService, containerID, sandboxID string, PID uint32, createdAt, startedAt, finishedAt int64) error {
	fakeStatus := containerstore.WithFakeStatus(containerstore.Status{
		Pid:        PID,
		CreatedAt:  createdAt,
		StartedAt:  startedAt,
		FinishedAt: finishedAt,
	})

	cntr, err := containerstore.NewContainer(
		containerstore.Metadata{
			ID:        containerID,
			SandboxID: sandboxID,
		},
		fakeStatus,
	)
	if err != nil {
		return err
	}
	return c.containerStore.Add(cntr)
}
// TestValidateTargetContainer checks that validateTargetContainer accepts only
// a container of the same sandbox that has not finished, and rejects stopped,
// unknown and foreign-sandbox containers.
func TestValidateTargetContainer(t *testing.T) {
	const (
		sandboxID = "test-sandbox-uid"

		// The existing container that will be targeted.
		targetID = "test-target-container"
		// A container that has finished running and cannot be targeted.
		stoppedID = "stopped-target-container"
		// A container from another pod.
		otherSandboxID = "other-sandbox-uid"
		otherID        = "other-target-container"
	)
	targetPID := uint32(4567)
	stoppedPID := uint32(6789)
	otherPID := uint32(7890)

	// Container create/start/stop times.
	createdAt := time.Now().Add(-15 * time.Second).UnixNano()
	startedAt := time.Now().Add(-10 * time.Second).UnixNano()
	finishedAt := time.Now().Add(-5 * time.Second).UnixNano()

	c := newTestCRIService()

	// A finishedAt of 0 leaves the container in the not-finished state.
	require.NoError(t,
		addContainer(c, targetID, sandboxID, targetPID, createdAt, startedAt, 0),
		"error creating test target container")
	require.NoError(t,
		addContainer(c, stoppedID, sandboxID, stoppedPID, createdAt, startedAt, finishedAt),
		"error creating test stopped container")
	require.NoError(t,
		addContainer(c, otherID, otherSandboxID, otherPID, createdAt, startedAt, 0),
		"error creating test container in other pod")

	type testCase struct {
		desc              string
		targetContainerID string
		expectError       bool
	}
	cases := []testCase{
		{desc: "target container in pod", targetContainerID: targetID, expectError: false},
		{desc: "target stopped container in pod", targetContainerID: stoppedID, expectError: true},
		{desc: "target container does not exist", targetContainerID: "no-container-with-this-id", expectError: true},
		{desc: "target container in other pod", targetContainerID: otherID, expectError: true},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			got, err := c.validateTargetContainer(sandboxID, tc.targetContainerID)
			if tc.expectError {
				require.Error(t, err, "target should have been invalid but no error")
				return
			}
			require.NoErrorf(t, err, "target should have been valid but got error")
			assert.Equal(t, tc.targetContainerID, got.ID, "returned target container does not have expected ID")
		})
	}
}
// TestGetRuntimeOptions checks that getRuntimeOptions returns no error both
// for a zero-value container and for a container whose runtime options hold a
// typed-nil value.
func TestGetRuntimeOptions(t *testing.T) {
// Zero-value container: Runtime.Options is left unset.
_, err := getRuntimeOptions(containers.Container{})
require.NoError(t, err)
var pbany *types.Any // This is nil.
var typeurlAny typeurl.Any = pbany // This is typed nil.
// The value passed below wraps a nil *types.Any; getRuntimeOptions must
// still succeed rather than fail on it.
_, err = getRuntimeOptions(containers.Container{Runtime: containers.RuntimeInfo{Options: typeurlAny}})
require.NoError(t, err)
}
// TestHostNetwork checks that hostNetwork reports true only when the sandbox's
// Linux network namespace mode is NODE. The test is skipped on every platform
// other than Linux.
func TestHostNetwork(t *testing.T) {
	// The platform check does not depend on the test case, so decide once up
	// front instead of re-evaluating it on every loop iteration.
	if goruntime.GOOS != "linux" {
		t.Skip()
	}

	tests := []struct {
		name     string
		c        *runtime.PodSandboxConfig
		expected bool
	}{
		{
			name: "when pod namespace return false",
			c: &runtime.PodSandboxConfig{
				Linux: &runtime.LinuxPodSandboxConfig{
					SecurityContext: &runtime.LinuxSandboxSecurityContext{
						NamespaceOptions: &runtime.NamespaceOption{
							Network: runtime.NamespaceMode_POD,
						},
					},
				},
			},
			expected: false,
		},
		{
			name: "when node namespace return true",
			c: &runtime.PodSandboxConfig{
				Linux: &runtime.LinuxPodSandboxConfig{
					SecurityContext: &runtime.LinuxSandboxSecurityContext{
						NamespaceOptions: &runtime.NamespaceOption{
							Network: runtime.NamespaceMode_NODE,
						},
					},
				},
			},
			expected: true,
		},
	}

	for _, tt := range tests {
		tt := tt
		t.Run(tt.name, func(t *testing.T) {
			if hostNetwork(tt.c) != tt.expected {
				t.Errorf("failed hostNetwork got %t expected %t", hostNetwork(tt.c), tt.expected)
			}
		})
	}
}

View File

@@ -1,175 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"os"
"path/filepath"
"syscall"
"github.com/opencontainers/runtime-spec/specs-go"
)
// openLogFile opens the container log file at path for appending, creating it
// when it does not exist. The file is opened through syscall.CreateFile with
// `FILE_SHARE_DELETE` in the share mode so that log files can be rotated by
// kubelet while they are open.
//
// Unfortunately this needs to be maintained as Go doesn't
// have a way to set FILE_SHARE_DELETE for os.OpenFile.
// https://github.com/golang/go/issues/32088
func openLogFile(path string) (*os.File, error) {
	path = fixLongPath(path)
	if path == "" {
		return nil, syscall.ERROR_FILE_NOT_FOUND
	}

	name16, err := syscall.UTF16PtrFromString(path)
	if err != nil {
		return nil, err
	}

	var (
		desiredAccess = uint32(syscall.FILE_APPEND_DATA)
		shareMode     = uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE | syscall.FILE_SHARE_DELETE)
		disposition   = uint32(syscall.OPEN_ALWAYS)
	)
	handle, err := syscall.CreateFile(name16, desiredAccess, shareMode, nil, disposition, syscall.FILE_ATTRIBUTE_NORMAL, 0)
	if err != nil {
		return nil, err
	}

	// Wrap the raw handle so callers get a regular *os.File.
	return os.NewFile(uintptr(handle), path), nil
}
// Copyright (c) 2009 The Go Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// fixLongPath returns the extended-length (\\?\-prefixed) form of
// path when needed, in order to avoid the default 260 character file
// path limit imposed by Windows. If path is not easily converted to
// the extended-length form (for example, if path is a relative path
// or contains .. elements), or is short enough, fixLongPath returns
// path unmodified.
//
// See https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath
//
// This is copied from https://golang.org/src/path/filepath/path_windows.go.
func fixLongPath(path string) string {
// Do nothing (and don't allocate) if the path is "short".
// Empirically (at least on the Windows Server 2013 builder),
// the kernel is arbitrarily okay with < 248 bytes. That
// matches what the docs above say:
// "When using an API to create a directory, the specified
// path cannot be so long that you cannot append an 8.3 file
// name (that is, the directory name cannot exceed MAX_PATH
// minus 12)." Since MAX_PATH is 260, 260 - 12 = 248.
//
// The MSDN docs appear to say that a normal path that is 248 bytes long
// will work; empirically the path must be less than 248 bytes long.
if len(path) < 248 {
// Don't fix. (This is how Go 1.7 and earlier worked,
// not automatically generating the \\?\ form)
return path
}
// The extended form begins with \\?\, as in
// \\?\c:\windows\foo.txt or \\?\UNC\server\share\foo.txt.
// The extended form disables evaluation of . and .. path
// elements and disables the interpretation of / as equivalent
// to \. The conversion here rewrites / to \ and elides
// . elements as well as trailing or duplicate separators. For
// simplicity it avoids the conversion entirely for relative
// paths or paths containing .. elements. For now,
// \\server\share paths are not converted to
// \\?\UNC\server\share paths because the rules for doing so
// are less well-specified.
if len(path) >= 2 && path[:2] == `\\` {
// Don't canonicalize UNC paths.
return path
}
if !filepath.IsAbs(path) {
// Relative path
return path
}
// The prefix deliberately omits the trailing backslash: every path element
// copied below is written with its own leading '\'.
const prefix = `\\?`
// Sized for the prefix, the whole input and one optional trailing '\'.
pathbuf := make([]byte, len(prefix)+len(path)+len(`\`))
copy(pathbuf, prefix)
n := len(path)
// r is the read index into path, w the write index into pathbuf.
r, w := 0, len(prefix)
for r < n {
switch {
case os.IsPathSeparator(path[r]):
// empty block
r++
case path[r] == '.' && (r+1 == n || os.IsPathSeparator(path[r+1])):
// /./
r++
case r+1 < n && path[r] == '.' && path[r+1] == '.' && (r+2 == n || os.IsPathSeparator(path[r+2])):
// /../ is currently unhandled
return path
default:
// Regular element: emit a single '\' followed by the element's bytes.
pathbuf[w] = '\\'
w++
for ; r < n && !os.IsPathSeparator(path[r]); r++ {
pathbuf[w] = path[r]
w++
}
}
}
// A drive's root directory needs a trailing \
if w == len(`\\?\c:`) {
pathbuf[w] = '\\'
w++
}
return string(pathbuf[:w])
}
// ensureRemoveAll is a wrapper for os.RemoveAll on Windows.
// The context argument is ignored and no errors are retried; the result of
// os.RemoveAll(dir) is returned unchanged.
func ensureRemoveAll(_ context.Context, dir string) error {
return os.RemoveAll(dir)
}
// modifyProcessLabel is a no-op in this build variant: it ignores both
// arguments, leaves spec untouched and always returns nil.
func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
return nil
}
// isUnifiedCgroupsMode always reports false in this build variant.
func isUnifiedCgroupsMode() bool {
return false
}

View File

@@ -1,71 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// TestWindowsHostNetwork checks that hostNetwork follows the HostProcess flag
// of the Windows sandbox security context: true only when the flag is set,
// false when it is cleared or left at its zero value.
func TestWindowsHostNetwork(t *testing.T) {
	// sandboxWith builds a sandbox config around the given security context.
	sandboxWith := func(sc *runtime.WindowsSandboxSecurityContext) *runtime.PodSandboxConfig {
		return &runtime.PodSandboxConfig{
			Windows: &runtime.WindowsPodSandboxConfig{
				SecurityContext: sc,
			},
		}
	}

	type testCase struct {
		name     string
		c        *runtime.PodSandboxConfig
		expected bool
	}
	cases := []testCase{
		{
			name:     "when host process is false returns false",
			c:        sandboxWith(&runtime.WindowsSandboxSecurityContext{HostProcess: false}),
			expected: false,
		},
		{
			name:     "when host process is true return true",
			c:        sandboxWith(&runtime.WindowsSandboxSecurityContext{HostProcess: true}),
			expected: true,
		},
		{
			name:     "when no host process return false",
			c:        sandboxWith(&runtime.WindowsSandboxSecurityContext{}),
			expected: false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if hostNetwork(tc.c) != tc.expected {
				t.Errorf("failed hostNetwork got %t expected %t", hostNetwork(tc.c), tc.expected)
			}
		})
	}
}

View File

@@ -1,77 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"fmt"
"sync"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/log"
"github.com/containerd/platforms"
)
// CheckImages checks all existing images to ensure they are ready to
// be used for CRI, processing every image in its own goroutine and waiting
// for all of them to finish.
//
// Problems with an individual image are only logged. The only error returned
// is a failure to list the images in the first place.
func (c *CRIImageService) CheckImages(ctx context.Context) error {
// TODO: Move away from `client.ListImages` to directly using image store
cImages, err := c.client.ListImages(ctx)
if err != nil {
return fmt.Errorf("unable to list images: %w", err)
}
// TODO: Support all snapshotter
snapshotter := c.config.Snapshotter
var wg sync.WaitGroup
for _, i := range cImages {
wg.Add(1)
// Shadow the loop variable so each goroutine works on its own image.
i := i
go func() {
defer wg.Done()
// TODO: Check platform/snapshot combination. Snapshot check should come first
ok, _, _, _, err := images.Check(ctx, i.ContentStore(), i.Target(), platforms.Default())
if err != nil {
log.G(ctx).WithError(err).Errorf("Failed to check image content readiness for %q", i.Name())
return
}
if !ok {
log.G(ctx).Warnf("The image content readiness for %q is not ok", i.Name())
return
}
// Checking existence of top-level snapshot for each image being recovered.
// TODO: This logic should be done elsewhere and owned by the image service
unpacked, err := i.IsUnpacked(ctx, snapshotter)
if err != nil {
log.G(ctx).WithError(err).Warnf("Failed to check whether image is unpacked for image %s", i.Name())
return
}
// An image that is not unpacked is only warned about; execution continues
// to UpdateImage below.
if !unpacked {
log.G(ctx).Warnf("The image %s is not unpacked.", i.Name())
// TODO(random-liu): Consider whether we should try unpack here.
}
if err := c.UpdateImage(ctx, i.Name()); err != nil {
log.G(ctx).WithError(err).Warnf("Failed to update reference for image %q", i.Name())
return
}
log.G(ctx).Debugf("Loaded image %q", i.Name())
}()
}
wg.Wait()
return nil
}

View File

@@ -1,40 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// ListImages returns every image currently held in the image store, converted
// to its CRI representation. The request is not inspected, so no filtering is
// applied, and the call never fails.
// TODO(random-liu): Add image list filters after CRI defines this more clear, and kubelet
// actually needs it.
func (c *GRPCCRIImageService) ListImages(ctx context.Context, r *runtime.ListImagesRequest) (*runtime.ListImagesResponse, error) {
	// TODO: From CRIImageService directly
	var criImages []*runtime.Image
	for _, stored := range c.imageStore.List() {
		// TODO(random-liu): [P0] Make sure corresponding snapshot exists. What if snapshot
		// doesn't exist?
		criImages = append(criImages, toCRIImage(stored))
	}

	resp := &runtime.ListImagesResponse{Images: criImages}
	return resp, nil
}

View File

@@ -1,113 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"testing"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
imagestore "github.com/containerd/containerd/v2/pkg/cri/store/image"
)
// TestListImages seeds a fake image store with three images and verifies that
// ListImages reports each of them with the expected CRI fields (id, tags,
// digests, size and user information).
func TestListImages(t *testing.T) {
	_, c := newTestCRIService()
	imagesInStore := []imagestore.Image{
		{
			ID:      "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
			ChainID: "test-chainid-1",
			References: []string{
				"gcr.io/library/busybox:latest",
				"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
			},
			Size: 1000,
			ImageSpec: imagespec.Image{
				Config: imagespec.ImageConfig{
					User: "root",
				},
			},
		},
		{
			ID:      "sha256:2123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
			ChainID: "test-chainid-2",
			References: []string{
				"gcr.io/library/alpine:latest",
				"gcr.io/library/alpine@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
			},
			Size: 2000,
			ImageSpec: imagespec.Image{
				Config: imagespec.ImageConfig{
					User: "1234:1234",
				},
			},
		},
		{
			ID:      "sha256:3123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
			ChainID: "test-chainid-3",
			References: []string{
				"gcr.io/library/ubuntu:latest",
				"gcr.io/library/ubuntu@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
			},
			Size: 3000,
			ImageSpec: imagespec.Image{
				Config: imagespec.ImageConfig{
					User: "nobody",
				},
			},
		},
	}
	expect := []*runtime.Image{
		{
			Id:          "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
			RepoTags:    []string{"gcr.io/library/busybox:latest"},
			RepoDigests: []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"},
			Size_:       uint64(1000),
			Username:    "root",
		},
		{
			Id:          "sha256:2123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
			RepoTags:    []string{"gcr.io/library/alpine:latest"},
			RepoDigests: []string{"gcr.io/library/alpine@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"},
			Size_:       uint64(2000),
			Uid:         &runtime.Int64Value{Value: 1234},
		},
		{
			Id:          "sha256:3123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
			RepoTags:    []string{"gcr.io/library/ubuntu:latest"},
			RepoDigests: []string{"gcr.io/library/ubuntu@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"},
			Size_:       uint64(3000),
			Username:    "nobody",
		},
	}

	var err error
	c.imageStore, err = imagestore.NewFakeStore(imagesInStore)
	// The fake store is a precondition for everything below: stop right away
	// if it could not be built instead of continuing with an unusable store.
	require.NoError(t, err)

	resp, err := c.ListImages(context.Background(), &runtime.ListImagesRequest{})
	require.NoError(t, err)
	require.NotNil(t, resp)

	// Order is not asserted; only that every expected image is present and
	// that no extra image is reported.
	images := resp.GetImages()
	assert.Len(t, images, len(expect))
	for _, i := range expect {
		assert.Contains(t, images, i)
	}
}

View File

@@ -1,792 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"crypto/tls"
"encoding/base64"
"fmt"
"io"
"net"
"net/http"
"net/url"
"path/filepath"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/containerd/log"
distribution "github.com/distribution/reference"
imagedigest "github.com/opencontainers/go-digest"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
eventstypes "github.com/containerd/containerd/v2/api/events"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/diff"
containerdimages "github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/core/remotes/docker"
"github.com/containerd/containerd/v2/core/remotes/docker/config"
"github.com/containerd/containerd/v2/pkg/cri/annotations"
criconfig "github.com/containerd/containerd/v2/pkg/cri/config"
crilabels "github.com/containerd/containerd/v2/pkg/cri/labels"
snpkg "github.com/containerd/containerd/v2/pkg/snapshotters"
"github.com/containerd/containerd/v2/pkg/tracing"
"github.com/containerd/errdefs"
)
// For image management:
// 1) We have an in-memory metadata index to:
// a. Maintain ImageID -> RepoTags, ImageID -> RepoDigest relationships; ImageID
// is the digest of image config, which conforms to oci image spec.
// b. Cache constant and useful information such as image chainID, config etc.
// c. An image will be added into the in-memory metadata only when it's successfully
// pulled and unpacked.
//
// 2) We use containerd image metadata store and content store:
// a. To resolve image reference (digest/tag) locally. During pulling image, we
// normalize the image reference provided by user, and put it into image metadata
// store with resolved descriptor. For the other operations, if image id is provided,
// we'll access the in-memory metadata index directly; if image reference is
// provided, we'll normalize it, resolve it in containerd image metadata store
// to get the image id.
// b. As the backup of in-memory metadata in 1). During startup, the in-memory
// metadata could be re-constructed from image metadata store + content store.
//
// Several problems with current approach:
// 1) An entry in containerd image metadata store doesn't mean a "READY" (successfully
// pulled and unpacked) image. E.g. during pulling, the client gets killed. In that case,
// if we saw an image without snapshots or with incomplete contents during startup,
// should we re-pull the image? Or should we remove the entry?
//
// yanxuean: We can't delete the image directly, because we don't know whether the
// image was pulled by us. This can leak resources.
//
// 2) Containerd suggests user to add entry before pulling the image. However if
// an error occurs during the pulling, should we remove the entry from metadata
// store? Or should we leave it there until next startup (resource leakage)?
//
// 3) The cri plugin only exposes "READY" (successfully pulled and unpacked) images
// to the user, which are maintained in the in-memory metadata index. However, it's
// still possible that someone else removes the content or snapshot, bypassing the cri plugin;
// how do we detect that and update the in-memory metadata correspondingly? Always
// check whether corresponding snapshot is ready when reporting image status?
//
// 4) Is the content important if we cached necessary information in-memory
// after we pull the image? How to manage the disk usage of contents? If some
// contents are missing but snapshots are ready, is the image still "READY"?
// PullImage serves the CRI PullImage request. Credentials are resolved per
// registry host: the auth carried by the request is used when present,
// otherwise the auth configured for that host in the registry configuration
// (if any). The actual pull is delegated to CRIImageService.PullImage and the
// reference it returns is sent back as ImageRef.
func (c *GRPCCRIImageService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (_ *runtime.PullImageResponse, err error) {
	credentials := func(host string) (string, string, error) {
		if reqAuth := r.GetAuth(); reqAuth != nil {
			return ParseAuth(reqAuth, host)
		}
		// No auth in the request: fall back to the per-host registry config.
		var configured *runtime.AuthConfig
		if hostCfg := c.config.Registry.Configs[host]; hostCfg.Auth != nil {
			configured = toRuntimeAuthConfig(*hostCfg.Auth)
		}
		return ParseAuth(configured, host)
	}

	pulledRef, err := c.CRIImageService.PullImage(ctx, r.GetImage().GetImage(), credentials, r.SandboxConfig)
	if err != nil {
		return nil, err
	}
	return &runtime.PullImageResponse{ImageRef: pulledRef}, nil
}
// PullImage pulls and unpacks the image called name and registers it with the
// CRI image store.
//
// name is normalized with distribution.ParseDockerRef before pulling.
// credentials is handed to the registry host configuration and is called with
// a registry host to obtain a user/secret pair. sandboxConfig, when it carries
// the runtime handler annotation, may select a non-default snapshotter.
//
// On success the returned string is the image ID, i.e. the digest of the image
// config. The named result err is inspected by a deferred function to count
// the pull as a success or a failure in the imagePulls metric.
func (c *CRIImageService) PullImage(ctx context.Context, name string, credentials func(string) (string, string, error), sandboxConfig *runtime.PodSandboxConfig) (_ string, err error) {
	span := tracing.SpanFromContext(ctx)
	defer func() {
		// TODO: add domain label for imagePulls metrics, and we may need to provide a mechanism
		// for the user to configure the set of registries that they are interested in.
		if err != nil {
			imagePulls.WithValues("failure").Inc()
		} else {
			imagePulls.WithValues("success").Inc()
		}
	}()

	// Track the number of pulls currently in flight for the whole duration of this call.
	inProgressImagePulls.Inc()
	defer inProgressImagePulls.Dec()
	startTime := time.Now()

	namedRef, err := distribution.ParseDockerRef(name)
	if err != nil {
		return "", fmt.Errorf("failed to parse image reference %q: %w", name, err)
	}
	ref := namedRef.String()
	if ref != name {
		log.G(ctx).Debugf("PullImage using normalized image ref: %q", ref)
	}

	imagePullProgressTimeout, err := time.ParseDuration(c.config.ImagePullProgressTimeout)
	if err != nil {
		return "", fmt.Errorf("failed to parse image_pull_progress_timeout %q: %w", c.config.ImagePullProgressTimeout, err)
	}

	var (
		// pctx is the context used for the pull itself; pcancel is shared with the
		// progress reporter so that it can abort a pull that makes no progress.
		pctx, pcancel = context.WithCancel(ctx)

		pullReporter = newPullProgressReporter(ref, pcancel, imagePullProgressTimeout)

		resolver = docker.NewResolver(docker.ResolverOptions{
			Headers: c.config.Registry.Headers,
			Hosts:   c.registryHosts(ctx, credentials, pullReporter.optionUpdateClient),
		})
		// isSchema1 is set by imageHandler when a Docker schema1 manifest is seen
		// during the pull; it later decides whether a repo digest is derived.
		isSchema1    bool
		imageHandler containerdimages.HandlerFunc = func(_ context.Context,
			desc imagespec.Descriptor) ([]imagespec.Descriptor, error) {
			if desc.MediaType == containerdimages.MediaTypeDockerSchema1Manifest {
				isSchema1 = true
			}
			return nil, nil
		}
	)

	// Covers every early return below; the explicit pcancel() after Pull covers the normal path.
	defer pcancel()
	snapshotter, err := c.snapshotterFromPodSandboxConfig(ctx, ref, sandboxConfig)
	if err != nil {
		return "", err
	}
	log.G(ctx).Debugf("PullImage %q with snapshotter %s", ref, snapshotter)
	span.SetAttributes(
		tracing.Attribute("image.ref", ref),
		tracing.Attribute("snapshotter.name", snapshotter),
	)

	labels := c.getLabels(ctx, ref)

	pullOpts := []containerd.RemoteOpt{
		containerd.WithSchema1Conversion, //nolint:staticcheck // Ignore SA1019. Need to keep deprecated package for compatibility.
		containerd.WithResolver(resolver),
		containerd.WithPullSnapshotter(snapshotter),
		containerd.WithPullUnpack,
		containerd.WithPullLabels(labels),
		containerd.WithMaxConcurrentDownloads(c.config.MaxConcurrentDownloads),
		containerd.WithImageHandler(imageHandler),
		containerd.WithUnpackOpts([]containerd.UnpackOpt{
			containerd.WithUnpackDuplicationSuppressor(c.unpackDuplicationSuppressor),
			containerd.WithUnpackApplyOpts(diff.WithSyncFs(c.config.ImagePullWithSyncFs)),
		}),
	}

	// Temporarily removed for v2 upgrade
	//pullOpts = append(pullOpts, c.encryptedImagesPullOpts()...)
	if !c.config.DisableSnapshotAnnotations {
		pullOpts = append(pullOpts,
			containerd.WithImageHandlerWrapper(snpkg.AppendInfoHandlerWrapper(ref)))
	}

	if c.config.DiscardUnpackedLayers {
		// Allows GC to clean layers up from the content store after unpacking
		pullOpts = append(pullOpts,
			containerd.WithChildLabelMap(containerdimages.ChildGCLabelsFilterLayers))
	}

	pullReporter.start(pctx)
	image, err := c.client.Pull(pctx, ref, pullOpts...)
	// The pull is over (successfully or not): cancel pctx so the reporter goroutine stops.
	pcancel()
	if err != nil {
		return "", fmt.Errorf("failed to pull and unpack image %q: %w", ref, err)
	}
	span.AddEvent("Pull and unpack image complete")

	configDesc, err := image.Config(ctx)
	if err != nil {
		return "", fmt.Errorf("get image config descriptor: %w", err)
	}
	imageID := configDesc.Digest.String()

	// Register the image under its ID, tag and digest (empty names are skipped),
	// all pointing at the same target descriptor.
	repoDigest, repoTag := getRepoDigestAndTag(namedRef, image.Target().Digest, isSchema1)
	for _, r := range []string{imageID, repoTag, repoDigest} {
		if r == "" {
			continue
		}
		if err := c.createImageReference(ctx, r, image.Target(), labels); err != nil {
			return "", fmt.Errorf("failed to create image reference %q: %w", r, err)
		}
		// Update image store to reflect the newest state in containerd.
		// No need to use `updateImage`, because the image reference must
		// have been managed by the cri plugin.
		// TODO: Use image service directly
		if err := c.imageStore.Update(ctx, r); err != nil {
			return "", fmt.Errorf("failed to update image store %q: %w", r, err)
		}
	}

	const mbToByte = 1024 * 1024
	// NOTE(review): the error from image.Size is discarded, so a failure is
	// reported as whatever size value comes back — confirm this best-effort
	// behavior is intended for the throughput metric and the log line below.
	size, _ := image.Size(ctx)
	imagePullingSpeed := float64(size) / mbToByte / time.Since(startTime).Seconds()
	imagePullThroughput.Observe(imagePullingSpeed)

	log.G(ctx).Infof("Pulled image %q with image id %q, repo tag %q, repo digest %q, size %q in %s", name, imageID,
		repoTag, repoDigest, strconv.FormatInt(size, 10), time.Since(startTime))
	// NOTE(random-liu): the actual state in containerd is the source of truth, even we maintain
	// in-memory image store, it's only for in-memory indexing. The image could be removed
	// by someone else anytime, before/during/after we create the metadata. We should always
	// check the actual state in containerd before using the image or returning status of the
	// image.
	return imageID, nil
}
// getRepoDigestAndTag derives the repo digest and repo tag for namedRef.
//
// The tag is the full reference string when namedRef carries a tag. The digest
// is the full reference string when namedRef is already canonical; otherwise
// it is built from the repository name and digest, except for schema1 images
// where digest is not the real repo digest and the result is left empty.
// Results are returned in the order (repoDigest, repoTag).
func getRepoDigestAndTag(namedRef distribution.Named, digest imagedigest.Digest, schema1 bool) (string, string) {
	var digestRef, tagRef string

	if _, tagged := namedRef.(distribution.NamedTagged); tagged {
		tagRef = namedRef.String()
	}

	_, canonical := namedRef.(distribution.Canonical)
	switch {
	case canonical:
		digestRef = namedRef.String()
	case !schema1:
		// digest is not actual repo digest for schema1 image.
		digestRef = namedRef.Name() + "@" + digest.String()
	}

	return digestRef, tagRef
}
// ParseAuth parses AuthConfig and returns username and password/secret required by containerd.
//
// Resolution order:
//  1. A nil auth yields empty credentials.
//  2. If ServerAddress is set and its URL host differs from host, empty
//     credentials are returned so that credentials are not sent to another
//     registry.
//  3. Username/Password, then IdentityToken (returned as the secret with an
//     empty user), then the base64 encoded "user:secret" Auth field.
//
// An error is returned when ServerAddress cannot be parsed, when Auth is not
// valid base64, or when the decoded Auth has no ':' separator. An empty auth
// config is valid and means anonymous access.
func ParseAuth(auth *runtime.AuthConfig, host string) (string, string, error) {
	if auth == nil {
		return "", "", nil
	}
	if auth.ServerAddress != "" {
		// Do not return the auth info when server address doesn't match.
		u, err := url.Parse(auth.ServerAddress)
		if err != nil {
			return "", "", fmt.Errorf("parse server address: %w", err)
		}
		if host != u.Host {
			return "", "", nil
		}
	}
	if auth.Username != "" {
		return auth.Username, auth.Password, nil
	}
	if auth.IdentityToken != "" {
		return "", auth.IdentityToken, nil
	}
	if auth.Auth != "" {
		// DecodeString returns exactly the decoded bytes, so no NUL padding
		// from an over-sized destination buffer ends up in the result.
		decoded, err := base64.StdEncoding.DecodeString(auth.Auth)
		if err != nil {
			return "", "", err
		}
		user, passwd, ok := strings.Cut(string(decoded), ":")
		if !ok {
			// The decoded blob is credential material: never echo it back in
			// an error that may be logged.
			return "", "", fmt.Errorf("invalid decoded auth: missing ':' separator between user and secret")
		}
		// NUL bytes around the secret have always been stripped here; keep
		// doing so to stay compatible with existing credentials.
		return user, strings.Trim(passwd, "\x00"), nil
	}
	// TODO(random-liu): Support RegistryToken.
	// An empty auth config is valid for anonymous registry
	return "", "", nil
}
// createImageReference creates image reference inside containerd image store.
// Note that because create and update are not finished in one transaction, there could be race. E.g.
// the image reference is deleted by someone else after create returns already exists, but before update
// happens.
//
// name is the reference to create, desc its target descriptor and labels the
// labels stored on the image. When the reference already exists it is updated
// (target and the cri image label only), unless the stored target digest and
// cri image label already match. An "/images/create" or "/images/update" event
// is published after the corresponding store operation when a publisher is
// configured; a publish failure is returned to the caller.
func (c *CRIImageService) createImageReference(ctx context.Context, name string, desc imagespec.Descriptor, labels map[string]string) error {
	img := containerdimages.Image{
		Name:   name,
		Target: desc,
		// Add a label to indicate that the image is managed by the cri plugin.
		Labels: labels,
	}
	// TODO(random-liu): Figure out which is the more performant sequence create then update or
	// update then create.
	// TODO: Call CRIImageService directly
	oldImg, err := c.images.Create(ctx, img)
	if err == nil {
		if c.publisher == nil {
			return nil
		}
		return c.publisher.Publish(ctx, "/images/create", &eventstypes.ImageCreate{
			Name:   img.Name,
			Labels: img.Labels,
		})
	}
	if !errdefs.IsAlreadyExists(err) {
		return err
	}

	// NOTE(review): this comparison relies on Create returning the existing
	// image alongside the already-exists error — confirm against the store.
	if oldImg.Target.Digest == img.Target.Digest && oldImg.Labels[crilabels.ImageLabelKey] == labels[crilabels.ImageLabelKey] {
		return nil
	}

	if _, err := c.images.Update(ctx, img, "target", "labels."+crilabels.ImageLabelKey); err != nil {
		return err
	}
	if c.publisher == nil {
		return nil
	}
	return c.publisher.Publish(ctx, "/images/update", &eventstypes.ImageUpdate{
		Name:   img.Name,
		Labels: img.Labels,
	})
}
// getLabels builds the labels to put on a CRI managed image: the cri image
// label is always present, and the pinned label is added when name matches
// one of the configured pinned images.
func (c *CRIImageService) getLabels(ctx context.Context, name string) map[string]string {
	result := map[string]string{}
	result[crilabels.ImageLabelKey] = crilabels.ImageLabelValue

	pinnedImages := c.config.PinnedImages
	for i := range pinnedImages {
		if pinnedImages[i] != name {
			continue
		}
		result[crilabels.PinnedImageLabelKey] = crilabels.PinnedImageLabelValue
	}
	return result
}
// UpdateImage refreshes the image store entry for reference r so that it
// reflects the current state of that reference in containerd.
//
// When the image exists but does not yet carry the cri image label, it is
// first adopted: a reference named after the image ID (config digest) is
// created and stored, then r itself is re-created with the managed labels.
// When the image is not found, the store is still updated so that the stale
// entry can be dropped from the cache.
func (c *CRIImageService) UpdateImage(ctx context.Context, r string) error {
	// TODO: Use image service
	img, getErr := c.client.GetImage(ctx, r)
	switch {
	case getErr != nil && !errdefs.IsNotFound(getErr):
		return fmt.Errorf("get image by reference: %w", getErr)

	case getErr == nil && img.Labels()[crilabels.ImageLabelKey] != crilabels.ImageLabelValue:
		// Make sure the image has the image id as its unique
		// identifier that references the image in its lifetime.
		cfg, err := img.Config(ctx)
		if err != nil {
			return fmt.Errorf("get image id: %w", err)
		}
		imageID := cfg.Digest.String()
		managed := c.getLabels(ctx, imageID)

		if err = c.createImageReference(ctx, imageID, img.Target(), managed); err != nil {
			return fmt.Errorf("create image id reference %q: %w", imageID, err)
		}
		if err = c.imageStore.Update(ctx, imageID); err != nil {
			return fmt.Errorf("update image store for %q: %w", imageID, err)
		}
		// The image id is ready, add the label to mark the image as managed.
		if err = c.createImageReference(ctx, r, img.Target(), managed); err != nil {
			return fmt.Errorf("create managed label: %w", err)
		}
	}

	// If the image is not found, we should continue updating the cache,
	// so that the image can be removed from the cache.
	if storeErr := c.imageStore.Update(ctx, r); storeErr != nil {
		return fmt.Errorf("update image store for %q: %w", r, storeErr)
	}
	return nil
}
// hostDirFromRoots returns a lookup function that resolves the configuration
// directory of a registry host by trying each root in order. The search stops
// at the first root that yields a non-empty directory or an error other than
// not-found; otherwise the result of the last root tried is returned.
func hostDirFromRoots(roots []string) func(string) (string, error) {
	lookups := make([]func(string) (string, error), 0, len(roots))
	for _, root := range roots {
		lookups = append(lookups, config.HostDirFromRoot(root))
	}

	return func(host string) (string, error) {
		var (
			dir string
			err error
		)
		for _, lookup := range lookups {
			dir, err = lookup(host)
			if err != nil && !errdefs.IsNotFound(err) {
				return dir, err
			}
			if dir != "" {
				return dir, err
			}
		}
		return dir, err
	}
}
// registryHosts is the registry hosts to be used by the resolver.
//
// When the registry ConfigPath is set, host configuration is delegated to
// config.ConfigureHosts using the given credentials and updateClientFn, with
// host directories looked up under each path of ConfigPath.
//
// Otherwise the returned function builds the host list from the mirror
// endpoints of the requested host (see registryEndpoints). Each endpoint gets
// its own HTTP client and authorizer and is granted resolve and pull
// capabilities only. credentials may be nil, in which case the auth configured
// for the endpoint host is used when there is one. updateClientFn, when
// non-nil, is applied to every client that is created.
func (c *CRIImageService) registryHosts(ctx context.Context, credentials func(host string) (string, string, error), updateClientFn config.UpdateClientFunc) docker.RegistryHosts {
	paths := filepath.SplitList(c.config.Registry.ConfigPath)
	if len(paths) > 0 {
		hostOptions := config.HostOptions{
			UpdateClient: updateClientFn,
		}
		hostOptions.Credentials = credentials
		hostOptions.HostDir = hostDirFromRoots(paths)

		return config.ConfigureHosts(ctx, hostOptions)
	}

	return func(host string) ([]docker.RegistryHost, error) {
		var registries []docker.RegistryHost

		endpoints, err := c.registryEndpoints(host)
		if err != nil {
			return nil, fmt.Errorf("get registry endpoints: %w", err)
		}
		for _, e := range endpoints {
			u, err := url.Parse(e)
			if err != nil {
				return nil, fmt.Errorf("parse registry endpoint %q from mirrors: %w", e, err)
			}

			var (
				transport = newTransport()
				client    = &http.Client{Transport: transport}
				// config shadows the imported config package for the rest of
				// this loop body; it is the registry config of the endpoint host.
				config = c.config.Registry.Configs[u.Host]
			)

			// NOTE(review): the localhost check is made on the requested host,
			// not on the endpoint host (u.Host) — confirm this is intended.
			if docker.IsLocalhost(host) && u.Scheme == "http" {
				// Skipping TLS verification for localhost
				transport.TLSClientConfig = &tls.Config{
					InsecureSkipVerify: true,
				}
			}

			// Make a copy of `credentials`, so that different authorizers would not reference
			// the same credentials variable.
			credentials := credentials
			if credentials == nil && config.Auth != nil {
				auth := toRuntimeAuthConfig(*config.Auth)
				credentials = func(host string) (string, string, error) {
					return ParseAuth(auth, host)
				}
			}

			// Let the caller decorate the client (e.g. wrap its transport)
			// before it is handed to the authorizer and the registry host.
			if updateClientFn != nil {
				if err := updateClientFn(client); err != nil {
					return nil, fmt.Errorf("failed to update http client: %w", err)
				}
			}

			authorizer := docker.NewDockerAuthorizer(
				docker.WithAuthClient(client),
				docker.WithAuthCreds(credentials))

			// Endpoints given without a path use the "/v2" prefix.
			if u.Path == "" {
				u.Path = "/v2"
			}

			registries = append(registries, docker.RegistryHost{
				Client:       client,
				Authorizer:   authorizer,
				Host:         u.Host,
				Scheme:       u.Scheme,
				Path:         u.Path,
				Capabilities: docker.HostCapabilityResolve | docker.HostCapabilityPull,
			})
		}
		return registries, nil
	}
}
// toRuntimeAuthConfig copies the username, password, auth and identity token
// of a cri plugin auth config into a newly allocated runtime auth config.
func toRuntimeAuthConfig(a criconfig.AuthConfig) *runtime.AuthConfig {
	converted := new(runtime.AuthConfig)
	converted.Username = a.Username
	converted.Password = a.Password
	converted.Auth = a.Auth
	converted.IdentityToken = a.IdentityToken
	return converted
}
// defaultScheme picks the URL scheme used for a registry host when none is
// given: "http" for hosts that docker.IsLocalhost reports as local, "https"
// for everything else.
func defaultScheme(host string) string {
	scheme := "https"
	if docker.IsLocalhost(host) {
		scheme = "http"
	}
	return scheme
}
// addDefaultScheme returns endpoint unchanged when it already contains a
// scheme ("://"). Otherwise it prefixes endpoint with the default scheme of
// its host. An error is returned when the endpoint cannot be parsed as a URL.
func addDefaultScheme(endpoint string) (string, error) {
	if !strings.Contains(endpoint, "://") {
		// A placeholder scheme makes url.Parse treat the leading part as the host.
		parsed, err := url.Parse("dummy://" + endpoint)
		if err != nil {
			return "", err
		}
		endpoint = fmt.Sprintf("%s://%s", defaultScheme(parsed.Host), endpoint)
	}
	return endpoint, nil
}
// registryEndpoints returns endpoints for a given host.
// It adds default registry endpoint if it does not exist in the passed-in endpoint list.
// It also supports wildcard host matching with `*`.
//
// The mirror configured for host takes precedence over the "*" mirror.
// Endpoints without a scheme get the default scheme of their host. The default
// endpoint of host is appended last unless one of the endpoints already points
// at host. The returned slice is owned by the caller: the configured endpoint
// list is never modified nor shared.
func (c *CRIImageService) registryEndpoints(host string) ([]string, error) {
	mirror, ok := c.config.Registry.Mirrors[host]
	if !ok {
		mirror = c.config.Registry.Mirrors["*"]
	}

	defaultHost, err := docker.DefaultHost(host)
	if err != nil {
		return nil, fmt.Errorf("get default host: %w", err)
	}

	// Normalize into a private slice. Writing back into (or appending to) the
	// configured slice would mutate shared configuration and could leak the
	// default endpoint of one host into the result computed for another.
	endpoints := make([]string, 0, len(mirror.Endpoints)+1)
	for _, e := range mirror.Endpoints {
		en, err := addDefaultScheme(e)
		if err != nil {
			return nil, fmt.Errorf("parse endpoint url: %w", err)
		}
		endpoints = append(endpoints, en)
	}

	for _, e := range endpoints {
		u, err := url.Parse(e)
		if err != nil {
			return nil, fmt.Errorf("parse endpoint url: %w", err)
		}
		if u.Host == host {
			// Do not add default if the endpoint already exists.
			return endpoints, nil
		}
	}
	return append(endpoints, defaultScheme(defaultHost)+"://"+defaultHost), nil
}
// newTransport builds a fresh HTTP transport for pulling images: proxy
// settings come from the environment, connections are dialed with 30s
// timeout/keep-alive and a 300ms fallback delay, and idle connection, TLS
// handshake and expect-continue waits are bounded.
// TODO(random-liu): Create a library and share this code with `ctr`.
func newTransport() *http.Transport {
	dialer := &net.Dialer{
		Timeout:       30 * time.Second,
		KeepAlive:     30 * time.Second,
		FallbackDelay: 300 * time.Millisecond,
	}

	tr := new(http.Transport)
	tr.Proxy = http.ProxyFromEnvironment
	tr.DialContext = dialer.DialContext
	tr.MaxIdleConns = 10
	tr.IdleConnTimeout = 30 * time.Second
	tr.TLSHandshakeTimeout = 10 * time.Second
	tr.ExpectContinueTimeout = 5 * time.Second
	return tr
}
// encryptedImagesPullOpts returns the necessary list of pull options required
// for decryption of encrypted images based on the cri decryption configuration.
// Temporarily removed for v2 upgrade
//func (c *CRIImageService) encryptedImagesPullOpts() []containerd.RemoteOpt {
// if c.config.ImageDecryption.KeyModel == criconfig.KeyModelNode {
// ltdd := imgcrypt.Payload{}
// decUnpackOpt := encryption.WithUnpackConfigApplyOpts(encryption.WithDecryptedUnpack(&ltdd))
// opt := containerd.WithUnpackOpts([]containerd.UnpackOpt{decUnpackOpt})
// return []containerd.RemoteOpt{opt}
// }
// return nil
//}
// defaultPullProgressReportInterval is the default period at which the pull
// progress reporter samples the progress of an image pull.
const defaultPullProgressReportInterval = 10 * time.Second
// pullProgressReporter is used to check single PullImage progress.
type pullProgressReporter struct {
	// ref is the image reference being pulled; it is only used in log messages.
	ref string
	// cancel is called by the goroutine started in start when the pull shows
	// no progress for longer than timeout.
	cancel context.CancelFunc
	// reqReporter holds the request and byte counters that the wrapped HTTP
	// transport installed by optionUpdateClient keeps up to date.
	reqReporter pullRequestReporter
	// timeout is how long a pull may go without progress before it is
	// cancelled. A zero timeout disables progress checking (see start).
	timeout time.Duration
}
// newPullProgressReporter returns a reporter for the pull of ref with zeroed
// counters. cancel is the function the reporter calls to abort the pull and
// timeout is the longest period without progress it tolerates.
func newPullProgressReporter(ref string, cancel context.CancelFunc, timeout time.Duration) *pullProgressReporter {
	r := new(pullProgressReporter)
	r.ref = ref
	r.cancel = cancel
	r.timeout = timeout
	return r
}
// optionUpdateClient wraps the current transport of client with a round
// tripper that reports requests and bytes read to this reporter. It always
// returns nil.
func (reporter *pullProgressReporter) optionUpdateClient(client *http.Client) error {
	tracked := &pullRequestReporterRoundTripper{reqReporter: &reporter.reqReporter}
	tracked.rt = client.Transport
	client.Transport = tracked
	return nil
}
// start launches a goroutine that periodically samples the pull counters and
// cancels the pull when no progress has been made for longer than the
// reporter's timeout. The goroutine exits when ctx is done or right after it
// cancelled the pull.
//
// A non-positive timeout disables the reporter: nothing is started. Progress
// means that either no request is active or more bytes were read since the
// previous sample. The sampling interval is the default report interval, or
// half of the timeout when the timeout is shorter than that.
func (reporter *pullProgressReporter) start(ctx context.Context) {
	// A negative timeout is treated like zero: it cannot be honored and would
	// otherwise reach time.NewTicker with a non-positive interval, which panics.
	if reporter.timeout <= 0 {
		log.G(ctx).Infof("no timeout and will not start pulling image %s reporter", reporter.ref)
		return
	}

	go func() {
		var (
			reportInterval = defaultPullProgressReportInterval

			lastSeenBytesRead = uint64(0)
			lastSeenTimestamp = time.Now()
		)

		// check progress more frequently if timeout < default internal
		if reporter.timeout < reportInterval {
			reportInterval = reporter.timeout / 2
		}
		// Halving a 1ns timeout rounds down to zero; the ticker needs a
		// strictly positive interval.
		if reportInterval <= 0 {
			reportInterval = reporter.timeout
		}

		var ticker = time.NewTicker(reportInterval)
		defer ticker.Stop()

		for {
			select {
			case <-ticker.C:
				activeReqs, bytesRead := reporter.reqReporter.status()

				log.G(ctx).WithField("ref", reporter.ref).
					WithField("activeReqs", activeReqs).
					WithField("totalBytesRead", bytesRead).
					WithField("lastSeenBytesRead", lastSeenBytesRead).
					WithField("lastSeenTimestamp", lastSeenTimestamp.Format(time.RFC3339)).
					WithField("reportInterval", reportInterval).
					Debugf("progress for image pull")

				// Idle (no active request) or advancing: remember the sample
				// and restart the no-progress clock.
				if activeReqs == 0 || bytesRead > lastSeenBytesRead {
					lastSeenBytesRead = bytesRead
					lastSeenTimestamp = time.Now()
					continue
				}

				if time.Since(lastSeenTimestamp) > reporter.timeout {
					log.G(ctx).Errorf("cancel pulling image %s because of no progress in %v", reporter.ref, reporter.timeout)
					reporter.cancel()
					return
				}
			case <-ctx.Done():
				activeReqs, bytesRead := reporter.reqReporter.status()
				log.G(ctx).Infof("stop pulling image %s: active requests=%v, bytes read=%v", reporter.ref, activeReqs, bytesRead)
				return
			}
		}
	}()
}
// countingReadCloser decorates an http.Response.Body so that reads and the
// end of the request are reported to a pullRequestReporter. It is installed
// by pullRequestReporterRoundTripper.
type countingReadCloser struct {
	// once guards the active-request decrement so repeated Close calls count once.
	once sync.Once

	// rc is the wrapped body.
	rc io.ReadCloser
	// reqReporter receives the byte and request accounting.
	reqReporter *pullRequestReporter
}

// Read forwards to the wrapped body and adds the number of bytes obtained to
// the reporter's total, whether or not the read also returned an error.
func (r *countingReadCloser) Read(p []byte) (n int, err error) {
	n, err = r.rc.Read(p)
	r.reqReporter.incByteRead(uint64(n))
	return n, err
}

// Close closes the wrapped body and then marks the request as no longer
// active. The decrement happens at most once, however often Close is called.
func (r *countingReadCloser) Close() error {
	closeErr := r.rc.Close()
	r.once.Do(func() { r.reqReporter.decRequest() })
	return closeErr
}
// pullRequestReporter holds the counters used to follow the progress of one
// criapi.PullImage call. All accesses go through sync/atomic.
type pullRequestReporter struct {
	// activeReqs is the number of pulling requests currently in flight,
	// including auth requests.
	activeReqs int32
	// totalBytesRead is the number of bytes read so far from the remote
	// registry.
	totalBytesRead uint64
}

// incRequest records that one more request is in flight.
func (p *pullRequestReporter) incRequest() {
	atomic.AddInt32(&p.activeReqs, 1)
}

// decRequest records that one in-flight request has finished.
func (p *pullRequestReporter) decRequest() {
	atomic.AddInt32(&p.activeReqs, -1)
}

// incByteRead adds nr to the total number of bytes read.
func (p *pullRequestReporter) incByteRead(nr uint64) {
	atomic.AddUint64(&p.totalBytesRead, nr)
}

// status returns the current number of active requests and the total number
// of bytes read. The two values are loaded one after the other.
func (p *pullRequestReporter) status() (currentReqs int32, totalBytesRead uint64) {
	return atomic.LoadInt32(&p.activeReqs), atomic.LoadUint64(&p.totalBytesRead)
}
// pullRequestReporterRoundTripper decorates an http.RoundTripper so that the
// progress of active HTTP requests can be followed through a
// pullRequestReporter: requests are counted while in flight and response
// bodies are replaced by a counting reader.
//
// NOTE:
//
// Although containerd provides ingester manager to track the progress
// of pulling request, for example `ctr image pull` shows the console progress
// bar, it needs more CPU resources to open/read the ingested files with
// acquiring containerd metadata plugin's boltdb lock.
//
// Before sending HTTP request to registry, the containerd.Client.Pull library
// will open writer by containerd ingester manager. Based on this, the
// http.RoundTripper wrapper can track the active progress with lower overhead
// even if the ref has been locked in ingester manager by other Pull request.
type pullRequestReporterRoundTripper struct {
	// rt is the wrapped round tripper that performs the request.
	rt http.RoundTripper

	// reqReporter receives the request and byte accounting.
	reqReporter *pullRequestReporter
}

// RoundTrip counts the request as active, performs it with the wrapped round
// tripper and, on success, swaps the response body for one that reports bytes
// read and releases the active count when closed. On failure the active count
// is released immediately and the error is returned with a nil response.
func (rt *pullRequestReporterRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	tracker := rt.reqReporter
	tracker.incRequest()

	resp, err := rt.rt.RoundTrip(req)
	if err != nil {
		tracker.decRequest()
		return nil, err
	}

	body := &countingReadCloser{reqReporter: tracker}
	body.rc = resp.Body
	resp.Body = body
	return resp, nil
}
// snapshotterFromPodSandboxConfig selects the snapshotter to pull imageRef with.
//
// Given that runtime information is not passed from PullImageRequest, we depend on an experimental annotation
// passed from pod sandbox config to get the runtimeHandler. The annotation key is specified in configuration.
// Once we know the runtime, try to override default snapshotter if it is set for this runtime.
// See https://github.com/containerd/containerd/issues/6657
//
// The configured default snapshotter is returned when s is nil, when it has no
// runtime handler annotation, or when no platform is registered for that
// handler. The returned error is currently always nil.
func (c *CRIImageService) snapshotterFromPodSandboxConfig(ctx context.Context, imageRef string,
	s *runtime.PodSandboxConfig) (string, error) {
	fallback := c.config.Snapshotter
	if s == nil {
		return fallback, nil
	}

	// Looking up a key in a nil map is safe and simply reports "not found".
	handler, annotated := s.Annotations[annotations.RuntimeHandler]
	if !annotated {
		return fallback, nil
	}

	// TODO: Ensure error is returned if runtime not found?
	platform, known := c.runtimePlatforms[handler]
	if !known || platform.Snapshotter == fallback {
		return fallback, nil
	}

	log.G(ctx).Infof("experimental: PullImage %q for runtime %s, using snapshotter %s", imageRef, handler, platform.Snapshotter)
	return platform.Snapshotter, nil
}

View File

@@ -1,543 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"encoding/base64"
"testing"
docker "github.com/distribution/reference"
"github.com/opencontainers/go-digest"
"github.com/stretchr/testify/assert"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/pkg/cri/annotations"
criconfig "github.com/containerd/containerd/v2/pkg/cri/config"
"github.com/containerd/containerd/v2/pkg/cri/labels"
"github.com/containerd/platforms"
)
// TestParseAuth checks the user/secret pair (or error) produced by ParseAuth
// for each supported credential form, and that credentials are withheld when
// the configured server address does not match the requested host.
func TestParseAuth(t *testing.T) {
	const (
		testUser   = "username"
		testPasswd = "password"
	)
	// A well-formed "user:secret" blob, and one using a wrong separator.
	validAuth := base64.StdEncoding.EncodeToString([]byte(testUser + ":" + testPasswd))
	invalidAuth := base64.StdEncoding.EncodeToString([]byte(testUser + "@" + testPasswd))

	type testCase struct {
		desc           string
		auth           *runtime.AuthConfig
		host           string
		expectedUser   string
		expectedSecret string
		expectErr      bool
	}

	cases := []testCase{
		{
			desc: "should not return error if auth config is nil",
		},
		{
			desc:      "should not return error if empty auth is provided for access to anonymous registry",
			auth:      &runtime.AuthConfig{},
			expectErr: false,
		},
		{
			desc:           "should support identity token",
			auth:           &runtime.AuthConfig{IdentityToken: "abcd"},
			expectedSecret: "abcd",
		},
		{
			desc: "should support username and password",
			auth: &runtime.AuthConfig{
				Username: testUser,
				Password: testPasswd,
			},
			expectedUser:   testUser,
			expectedSecret: testPasswd,
		},
		{
			desc:           "should support auth",
			auth:           &runtime.AuthConfig{Auth: validAuth},
			expectedUser:   testUser,
			expectedSecret: testPasswd,
		},
		{
			desc:      "should return error for invalid auth",
			auth:      &runtime.AuthConfig{Auth: invalidAuth},
			expectErr: true,
		},
		{
			desc: "should return empty auth if server address doesn't match",
			auth: &runtime.AuthConfig{
				Username:      testUser,
				Password:      testPasswd,
				ServerAddress: "https://registry-1.io",
			},
			host:           "registry-2.io",
			expectedUser:   "",
			expectedSecret: "",
		},
		{
			desc: "should return auth if server address matches",
			auth: &runtime.AuthConfig{
				Username:      testUser,
				Password:      testPasswd,
				ServerAddress: "https://registry-1.io",
			},
			host:           "registry-1.io",
			expectedUser:   testUser,
			expectedSecret: testPasswd,
		},
		{
			desc: "should return auth if server address is not specified",
			auth: &runtime.AuthConfig{
				Username: testUser,
				Password: testPasswd,
			},
			host:           "registry-1.io",
			expectedUser:   testUser,
			expectedSecret: testPasswd,
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			user, secret, err := ParseAuth(tc.auth, tc.host)
			assert.Equal(t, tc.expectErr, err != nil)
			assert.Equal(t, tc.expectedUser, user)
			assert.Equal(t, tc.expectedSecret, secret)
		})
	}
}
// TestRegistryEndpoints verifies the ordered endpoint list that
// registryEndpoints produces for a host under various mirror configurations.
func TestRegistryEndpoints(t *testing.T) {
	type endpointCase struct {
		desc     string
		mirrors  map[string]criconfig.Mirror
		host     string
		expected []string
	}
	cases := []endpointCase{
		{
			desc: "no mirror configured",
			mirrors: map[string]criconfig.Mirror{
				"registry-1.io": {Endpoints: []string{"https://registry-1.io", "https://registry-2.io"}},
			},
			host:     "registry-3.io",
			expected: []string{"https://registry-3.io"},
		},
		{
			desc: "mirror configured",
			mirrors: map[string]criconfig.Mirror{
				"registry-3.io": {Endpoints: []string{"https://registry-1.io", "https://registry-2.io"}},
			},
			host:     "registry-3.io",
			expected: []string{"https://registry-1.io", "https://registry-2.io", "https://registry-3.io"},
		},
		{
			desc: "wildcard mirror configured",
			mirrors: map[string]criconfig.Mirror{
				"*": {Endpoints: []string{"https://registry-1.io", "https://registry-2.io"}},
			},
			host:     "registry-3.io",
			expected: []string{"https://registry-1.io", "https://registry-2.io", "https://registry-3.io"},
		},
		{
			desc: "host should take precedence if both host and wildcard mirrors are configured",
			mirrors: map[string]criconfig.Mirror{
				"*":             {Endpoints: []string{"https://registry-1.io"}},
				"registry-3.io": {Endpoints: []string{"https://registry-2.io"}},
			},
			host:     "registry-3.io",
			expected: []string{"https://registry-2.io", "https://registry-3.io"},
		},
		{
			desc: "default endpoint in list with http",
			mirrors: map[string]criconfig.Mirror{
				"registry-3.io": {Endpoints: []string{"https://registry-1.io", "https://registry-2.io", "http://registry-3.io"}},
			},
			host:     "registry-3.io",
			expected: []string{"https://registry-1.io", "https://registry-2.io", "http://registry-3.io"},
		},
		{
			desc: "default endpoint in list with https",
			mirrors: map[string]criconfig.Mirror{
				"registry-3.io": {Endpoints: []string{"https://registry-1.io", "https://registry-2.io", "https://registry-3.io"}},
			},
			host:     "registry-3.io",
			expected: []string{"https://registry-1.io", "https://registry-2.io", "https://registry-3.io"},
		},
		{
			desc: "default endpoint in list with path",
			mirrors: map[string]criconfig.Mirror{
				"registry-3.io": {Endpoints: []string{"https://registry-1.io", "https://registry-2.io", "https://registry-3.io/path"}},
			},
			host:     "registry-3.io",
			expected: []string{"https://registry-1.io", "https://registry-2.io", "https://registry-3.io/path"},
		},
		{
			desc: "miss scheme endpoint in list with path",
			mirrors: map[string]criconfig.Mirror{
				"registry-3.io": {Endpoints: []string{"https://registry-3.io", "registry-1.io", "127.0.0.1:1234"}},
			},
			host:     "registry-3.io",
			expected: []string{"https://registry-3.io", "https://registry-1.io", "http://127.0.0.1:1234"},
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			// Each case gets a fresh service so mirror settings never leak between cases.
			svc, _ := newTestCRIService()
			svc.config.Registry.Mirrors = tc.mirrors

			endpoints, err := svc.registryEndpoints(tc.host)
			assert.NoError(t, err)
			assert.Equal(t, tc.expected, endpoints)
		})
	}
}
func TestDefaultScheme(t *testing.T) {
for _, test := range []struct {
desc string
host string
expected string
}{
{
desc: "should use http by default for localhost",
host: "localhost",
expected: "http",
},
{
desc: "should use http by default for localhost with port",
host: "localhost:8080",
expected: "http",
},
{
desc: "should use http by default for 127.0.0.1",
host: "127.0.0.1",
expected: "http",
},
{
desc: "should use http by default for 127.0.0.1 with port",
host: "127.0.0.1:8080",
expected: "http",
},
{
desc: "should use http by default for ::1",
host: "::1",
expected: "http",
},
{
desc: "should use http by default for ::1 with port",
host: "[::1]:8080",
expected: "http",
},
{
desc: "should use https by default for remote host",
host: "remote",
expected: "https",
},
{
desc: "should use https by default for remote host with port",
host: "remote:8080",
expected: "https",
},
{
desc: "should use https by default for remote ip",
host: "8.8.8.8",
expected: "https",
},
{
desc: "should use https by default for remote ip with port",
host: "8.8.8.8:8080",
expected: "https",
},
} {
test := test
t.Run(test.desc, func(t *testing.T) {
got := defaultScheme(test.host)
assert.Equal(t, test.expected, got)
})
}
}
// Temporarily removed for the v2 upgrade.
//func TestEncryptedImagePullOpts(t *testing.T) {
// for _, test := range []struct {
// desc string
// keyModel string
// expectedOpts int
// }{
// {
// desc: "node key model should return one unpack opt",
// keyModel: criconfig.KeyModelNode,
// expectedOpts: 1,
// },
// {
// desc: "no key model selected should default to node key model",
// keyModel: "",
// expectedOpts: 0,
// },
// } {
// test := test
// t.Run(test.desc, func(t *testing.T) {
// c, _ := newTestCRIService()
// c.config.ImageDecryption.KeyModel = test.keyModel
// got := len(c.encryptedImagesPullOpts())
// assert.Equal(t, test.expectedOpts, got)
// })
// }
//}
// TestSnapshotterFromPodSandboxConfig verifies which snapshotter
// snapshotterFromPodSandboxConfig selects for a pod sandbox config: the
// configured default, or the one registered for the annotated runtime handler.
func TestSnapshotterFromPodSandboxConfig(t *testing.T) {
	const (
		defaultSnapshotter = "native"
		runtimeSnapshotter = "devmapper"
	)

	cases := []struct {
		desc              string
		podSandboxConfig  *runtime.PodSandboxConfig
		expectSnapshotter string
		expectErr         bool
	}{
		{
			desc:              "should return default snapshotter for nil podSandboxConfig",
			expectSnapshotter: defaultSnapshotter,
		},
		{
			desc:              "should return default snapshotter for nil podSandboxConfig.Annotations",
			podSandboxConfig:  &runtime.PodSandboxConfig{},
			expectSnapshotter: defaultSnapshotter,
		},
		{
			desc:              "should return default snapshotter for empty podSandboxConfig.Annotations",
			podSandboxConfig:  &runtime.PodSandboxConfig{Annotations: make(map[string]string)},
			expectSnapshotter: defaultSnapshotter,
		},
		{
			desc: "should return default snapshotter for runtime not found",
			podSandboxConfig: &runtime.PodSandboxConfig{
				Annotations: map[string]string{annotations.RuntimeHandler: "runtime-not-exists"},
			},
			expectSnapshotter: defaultSnapshotter,
		},
		{
			desc: "should return snapshotter provided in podSandboxConfig.Annotations",
			podSandboxConfig: &runtime.PodSandboxConfig{
				Annotations: map[string]string{annotations.RuntimeHandler: "exiting-runtime"},
			},
			expectSnapshotter: runtimeSnapshotter,
		},
	}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			// Fresh service per case: default snapshotter plus one runtime
			// that is mapped to a different snapshotter.
			svc, _ := newTestCRIService()
			svc.config.Snapshotter = defaultSnapshotter
			svc.runtimePlatforms["exiting-runtime"] = ImagePlatform{
				Platform:    platforms.DefaultSpec(),
				Snapshotter: runtimeSnapshotter,
			}

			got, err := svc.snapshotterFromPodSandboxConfig(context.Background(), "test-image", tc.podSandboxConfig)
			assert.Equal(t, tc.expectSnapshotter, got)
			if tc.expectErr {
				assert.Error(t, err)
			}
		})
	}
}
// TestGetRepoDigestAndTag verifies the repo digest and repo tag that
// getRepoDigestAndTag derives from a parsed reference, an image digest and the
// schema1 flag.
func TestGetRepoDigestAndTag(t *testing.T) {
	imageDigest := digest.Digest("sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582")

	cases := []struct {
		desc               string
		ref                string
		schema1            bool
		expectedRepoDigest string
		expectedRepoTag    string
	}{
		{
			desc:               "repo tag should be empty if original ref has no tag",
			ref:                "gcr.io/library/busybox@" + imageDigest.String(),
			expectedRepoDigest: "gcr.io/library/busybox@" + imageDigest.String(),
		},
		{
			desc:               "repo tag should not be empty if original ref has tag",
			ref:                "gcr.io/library/busybox:latest",
			expectedRepoDigest: "gcr.io/library/busybox@" + imageDigest.String(),
			expectedRepoTag:    "gcr.io/library/busybox:latest",
		},
		{
			desc:               "repo digest should be empty if original ref is schema1 and has no digest",
			ref:                "gcr.io/library/busybox:latest",
			schema1:            true,
			expectedRepoDigest: "",
			expectedRepoTag:    "gcr.io/library/busybox:latest",
		},
		{
			desc:               "repo digest should not be empty if original ref is schema1 but has digest",
			ref:                "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59594",
			schema1:            true,
			expectedRepoDigest: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59594",
			expectedRepoTag:    "",
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			named, err := docker.ParseDockerRef(tc.ref)
			assert.NoError(t, err)

			gotDigest, gotTag := getRepoDigestAndTag(named, imageDigest, tc.schema1)
			assert.Equal(t, tc.expectedRepoDigest, gotDigest)
			assert.Equal(t, tc.expectedRepoTag, gotTag)
		})
	}
}
// TestImageGetLabels verifies the labels getLabels produces for a pulled image
// name, depending on whether that name appears among the configured pinned images.
func TestImageGetLabels(t *testing.T) {
	svc, _ := newTestCRIService()

	// pinnedLabels builds the label set expected for a pinned image.
	pinnedLabels := func() map[string]string {
		return map[string]string{
			labels.ImageLabelKey:       labels.ImageLabelValue,
			labels.PinnedImageLabelKey: labels.PinnedImageLabelValue,
		}
	}

	cases := []struct {
		name          string
		expectedLabel map[string]string
		pinnedImages  map[string]string
		pullImageName string
	}{
		{
			name:          "pinned image labels should get added on sandbox image",
			expectedLabel: pinnedLabels(),
			pinnedImages:  map[string]string{"sandbox": "k8s.gcr.io/pause:3.9"},
			pullImageName: "k8s.gcr.io/pause:3.9",
		},
		{
			name:          "pinned image labels should get added on sandbox image without tag",
			expectedLabel: pinnedLabels(),
			pinnedImages:  map[string]string{"sandboxnotag": "k8s.gcr.io/pause", "sandbox": "k8s.gcr.io/pause:latest"},
			pullImageName: "k8s.gcr.io/pause:latest",
		},
		{
			name:          "pinned image labels should get added on sandbox image specified with tag and digest both",
			expectedLabel: pinnedLabels(),
			pinnedImages: map[string]string{
				"sandboxtagdigest": "k8s.gcr.io/pause:3.9@sha256:45b23dee08af5e43a7fea6c4cf9c25ccf269ee113168c19722f87876677c5cb2",
				"sandbox":          "k8s.gcr.io/pause@sha256:45b23dee08af5e43a7fea6c4cf9c25ccf269ee113168c19722f87876677c5cb2",
			},
			pullImageName: "k8s.gcr.io/pause@sha256:45b23dee08af5e43a7fea6c4cf9c25ccf269ee113168c19722f87876677c5cb2",
		},
		{
			name:          "pinned image labels should get added on sandbox image specified with digest",
			expectedLabel: pinnedLabels(),
			pinnedImages:  map[string]string{"sandbox": "k8s.gcr.io/pause@sha256:45b23dee08af5e43a7fea6c4cf9c25ccf269ee113168c19722f87876677c5cb2"},
			pullImageName: "k8s.gcr.io/pause@sha256:45b23dee08af5e43a7fea6c4cf9c25ccf269ee113168c19722f87876677c5cb2",
		},
		{
			name:          "pinned image labels should not get added on other image",
			expectedLabel: map[string]string{labels.ImageLabelKey: labels.ImageLabelValue},
			pinnedImages:  map[string]string{"sandbox": "k8s.gcr.io/pause:3.9"},
			pullImageName: "k8s.gcr.io/random:latest",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// The service is shared across cases; only its pinned-image config changes.
			svc.config.PinnedImages = tc.pinnedImages
			got := svc.getLabels(context.Background(), tc.pullImageName)
			assert.Equal(t, tc.expectedLabel, got)
		})
	}
}

View File

@@ -1,79 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"fmt"
eventstypes "github.com/containerd/containerd/v2/api/events"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/pkg/tracing"
"github.com/containerd/errdefs"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// RemoveImage removes the whole image named by the request, regardless of
// whether the request identifies it by image ID or by reference; this is the
// semantic CRI defines today. An image that cannot be resolved locally is
// reported as success with an empty response.
//
// TODO(random-liu): Update CRI to pass image reference instead of ImageSpec. (See
// kubernetes/kubernetes#46255)
// TODO(random-liu): We should change CRI to distinguish image id and image spec.
func (c *GRPCCRIImageService) RemoveImage(ctx context.Context, r *runtime.RemoveImageRequest) (*runtime.RemoveImageResponse, error) {
	span := tracing.SpanFromContext(ctx)

	// TODO: Move to separate function
	requested := r.GetImage().GetImage()
	image, err := c.LocalResolve(requested)
	switch {
	case err == nil:
		// Resolved; continue with removal below.
	case errdefs.IsNotFound(err):
		span.AddEvent(err.Error())
		// return empty without error when image not found.
		return &runtime.RemoveImageResponse{}, nil
	default:
		return nil, fmt.Errorf("can not resolve %q locally: %w", requested, err)
	}
	span.SetAttributes(tracing.Attribute("image.id", image.ID))

	// Remove all image references.
	lastIdx := len(image.References) - 1
	for idx, ref := range image.References {
		var deleteOpts []images.DeleteOpt
		if idx == lastIdx {
			// Delete the last image reference synchronously to trigger garbage collection.
			// This is best effort. It is possible that the image reference is deleted by
			// someone else before this point.
			deleteOpts = []images.DeleteOpt{images.SynchronousDelete()}
		}

		// A reference that is already gone counts as deleted.
		if delErr := c.images.Delete(ctx, ref, deleteOpts...); delErr != nil && !errdefs.IsNotFound(delErr) {
			return nil, fmt.Errorf("failed to delete image reference %q for %q: %w", ref, image.ID, delErr)
		}

		// Update image store to reflect the newest state in containerd.
		if updErr := c.imageStore.Update(ctx, ref); updErr != nil {
			return nil, fmt.Errorf("failed to update image reference %q for %q: %w", ref, image.ID, updErr)
		}

		if c.publisher == nil {
			continue
		}
		event := &eventstypes.ImageDelete{Name: ref}
		if pubErr := c.publisher.Publish(ctx, "/images/delete", event); pubErr != nil {
			return nil, pubErr
		}
	}

	return &runtime.RemoveImageResponse{}, nil
}

View File

@@ -1,132 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"encoding/json"
"fmt"
"strconv"
"strings"
imagestore "github.com/containerd/containerd/v2/pkg/cri/store/image"
"github.com/containerd/containerd/v2/pkg/cri/util"
"github.com/containerd/containerd/v2/pkg/tracing"
"github.com/containerd/errdefs"
"github.com/containerd/log"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// ImageStatus reports the status of the requested image. When the image cannot
// be resolved locally, an empty response (nil image) is returned without error.
// Verbose info is included only when the request asks for it.
//
// TODO(random-liu): We should change CRI to distinguish image id and image spec. (See
// kubernetes/kubernetes#46255)
func (c *CRIImageService) ImageStatus(ctx context.Context, r *runtime.ImageStatusRequest) (*runtime.ImageStatusResponse, error) {
	span := tracing.SpanFromContext(ctx)

	requested := r.GetImage().GetImage()
	image, err := c.LocalResolve(requested)
	switch {
	case err == nil:
		// Resolved; build the status below.
	case errdefs.IsNotFound(err):
		span.AddEvent(err.Error())
		// return empty without error when image not found.
		return &runtime.ImageStatusResponse{}, nil
	default:
		return nil, fmt.Errorf("can not resolve %q locally: %w", requested, err)
	}
	span.SetAttributes(tracing.Attribute("image.id", image.ID))

	// TODO(random-liu): [P0] Make sure corresponding snapshot exists. What if snapshot
	// doesn't exist?
	criImage := toCRIImage(image)
	verboseInfo, err := c.toCRIImageInfo(ctx, &image, r.GetVerbose())
	if err != nil {
		return nil, fmt.Errorf("failed to generate image info: %w", err)
	}

	resp := &runtime.ImageStatusResponse{
		Image: criImage,
		Info:  verboseInfo,
	}
	return resp, nil
}
// toCRIImage builds the CRI runtime.Image for an internal image record. Repo
// tags and digests come from the image references; the image's configured user
// is reported as a uid when numeric and as a username otherwise.
func toCRIImage(image imagestore.Image) *runtime.Image {
	tags, digests := util.ParseImageReferences(image.References)
	uid, username := getUserFromImage(image.ImageSpec.Config.User)

	criImage := &runtime.Image{
		Id:          image.ID,
		RepoTags:    tags,
		RepoDigests: digests,
		Size_:       uint64(image.Size),
		Pinned:      image.Pinned,
		Username:    username,
	}
	// Uid stays nil unless the image user was a numeric uid.
	if uid != nil {
		criImage.Uid = &runtime.Int64Value{Value: *uid}
	}
	return criImage
}
// getUserFromImage extracts the uid or user name from an image's user string.
//
// Only the part before the first ":" is considered, so "user:group" and
// "uid:gid" forms are accepted. If that part parses as a base-10 int64 it is
// returned as the uid with an empty name; otherwise it is returned as the user
// name with a nil uid. An empty user yields (nil, "").
func getUserFromImage(user string) (*int64, string) {
	// return both empty if user is not specified in the image.
	if user == "" {
		return nil, ""
	}
	// Drop an optional ":group" suffix. Cut returns the whole string when no
	// separator is present and avoids allocating a slice the way Split does.
	user, _, _ = strings.Cut(user, ":")
	// user could be either uid or user name. Try to interpret as numeric uid.
	uid, err := strconv.ParseInt(user, 10, 64)
	if err != nil {
		// If user is non numeric, assume it's user name.
		return nil, user
	}
	// If user is a numeric uid.
	return &uid, ""
}
// verboseImageInfo is the payload that toCRIImageInfo JSON-encodes into the
// "info" entry of the image status info map.
//
// TODO (mikebrow): discuss moving this struct and / or constants for info map for some or all of these fields to CRI
type verboseImageInfo struct {
// ChainID is copied from the internal image's ChainID.
ChainID string `json:"chainID"`
// ImageSpec is copied from the internal image's ImageSpec.
ImageSpec imagespec.Image `json:"imageSpec"`
}
// toCRIImageInfo builds the info map of an image status response. It returns a
// nil map unless verbose is set. In verbose mode the map has a single "info"
// key holding the JSON-encoded chain ID and image spec; if encoding fails, the
// failure is logged and the error text is stored under "info" instead. The
// returned error is always nil.
func (c *CRIImageService) toCRIImageInfo(ctx context.Context, image *imagestore.Image, verbose bool) (map[string]string, error) {
	if !verbose {
		return nil, nil
	}

	details := &verboseImageInfo{
		ChainID:   image.ChainID,
		ImageSpec: image.ImageSpec,
	}

	encoded, err := json.Marshal(details)
	if err != nil {
		log.G(ctx).WithError(err).Errorf("failed to marshal info %v", details)
		return map[string]string{"info": err.Error()}, nil
	}
	return map[string]string{"info": string(encoded)}, nil
}

View File

@@ -1,139 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"testing"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
imagestore "github.com/containerd/containerd/v2/pkg/cri/store/image"
"github.com/containerd/containerd/v2/pkg/cri/util"
)
// TestImageStatus verifies that ImageStatus returns an empty response for an
// unknown image and the expected CRI image once the image is in the store.
func TestImageStatus(t *testing.T) {
	const (
		testID     = "sha256:d848ce12891bf78792cda4a23c58984033b0c397a55e93a1556202222ecc5ed4" // #nosec G101
		tagRef     = "gcr.io/library/busybox:latest"
		digestRef  = "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"
		storedSize = 1234
	)

	stored := imagestore.Image{
		ID:         testID,
		ChainID:    "test-chain-id",
		References: []string{tagRef, digestRef},
		Size:       storedSize,
		ImageSpec: imagespec.Image{
			Config: imagespec.ImageConfig{User: "user:group"},
		},
	}
	want := &runtime.Image{
		Id:          testID,
		RepoTags:    []string{tagRef},
		RepoDigests: []string{digestRef},
		Size_:       uint64(storedSize),
		Username:    "user",
	}
	request := func() *runtime.ImageStatusRequest {
		return &runtime.ImageStatusRequest{Image: &runtime.ImageSpec{Image: testID}}
	}

	c, g := newTestCRIService()

	t.Logf("should return nil image spec without error for non-exist image")
	resp, err := c.ImageStatus(context.Background(), request())
	assert.NoError(t, err)
	require.NotNil(t, resp)
	assert.Nil(t, resp.GetImage())

	// Swap in a store that knows about the image, then query again.
	c.imageStore, err = imagestore.NewFakeStore([]imagestore.Image{stored})
	assert.NoError(t, err)

	t.Logf("should return correct image status for exist image")
	resp, err = g.ImageStatus(context.Background(), request())
	assert.NoError(t, err)
	assert.NotNil(t, resp)
	assert.Equal(t, want, resp.GetImage())
}
// TestParseImageReferences verifies that util.ParseImageReferences splits a
// reference list into repo tags and repo digests, leaving out the bare digest
// and the arbitrary reference.
func TestParseImageReferences(t *testing.T) {
	const (
		digestRef = "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"
		tagRef    = "gcr.io/library/busybox:1.2"
	)
	input := []string{
		digestRef,
		tagRef,
		"sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
		"arbitrary-ref",
	}

	gotTags, gotDigests := util.ParseImageReferences(input)

	assert.Equal(t, []string{tagRef}, gotTags)
	assert.Equal(t, []string{digestRef}, gotDigests)
}
// TestGetUserFromImage checks how getUserFromImage splits an image user string
// into a numeric uid or a user name.
func TestGetUserFromImage(t *testing.T) {
	uidPtr := func(v int64) *int64 { return &v }

	cases := []struct {
		desc string
		user string
		uid  *int64
		name string
	}{
		{desc: "no gid", user: "0", uid: uidPtr(0)},
		{desc: "uid/gid", user: "0:1", uid: uidPtr(0)},
		{desc: "empty user", user: ""},
		{desc: "multiple separators", user: "1:2:3", uid: uidPtr(1)},
		{desc: "root username", user: "root:root", name: "root"},
		{desc: "username", user: "test:test", name: "test"},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			gotUID, gotName := getUserFromImage(tc.user)
			assert.Equal(t, tc.uid, gotUID)
			assert.Equal(t, tc.name, gotName)
		})
	}
}

View File

@@ -1,83 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"time"
"github.com/containerd/containerd/v2/pkg/cri/store/snapshot"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// ImageFsInfo reports filesystem usage for image storage, one entry per
// snapshotter. Size and inode counts are summed over the snapshots of each
// snapshotter and the oldest snapshot timestamp is used. The default
// snapshotter always comes first, with zero usage and the current time when it
// has no snapshots.
//
// TODO(windows): Usage for windows is always 0 right now. Support this for windows.
// TODO(random-liu): Handle storage consumed by content store
func (c *CRIImageService) ImageFsInfo(ctx context.Context, r *runtime.ImageFsInfoRequest) (*runtime.ImageFsInfoResponse, error) {
	// Aggregate usage per snapshotter name.
	totals := map[string]snapshot.Snapshot{}
	for _, sn := range c.snapshotStore.List() {
		name := sn.Key.Snapshotter
		agg, seen := totals[name]
		if !seen {
			totals[name] = snapshot.Snapshot{
				Timestamp: sn.Timestamp,
				Size:      sn.Size,
				Inodes:    sn.Inodes,
			}
			continue
		}
		// Use the oldest timestamp as the timestamp of imagefs info.
		if sn.Timestamp < agg.Timestamp {
			agg.Timestamp = sn.Timestamp
		}
		agg.Size += sn.Size
		agg.Inodes += sn.Inodes
		totals[name] = agg
	}

	// usageOf converts aggregated totals into the CRI usage record for a snapshotter.
	usageOf := func(snapshotter string, total snapshot.Snapshot) *runtime.FilesystemUsage {
		return &runtime.FilesystemUsage{
			Timestamp:  total.Timestamp,
			FsId:       &runtime.FilesystemIdentifier{Mountpoint: c.imageFSPaths[snapshotter]},
			UsedBytes:  &runtime.UInt64Value{Value: total.Size},
			InodesUsed: &runtime.UInt64Value{Value: total.Inodes},
		}
	}

	// Currently kubelet always consumes the first entry of the returned array,
	// so put the default snapshotter as the first entry for compatibility.
	defaultName := c.config.Snapshotter
	defaultTotal, found := totals[defaultName]
	if found {
		delete(totals, defaultName)
	} else {
		// No snapshots yet: report zero usage stamped with the current time.
		defaultTotal = snapshot.Snapshot{Timestamp: time.Now().UnixNano()}
	}
	imageFilesystems := []*runtime.FilesystemUsage{usageOf(defaultName, defaultTotal)}

	for name, total := range totals {
		imageFilesystems = append(imageFilesystems, usageOf(name, total))
	}

	return &runtime.ImageFsInfoResponse{ImageFilesystems: imageFilesystems}, nil
}

View File

@@ -1,80 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"testing"
snapshot "github.com/containerd/containerd/v2/core/snapshots"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
snapshotstore "github.com/containerd/containerd/v2/pkg/cri/store/snapshot"
)
// TestImageFsInfo verifies that ImageFsInfo sums size and inodes across the
// snapshots of one snapshotter and reports the oldest snapshot timestamp.
func TestImageFsInfo(t *testing.T) {
	c, g := newTestCRIService()

	// overlayfsKey builds a snapshot key under the "overlayfs" snapshotter.
	overlayfsKey := func(key string) snapshotstore.Key {
		return snapshotstore.Key{Key: key, Snapshotter: "overlayfs"}
	}
	fixtures := []snapshotstore.Snapshot{
		{Key: overlayfsKey("key1"), Kind: snapshot.KindActive, Size: 10, Inodes: 100, Timestamp: 234567},
		{Key: overlayfsKey("key2"), Kind: snapshot.KindCommitted, Size: 20, Inodes: 200, Timestamp: 123456},
		{Key: overlayfsKey("key3"), Kind: snapshot.KindView, Size: 0, Inodes: 0, Timestamp: 345678},
	}
	for _, sn := range fixtures {
		c.snapshotStore.Add(sn)
	}

	want := &runtime.FilesystemUsage{
		Timestamp:  123456,
		FsId:       &runtime.FilesystemIdentifier{Mountpoint: testImageFSPath},
		UsedBytes:  &runtime.UInt64Value{Value: 30},
		InodesUsed: &runtime.UInt64Value{Value: 300},
	}

	resp, err := g.ImageFsInfo(context.Background(), &runtime.ImageFsInfoRequest{})
	require.NoError(t, err)

	usages := resp.GetImageFilesystems()
	// usages[0] is for default snapshotter, usages[1] is for `overlayfs`
	assert.Len(t, usages, 2)
	assert.Equal(t, want, usages[1])
}

View File

@@ -1,53 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"github.com/docker/go-metrics"
prom "github.com/prometheus/client_golang/prometheus"
)
// Image pull metrics; all three are created and registered in init.
var (
// imagePulls counts image pulls, labeled by "status" (succeeded and failed).
imagePulls metrics.LabeledCounter
// inProgressImagePulls gauges the pulls currently in progress.
inProgressImagePulls metrics.Gauge
// image size in MB / image pull duration in seconds
imagePullThroughput prom.Histogram
)
// init creates the image pull metrics under the "containerd" namespace and
// "cri_sandboxed" subsystem, then registers that namespace.
func init() {
	const (
		namespace = "containerd"
		subsystem = "cri_sandboxed"
	)

	ns := metrics.NewNamespace(namespace, subsystem, nil)

	// Counter and gauge are created through the namespace itself.
	imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status")
	inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total)

	// The throughput histogram is built with the prometheus client directly
	// and then attached to the namespace.
	throughputOpts := prom.HistogramOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "image_pulling_throughput",
		Help:      "image pull throughput",
		Buckets:   prom.DefBuckets,
	}
	imagePullThroughput = prom.NewHistogram(throughputOpts)
	ns.Add(imagePullThroughput)

	metrics.Register(ns)
}

View File

@@ -1,198 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"time"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/content"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/internal/kmutex"
criconfig "github.com/containerd/containerd/v2/pkg/cri/config"
imagestore "github.com/containerd/containerd/v2/pkg/cri/store/image"
snapshotstore "github.com/containerd/containerd/v2/pkg/cri/store/snapshot"
"github.com/containerd/containerd/v2/pkg/events"
"github.com/containerd/log"
"github.com/containerd/platforms"
docker "github.com/distribution/reference"
imagedigest "github.com/opencontainers/go-digest"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// imageClient is the subset of the containerd client that the image service
// depends on: listing images, getting a single image, and pulling an image.
type imageClient interface {
ListImages(context.Context, ...string) ([]containerd.Image, error)
GetImage(context.Context, string) (containerd.Image, error)
Pull(context.Context, string, ...containerd.RemoteOpt) (containerd.Image, error)
}
// ImagePlatform pairs a snapshotter name with a platform. CRIImageService
// keeps one per runtime in its runtimePlatforms map.
type ImagePlatform struct {
// Snapshotter is the name of the snapshotter.
Snapshotter string
// Platform is the platform specification.
Platform platforms.Platform
}
// CRIImageService holds the configuration, stores and clients behind the CRI
// image operations in this package. Instances are built by NewService.
type CRIImageService struct {
// config contains all image configurations.
config criconfig.ImageConfig
// images is the lower level image store used for raw storage,
// no event publishing should currently be assumed
images images.Store
// publisher is the events publisher
publisher events.Publisher
// client is a subset of the containerd client
// and will be replaced by image store and transfer service
client imageClient
// imageFSPaths contains path to image filesystem for snapshotters.
imageFSPaths map[string]string
// runtimePlatforms are the platforms configured for a runtime.
runtimePlatforms map[string]ImagePlatform
// imageStore stores all resources associated with images.
imageStore *imagestore.Store
// snapshotStore stores information of all snapshots.
snapshotStore *snapshotstore.Store
// unpackDuplicationSuppressor is used to make sure that there is only
// one in-flight fetch request or unpack handler for a given descriptor's
// or chain ID.
unpackDuplicationSuppressor kmutex.KeyedLocker
}
// GRPCCRIImageService embeds *CRIImageService and is the value that
// CRIImageService.GRPCService returns as a runtime.ImageServiceServer.
type GRPCCRIImageService struct {
*CRIImageService
}
// CRIImageServiceOptions carries the dependencies handed to NewService.
type CRIImageServiceOptions struct {
// Content and Images are passed to imagestore.NewStore; Images is also kept
// as the service's lower level image store.
Content content.Store
Images images.Store
// ImageFSPaths and RuntimePlatforms are stored on the service as given.
ImageFSPaths map[string]string
RuntimePlatforms map[string]ImagePlatform
// Snapshotters is handed to the snapshots syncer that NewService starts.
Snapshotters map[string]snapshots.Snapshotter
// Publisher is an events publisher.
Publisher events.Publisher
// Client is the containerd client subset stored on the service.
Client imageClient
}
// NewService creates a new CRI Image Service from config and options, and
// starts a snapshots syncer that refreshes the snapshot store every
// config.StatsCollectPeriod seconds. options must be non-nil. The returned
// error is currently always nil.
//
// TODO:
//  1. Generalize the image service and merge with a single higher level image service
//  2. Update the options to remove client and imageFSPath
//     - Platform configuration with Array/Map of snapshotter names + filesystem ID + platform matcher + runtime to snapshotter
//     - Transfer service implementation
//     - Image Service (from metadata)
//     - Content store (from metadata)
//  3. Separate image cache and snapshot cache to first class plugins, make the snapshot cache much more efficient and intelligent
func NewService(config criconfig.ImageConfig, options *CRIImageServiceOptions) (*CRIImageService, error) {
	svc := CRIImageService{
		config: config,
		images: options.Images,
		// Wire the publisher through; without it the service's publisher
		// stays nil and the nil-guarded event publishing (e.g. the
		// "/images/delete" event in RemoveImage) is silently skipped.
		publisher:                   options.Publisher,
		client:                      options.Client,
		imageStore:                  imagestore.NewStore(options.Images, options.Content, platforms.Default()),
		imageFSPaths:                options.ImageFSPaths,
		runtimePlatforms:            options.RuntimePlatforms,
		snapshotStore:               snapshotstore.NewStore(),
		unpackDuplicationSuppressor: kmutex.New(),
	}

	log.L.Info("Start snapshots syncer")
	snapshotsSyncer := newSnapshotsSyncer(
		svc.snapshotStore,
		options.Snapshotters,
		time.Duration(svc.config.StatsCollectPeriod)*time.Second,
	)
	snapshotsSyncer.start()

	return &svc, nil
}
// LocalResolve resolves image reference locally and returns corresponding image metadata. It
// returns errdefs.ErrNotFound if the reference doesn't exist.
//
// refOrID is used directly as the image ID when it parses as a digest.
// Otherwise it is normalized as a docker reference and resolved through the
// image store; if normalization or resolution fails (or yields an empty ID),
// refOrID itself is tried as the image ID.
func (c *CRIImageService) LocalResolve(refOrID string) (imagestore.Image, error) {
	// Default: treat the input as the image ID. This covers both the digest
	// case and the fallback when reference resolution fails.
	imageID := refOrID

	if _, err := imagedigest.Parse(refOrID); err != nil {
		// refOrID is not an image id, try to resolve it locally.
		// TODO(random-liu): Handle this error better for debugging.
		if normalized, err := docker.ParseDockerRef(refOrID); err == nil {
			if id, err := c.imageStore.Resolve(normalized.String()); err == nil && id != "" {
				imageID = id
			}
		}
	}

	return c.imageStore.Get(imageID)
}
// RuntimeSnapshotter returns the snapshotter to use for the given runtime:
// the runtime's own Snapshotter when one is set, otherwise the snapshotter
// from the image service configuration.
// See https://github.com/containerd/containerd/issues/6657
// TODO: Pass in name and get back runtime platform
func (c *CRIImageService) RuntimeSnapshotter(ctx context.Context, ociRuntime criconfig.Runtime) string {
	if override := ociRuntime.Snapshotter; override != "" {
		log.G(ctx).Debugf("Set snapshotter for runtime %s to %s", ociRuntime.Type, override)
		return override
	}
	return c.config.Snapshotter
}
// GetImage looks up image metadata in the image store by image id and
// returns the store's result unchanged.
func (c *CRIImageService) GetImage(id string) (imagestore.Image, error) {
	img, err := c.imageStore.Get(id)
	return img, err
}
// GetSnapshot returns the snapshot stored under the given key for the given
// snapshotter.
func (c *CRIImageService) GetSnapshot(key, snapshotter string) (snapshotstore.Snapshot, error) {
	return c.snapshotStore.Get(snapshotstore.Key{Key: key, Snapshotter: snapshotter})
}
// ImageFSPaths returns the image filesystem path map the service was
// configured with. The map itself is returned, not a copy.
func (c *CRIImageService) ImageFSPaths() map[string]string {
	paths := c.imageFSPaths
	return paths
}
// PinnedImage looks up a pinned image by name in the service configuration.
// Most often used to get the "sandbox" image. It returns the empty string
// when no image is pinned under that name.
func (c *CRIImageService) PinnedImage(name string) string {
	if ref, ok := c.config.PinnedImages[name]; ok {
		return ref
	}
	return ""
}
// GRPCService wraps this service in a GRPCCRIImageService and returns it as
// the CRI image service grpc server.
func (c *CRIImageService) GRPCService() runtime.ImageServiceServer {
	server := &GRPCCRIImageService{CRIImageService: c}
	return server
}

View File

@@ -1,129 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"context"
"testing"
criconfig "github.com/containerd/containerd/v2/pkg/cri/config"
imagestore "github.com/containerd/containerd/v2/pkg/cri/store/image"
snapshotstore "github.com/containerd/containerd/v2/pkg/cri/store/snapshot"
"github.com/containerd/errdefs"
"github.com/containerd/platforms"
"github.com/stretchr/testify/assert"
)
const (
// testImageFSPath is the image filesystem path that newTestCRIService
// registers under the "overlayfs" key.
testImageFSPath = "/test/image/fs/path"
// Use an image id as the test sandbox image to avoid image name resolution.
// TODO(random-liu): Change this to image name after we have complete image
// management unit test framework.
testSandboxImage = "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113798" // #nosec G101
)
// newTestCRIService builds a CRIImageService for tests, backed by empty
// image and snapshot stores and the shared testImageConfig, and returns it
// together with a GRPCCRIImageService wrapping the same instance.
func newTestCRIService() (*CRIImageService, *GRPCCRIImageService) {
	svc := new(CRIImageService)
	svc.config = testImageConfig
	svc.runtimePlatforms = map[string]ImagePlatform{}
	svc.imageFSPaths = map[string]string{"overlayfs": testImageFSPath}
	svc.imageStore = imagestore.NewStore(nil, nil, platforms.Default())
	svc.snapshotStore = snapshotstore.NewStore()

	return svc, &GRPCCRIImageService{CRIImageService: svc}
}
// testImageConfig is the image configuration used by newTestCRIService; it
// pins testSandboxImage under the "sandbox" name.
var testImageConfig = criconfig.ImageConfig{
PinnedImages: map[string]string{
"sandbox": testSandboxImage,
},
}
// TestLocalResolve verifies that LocalResolve finds an image by its ID and by
// every short or fully-qualified spelling of its references, and that an
// unknown reference yields a not-found error with a zero-value image.
func TestLocalResolve(t *testing.T) {
	image := imagestore.Image{
		ID:      "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799",
		ChainID: "test-chain-id-1",
		References: []string{
			"docker.io/library/busybox:latest",
			"docker.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
		},
		Size: 10,
	}

	c, _ := newTestCRIService()
	var err error
	c.imageStore, err = imagestore.NewFakeStore([]imagestore.Image{image})
	// The lookups below are meaningless without the fixture store, so stop
	// here instead of continuing with a store that failed to build.
	if !assert.NoError(t, err) {
		t.FailNow()
	}

	for _, ref := range []string{
		"sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799",
		"busybox",
		"busybox:latest",
		"busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
		"library/busybox",
		"library/busybox:latest",
		"library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
		"docker.io/busybox",
		"docker.io/busybox:latest",
		"docker.io/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
		"docker.io/library/busybox",
		"docker.io/library/busybox:latest",
		"docker.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
	} {
		img, err := c.LocalResolve(ref)
		// Include the reference so a failure identifies which spelling broke.
		assert.NoError(t, err, "ref %q", ref)
		assert.Equal(t, image, img, "ref %q", ref)
	}

	img, err := c.LocalResolve("randomid")
	assert.True(t, errdefs.IsNotFound(err), "expected a not-found error, got %v", err)
	assert.Equal(t, imagestore.Image{}, img)
}
// TestRuntimeSnapshotter checks that RuntimeSnapshotter falls back to the
// configured default snapshotter when the runtime does not name one, and
// returns the runtime's snapshotter when it does.
func TestRuntimeSnapshotter(t *testing.T) {
	type testCase struct {
		desc              string
		runtime           criconfig.Runtime
		expectSnapshotter string
	}

	cases := []testCase{
		{
			desc:              "should return default snapshotter when runtime.Snapshotter is not set",
			runtime:           criconfig.Runtime{Snapshotter: ""},
			expectSnapshotter: criconfig.DefaultImageConfig().Snapshotter,
		},
		{
			desc:              "should return overridden snapshotter when runtime.Snapshotter is set",
			runtime:           criconfig.Runtime{Snapshotter: "devmapper"},
			expectSnapshotter: "devmapper",
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			svc, _ := newTestCRIService()
			svc.config = criconfig.DefaultImageConfig()

			got := svc.RuntimeSnapshotter(context.Background(), tc.runtime)
			assert.Equal(t, tc.expectSnapshotter, got)
		})
	}
}

Some files were not shown because too many files have changed in this diff Show More