687 lines
33 KiB
Go
687 lines
33 KiB
Go
/*
|
|
Copyright The containerd Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package config
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"net/url"
|
|
"time"
|
|
|
|
"github.com/containerd/log"
|
|
"github.com/pelletier/go-toml/v2"
|
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
|
"k8s.io/kubelet/pkg/cri/streaming"
|
|
|
|
runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options"
|
|
runcoptions "github.com/containerd/containerd/api/types/runc/options"
|
|
runtimeoptions "github.com/containerd/containerd/api/types/runtimeoptions/v1"
|
|
"github.com/containerd/containerd/v2/internal/cri/annotations"
|
|
"github.com/containerd/containerd/v2/pkg/deprecation"
|
|
"github.com/containerd/containerd/v2/plugins"
|
|
)
|
|
|
|
const (
|
|
// defaultImagePullProgressTimeoutDuration is the default value of imagePullProgressTimeout.
|
|
//
|
|
// NOTE:
|
|
//
|
|
// This ImagePullProgressTimeout feature is ported from kubelet/dockershim's
|
|
// --image-pull-progress-deadline. The original value is 1m0. Unlike docker
|
|
// daemon, the containerd doesn't have global concurrent download limitation
|
|
// before migrating to Transfer Service. If kubelet runs with concurrent
|
|
// image pull, the node will run under IO pressure. The ImagePull process
|
|
// could be impacted by self, if the target image is large one with a
|
|
// lot of layers. And also both container's writable layers and image's storage
|
|
// share one disk. The ImagePull process commits blob to content store
|
|
// with fsync, which might bring the unrelated files' dirty pages into
|
|
// disk in one transaction [1]. The 1m0 value isn't good enough. Based
|
|
// on #9347 case and kubernetes community's usage [2], the default value
|
|
// is updated to 5m0. If end-user still runs into unexpected cancel,
|
|
// they need to config it based on their environment.
|
|
//
|
|
// [1]: Fast commits for ext4 - https://lwn.net/Articles/842385/
|
|
// [2]: https://github.com/kubernetes/kubernetes/blob/1635c380b26a1d8cc25d36e9feace9797f4bae3c/cluster/gce/util.sh#L882
|
|
defaultImagePullProgressTimeoutDuration = 5 * time.Minute
|
|
)
|
|
|
|
type SandboxControllerMode string
|
|
|
|
const (
|
|
// ModePodSandbox means use Controller implementation from sbserver podsandbox package.
|
|
// We take this one as a default mode.
|
|
ModePodSandbox SandboxControllerMode = "podsandbox"
|
|
// ModeShim means use whatever Controller implementation provided by shim.
|
|
ModeShim SandboxControllerMode = "shim"
|
|
// DefaultSandboxImage is the default image to use for sandboxes when empty or
|
|
// for default configurations.
|
|
DefaultSandboxImage = "registry.k8s.io/pause:3.10"
|
|
// IOTypeFifo is container io implemented by creating named pipe
|
|
IOTypeFifo = "fifo"
|
|
// IOTypeStreaming is container io implemented by connecting the streaming api to sandbox endpoint
|
|
IOTypeStreaming = "streaming"
|
|
)
|
|
|
|
// Runtime struct to contain the type(ID), engine, and root variables for a default runtime
|
|
// and a runtime for untrusted workload.
|
|
type Runtime struct {
|
|
// Type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
|
|
Type string `toml:"runtime_type" json:"runtimeType"`
|
|
// Path is an optional field that can be used to overwrite path to a shim runtime binary.
|
|
// When specified, containerd will ignore runtime name field when resolving shim location.
|
|
// Path must be abs.
|
|
Path string `toml:"runtime_path" json:"runtimePath"`
|
|
// PodAnnotations is a list of pod annotations passed to both pod sandbox as well as
|
|
// container OCI annotations.
|
|
PodAnnotations []string `toml:"pod_annotations" json:"PodAnnotations"`
|
|
// ContainerAnnotations is a list of container annotations passed through to the OCI config of the containers.
|
|
// Container annotations in CRI are usually generated by other Kubernetes node components (i.e., not users).
|
|
// Currently, only device plugins populate the annotations.
|
|
ContainerAnnotations []string `toml:"container_annotations" json:"ContainerAnnotations"`
|
|
// Options are config options for the runtime.
|
|
Options map[string]interface{} `toml:"options" json:"options"`
|
|
// PrivilegedWithoutHostDevices overloads the default behaviour for adding host devices to the
|
|
// runtime spec when the container is privileged. Defaults to false.
|
|
PrivilegedWithoutHostDevices bool `toml:"privileged_without_host_devices" json:"privileged_without_host_devices"`
|
|
// PrivilegedWithoutHostDevicesAllDevicesAllowed overloads the default behaviour device allowlisting when
|
|
// to the runtime spec when the container when PrivilegedWithoutHostDevices is already enabled. Requires
|
|
// PrivilegedWithoutHostDevices to be enabled. Defaults to false.
|
|
PrivilegedWithoutHostDevicesAllDevicesAllowed bool `toml:"privileged_without_host_devices_all_devices_allowed" json:"privileged_without_host_devices_all_devices_allowed"`
|
|
// BaseRuntimeSpec is a json file with OCI spec to use as base spec that all container's will be created from.
|
|
BaseRuntimeSpec string `toml:"base_runtime_spec" json:"baseRuntimeSpec"`
|
|
// NetworkPluginConfDir is a directory containing the CNI network information for the runtime class.
|
|
NetworkPluginConfDir string `toml:"cni_conf_dir" json:"cniConfDir"`
|
|
// NetworkPluginMaxConfNum is the max number of plugin config files that will
|
|
// be loaded from the cni config directory by go-cni. Set the value to 0 to
|
|
// load all config files (no arbitrary limit). The legacy default value is 1.
|
|
NetworkPluginMaxConfNum int `toml:"cni_max_conf_num" json:"cniMaxConfNum"`
|
|
// Snapshotter setting snapshotter at runtime level instead of making it as a global configuration.
|
|
// An example use case is to use devmapper or other snapshotters in Kata containers for performance and security
|
|
// while using default snapshotters for operational simplicity.
|
|
// See https://github.com/containerd/containerd/issues/6657 for details.
|
|
Snapshotter string `toml:"snapshotter" json:"snapshotter"`
|
|
// Sandboxer defines which sandbox runtime to use when scheduling pods
|
|
// This features requires the new CRI server implementation (enabled by default in 2.0)
|
|
// shim - means use whatever Controller implementation provided by shim (e.g. use RemoteController).
|
|
// podsandbox - means use Controller implementation from sbserver podsandbox package.
|
|
Sandboxer string `toml:"sandboxer" json:"sandboxer"`
|
|
// IOType defines how containerd transfer the io streams of the container
|
|
// if it is not set, the named pipe will be created for the container
|
|
// we can also set it to "streaming" to create a stream by streaming api,
|
|
// and use it as a channel to transfer the io stream
|
|
IOType string `toml:"io_type" json:"io_type"`
|
|
}
|
|
|
|
// ContainerdConfig contains toml config related to containerd
|
|
type ContainerdConfig struct {
|
|
// DefaultRuntimeName is the default runtime name to use from the runtimes table.
|
|
DefaultRuntimeName string `toml:"default_runtime_name" json:"defaultRuntimeName"`
|
|
|
|
// Runtimes is a map from CRI RuntimeHandler strings, which specify types of runtime
|
|
// configurations, to the matching configurations.
|
|
Runtimes map[string]Runtime `toml:"runtimes" json:"runtimes"`
|
|
|
|
// IgnoreBlockIONotEnabledErrors is a boolean flag to ignore
|
|
// blockio related errors when blockio support has not been
|
|
// enabled.
|
|
IgnoreBlockIONotEnabledErrors bool `toml:"ignore_blockio_not_enabled_errors" json:"ignoreBlockIONotEnabledErrors"`
|
|
|
|
// IgnoreRdtNotEnabledErrors is a boolean flag to ignore RDT related errors
|
|
// when RDT support has not been enabled.
|
|
IgnoreRdtNotEnabledErrors bool `toml:"ignore_rdt_not_enabled_errors" json:"ignoreRdtNotEnabledErrors"`
|
|
}
|
|
|
|
// CniConfig contains toml config related to cni
|
|
type CniConfig struct {
|
|
// NetworkPluginBinDir is the directory in which the binaries for the plugin is kept.
|
|
NetworkPluginBinDir string `toml:"bin_dir" json:"binDir"`
|
|
// NetworkPluginConfDir is the directory in which the admin places a CNI conf.
|
|
NetworkPluginConfDir string `toml:"conf_dir" json:"confDir"`
|
|
// NetworkPluginMaxConfNum is the max number of plugin config files that will
|
|
// be loaded from the cni config directory by go-cni. Set the value to 0 to
|
|
// load all config files (no arbitrary limit). The legacy default value is 1.
|
|
NetworkPluginMaxConfNum int `toml:"max_conf_num" json:"maxConfNum"`
|
|
// NetworkPluginSetupSerially is a boolean flag to specify whether containerd sets up networks serially
|
|
// if there are multiple CNI plugin config files existing and NetworkPluginMaxConfNum is larger than 1.
|
|
//
|
|
// NOTE: On the Linux platform, containerd provides loopback network
|
|
// configuration by default. There are at least two network plugins.
|
|
// The default value of NetworkPluginSetupSerially is false which means
|
|
// the loopback and eth0 are handled in parallel mode. Since the loopback
|
|
// device is created as the net namespace is created, it's safe to run
|
|
// in parallel mode as the default setting.
|
|
NetworkPluginSetupSerially bool `toml:"setup_serially" json:"setupSerially"`
|
|
// NetworkPluginConfTemplate is the file path of golang template used to generate cni config.
|
|
// When it is set, containerd will get cidr(s) from kubelet to replace {{.PodCIDR}},
|
|
// {{.PodCIDRRanges}} or {{.Routes}} in the template, and write the config into
|
|
// NetworkPluginConfDir.
|
|
// Ideally the cni config should be placed by system admin or cni daemon like calico,
|
|
// weaveworks etc. However, this is useful for the cases when there is no cni daemonset to place cni config.
|
|
// This allowed for very simple generic networking using the Kubernetes built in node pod CIDR IPAM, avoiding the
|
|
// need to fetch the node object through some external process (which has scalability, auth, complexity issues).
|
|
// It is currently heavily used in kubernetes-containerd CI testing
|
|
// NetworkPluginConfTemplate was once deprecated in containerd v1.7.0,
|
|
// but its deprecation was cancelled in v1.7.3.
|
|
NetworkPluginConfTemplate string `toml:"conf_template" json:"confTemplate"`
|
|
// IPPreference specifies the strategy to use when selecting the main IP address for a pod.
|
|
//
|
|
// Options include:
|
|
// * ipv4, "" - (default) select the first ipv4 address
|
|
// * ipv6 - select the first ipv6 address
|
|
// * cni - use the order returned by the CNI plugins, returning the first IP address from the results
|
|
IPPreference string `toml:"ip_pref" json:"ipPref"`
|
|
// UseInternalLoopback specifies if we use the CNI loopback plugin or internal mechanism to set lo to up
|
|
UseInternalLoopback bool `toml:"use_internal_loopback" json:"useInternalLoopback"`
|
|
}
|
|
|
|
// Mirror contains the config related to the registry mirror
|
|
type Mirror struct {
|
|
// Endpoints are endpoints for a namespace. CRI plugin will try the endpoints
|
|
// one by one until a working one is found. The endpoint must be a valid url
|
|
// with host specified.
|
|
// The scheme, host and path from the endpoint URL will be used.
|
|
Endpoints []string `toml:"endpoint" json:"endpoint"`
|
|
|
|
// Rewrites is a map of repository rewrite rules for a namespace. When fetching image resources
|
|
// from an endpoint and a key matches the repository via regular expression matching
|
|
// it will be replaced with the corresponding value from the map in the resource request.
|
|
//
|
|
// This example configures CRI to pull docker.io/library/* images from docker.io/my-org/*:
|
|
//
|
|
// [plugins]
|
|
// [plugins."io.containerd.grpc.v1.cri"]
|
|
// [plugins."io.containerd.grpc.v1.cri".registry]
|
|
// [plugins."io.containerd.grpc.v1.cri".registry.mirrors]
|
|
// [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
|
|
// endpoint = ["https://registry-1.docker.io/v2"]
|
|
// [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io".rewrite]
|
|
// "^library/(.*)" = "my-org/$1"
|
|
//
|
|
Rewrites map[string]string `toml:"rewrite" json:"rewrite"`
|
|
}
|
|
|
|
// AuthConfig contains the config related to authentication to a specific registry
|
|
type AuthConfig struct {
|
|
// Username is the username to login the registry.
|
|
Username string `toml:"username" json:"username"`
|
|
// Password is the password to login the registry.
|
|
Password string `toml:"password" json:"password"`
|
|
// Auth is a base64 encoded string from the concatenation of the username,
|
|
// a colon, and the password.
|
|
Auth string `toml:"auth" json:"auth"`
|
|
// IdentityToken is used to authenticate the user and get
|
|
// an access token for the registry.
|
|
IdentityToken string `toml:"identitytoken" json:"identitytoken"`
|
|
}
|
|
|
|
// Registry is registry settings configured
|
|
type Registry struct {
|
|
// ConfigPath is a path to the root directory containing registry-specific
|
|
// configurations.
|
|
// If ConfigPath is set, the rest of the registry specific options are ignored.
|
|
ConfigPath string `toml:"config_path" json:"configPath"`
|
|
// Mirrors are namespace to mirror mapping for all namespaces.
|
|
// This option will not be used when ConfigPath is provided.
|
|
// DEPRECATED: Use ConfigPath instead. Remove in containerd 2.0.
|
|
Mirrors map[string]Mirror `toml:"mirrors" json:"mirrors"`
|
|
// Configs are configs for each registry.
|
|
// The key is the domain name or IP of the registry.
|
|
// DEPRECATED: Use ConfigPath instead.
|
|
Configs map[string]RegistryConfig `toml:"configs" json:"configs"`
|
|
// Auths are registry endpoint to auth config mapping. The registry endpoint must
|
|
// be a valid url with host specified.
|
|
// DEPRECATED: Use ConfigPath instead. Remove in containerd 2.0, supported in 1.x releases.
|
|
Auths map[string]AuthConfig `toml:"auths" json:"auths"`
|
|
// Headers adds additional HTTP headers that get sent to all registries
|
|
Headers map[string][]string `toml:"headers" json:"headers"`
|
|
}
|
|
|
|
// RegistryConfig contains configuration used to communicate with the registry.
|
|
type RegistryConfig struct {
|
|
// Auth contains information to authenticate to the registry.
|
|
Auth *AuthConfig `toml:"auth" json:"auth"`
|
|
}
|
|
|
|
// ImageDecryption contains configuration to handling decryption of encrypted container images.
|
|
type ImageDecryption struct {
|
|
// KeyModel specifies the trust model of where keys should reside.
|
|
//
|
|
// Details of field usage can be found in:
|
|
// https://github.com/containerd/containerd/tree/main/docs/cri/config.md
|
|
//
|
|
// Details of key models can be found in:
|
|
// https://github.com/containerd/containerd/tree/main/docs/cri/decryption.md
|
|
KeyModel string `toml:"key_model" json:"keyModel"`
|
|
}
|
|
|
|
// ImagePlatform represents the platform to use for an image including the
|
|
// snapshotter to use. If snapshotter is not provided, the platform default
|
|
// can be assumed. When platform is not provided, the default platform can
|
|
// be assumed
|
|
type ImagePlatform struct {
|
|
Platform string `toml:"platform" json:"platform"`
|
|
// Snapshotter setting snapshotter at runtime level instead of making it as a global configuration.
|
|
// An example use case is to use devmapper or other snapshotters in Kata containers for performance and security
|
|
// while using default snapshotters for operational simplicity.
|
|
// See https://github.com/containerd/containerd/issues/6657 for details.
|
|
Snapshotter string `toml:"snapshotter" json:"snapshotter"`
|
|
}
|
|
|
|
type ImageConfig struct {
|
|
// Snapshotter is the snapshotter used by containerd.
|
|
Snapshotter string `toml:"snapshotter" json:"snapshotter"`
|
|
|
|
// DisableSnapshotAnnotations disables to pass additional annotations (image
|
|
// related information) to snapshotters. These annotations are required by
|
|
// stargz snapshotter (https://github.com/containerd/stargz-snapshotter).
|
|
DisableSnapshotAnnotations bool `toml:"disable_snapshot_annotations" json:"disableSnapshotAnnotations"`
|
|
|
|
// DiscardUnpackedLayers is a boolean flag to specify whether to allow GC to
|
|
// remove layers from the content store after successfully unpacking these
|
|
// layers to the snapshotter.
|
|
DiscardUnpackedLayers bool `toml:"discard_unpacked_layers" json:"discardUnpackedLayers"`
|
|
|
|
// PinnedImages are images which the CRI plugin uses and should not be
|
|
// removed by the CRI client. The images have a key which can be used
|
|
// by other plugins to lookup the current image name.
|
|
// Image names should be full names including domain and tag
|
|
// Examples:
|
|
// "sandbox": "k8s.gcr.io/pause:3.10"
|
|
// "base": "docker.io/library/ubuntu:latest"
|
|
// Migrated from:
|
|
// (PluginConfig).SandboxImage string `toml:"sandbox_image" json:"sandboxImage"`
|
|
PinnedImages map[string]string `toml:"pinned_images" json:"pinned_images"`
|
|
|
|
// RuntimePlatforms is map between the runtime and the image platform to
|
|
// use for that runtime. When resolving an image for a runtime, this
|
|
// mapping will be used to select the image for the platform and the
|
|
// snapshotter for unpacking.
|
|
RuntimePlatforms map[string]ImagePlatform `toml:"runtime_platforms" json:"runtimePlatforms"`
|
|
|
|
// Registry contains config related to the registry
|
|
Registry Registry `toml:"registry" json:"registry"`
|
|
|
|
// ImageDecryption contains config related to handling decryption of encrypted container images
|
|
ImageDecryption `toml:"image_decryption" json:"imageDecryption"`
|
|
|
|
// MaxConcurrentDownloads restricts the number of concurrent downloads for each image.
|
|
// TODO: Migrate to transfer service
|
|
MaxConcurrentDownloads int `toml:"max_concurrent_downloads" json:"maxConcurrentDownloads"`
|
|
|
|
// ImagePullProgressTimeout is the maximum duration that there is no
|
|
// image data read from image registry in the open connection. It will
|
|
// be reset whatever a new byte has been read. If timeout, the image
|
|
// pulling will be cancelled. A zero value means there is no timeout.
|
|
//
|
|
// The string is in the golang duration format, see:
|
|
// https://golang.org/pkg/time/#ParseDuration
|
|
ImagePullProgressTimeout string `toml:"image_pull_progress_timeout" json:"imagePullProgressTimeout"`
|
|
|
|
// ImagePullWithSyncFs is an experimental setting. It's to force sync
|
|
// filesystem during unpacking to ensure that data integrity.
|
|
// TODO: Migrate to transfer service
|
|
ImagePullWithSyncFs bool `toml:"image_pull_with_sync_fs" json:"imagePullWithSyncFs"`
|
|
|
|
// StatsCollectPeriod is the period (in seconds) of snapshots stats collection.
|
|
StatsCollectPeriod int `toml:"stats_collect_period" json:"statsCollectPeriod"`
|
|
}
|
|
|
|
// RuntimeConfig contains toml config related to CRI plugin,
|
|
// it is a subset of Config.
|
|
type RuntimeConfig struct {
|
|
// ContainerdConfig contains config related to containerd
|
|
ContainerdConfig `toml:"containerd" json:"containerd"`
|
|
// CniConfig contains config related to cni
|
|
CniConfig `toml:"cni" json:"cni"`
|
|
// EnableSelinux indicates to enable the selinux support.
|
|
EnableSelinux bool `toml:"enable_selinux" json:"enableSelinux"`
|
|
// SelinuxCategoryRange allows the upper bound on the category range to be set.
|
|
// If not specified or set to 0, defaults to 1024 from the selinux package.
|
|
SelinuxCategoryRange int `toml:"selinux_category_range" json:"selinuxCategoryRange"`
|
|
// MaxContainerLogLineSize is the maximum log line size in bytes for a container.
|
|
// Log line longer than the limit will be split into multiple lines. Non-positive
|
|
// value means no limit.
|
|
MaxContainerLogLineSize int `toml:"max_container_log_line_size" json:"maxContainerLogSize"`
|
|
// DisableApparmor indicates to disable the apparmor support.
|
|
// This is useful when the containerd does not have permission to access Apparmor.
|
|
DisableApparmor bool `toml:"disable_apparmor" json:"disableApparmor"`
|
|
// RestrictOOMScoreAdj indicates to limit the lower bound of OOMScoreAdj to the containerd's
|
|
// current OOMScoreADj.
|
|
// This is useful when the containerd does not have permission to decrease OOMScoreAdj.
|
|
RestrictOOMScoreAdj bool `toml:"restrict_oom_score_adj" json:"restrictOOMScoreAdj"`
|
|
// DisableProcMount disables Kubernetes ProcMount support. This MUST be set to `true`
|
|
// when using containerd with Kubernetes <=1.11.
|
|
DisableProcMount bool `toml:"disable_proc_mount" json:"disableProcMount"`
|
|
// UnsetSeccompProfile is the profile containerd/cri will use If the provided seccomp profile is
|
|
// unset (`""`) for a container (default is `unconfined`)
|
|
UnsetSeccompProfile string `toml:"unset_seccomp_profile" json:"unsetSeccompProfile"`
|
|
// TolerateMissingHugetlbController if set to false will error out on create/update
|
|
// container requests with huge page limits if the cgroup controller for hugepages is not present.
|
|
// This helps with supporting Kubernetes <=1.18 out of the box. (default is `true`)
|
|
TolerateMissingHugetlbController bool `toml:"tolerate_missing_hugetlb_controller" json:"tolerateMissingHugetlbController"`
|
|
// DisableHugetlbController indicates to silently disable the hugetlb controller, even when it is
|
|
// present in /sys/fs/cgroup/cgroup.controllers.
|
|
// This helps with running rootless mode + cgroup v2 + systemd but without hugetlb delegation.
|
|
DisableHugetlbController bool `toml:"disable_hugetlb_controller" json:"disableHugetlbController"`
|
|
// DeviceOwnershipFromSecurityContext changes the default behavior of setting container devices uid/gid
|
|
// from CRI's SecurityContext (RunAsUser/RunAsGroup) instead of taking host's uid/gid. Defaults to false.
|
|
DeviceOwnershipFromSecurityContext bool `toml:"device_ownership_from_security_context" json:"device_ownership_from_security_context"`
|
|
// IgnoreImageDefinedVolumes ignores volumes defined by the image. Useful for better resource
|
|
// isolation, security and early detection of issues in the mount configuration when using
|
|
// ReadOnlyRootFilesystem since containers won't silently mount a temporary volume.
|
|
IgnoreImageDefinedVolumes bool `toml:"ignore_image_defined_volumes" json:"ignoreImageDefinedVolumes"`
|
|
// NetNSMountsUnderStateDir places all mounts for network namespaces under StateDir/netns instead
|
|
// of being placed under the hardcoded directory /var/run/netns. Changing this setting requires
|
|
// that all containers are deleted.
|
|
NetNSMountsUnderStateDir bool `toml:"netns_mounts_under_state_dir" json:"netnsMountsUnderStateDir"`
|
|
// EnableUnprivilegedPorts configures net.ipv4.ip_unprivileged_port_start=0
|
|
// for all containers which are not using host network
|
|
// and if it is not overwritten by PodSandboxConfig
|
|
EnableUnprivilegedPorts bool `toml:"enable_unprivileged_ports" json:"enableUnprivilegedPorts"`
|
|
// EnableUnprivilegedICMP configures net.ipv4.ping_group_range="0 2147483647"
|
|
// for all containers which are not using host network, are not running in user namespace
|
|
// and if it is not overwritten by PodSandboxConfig
|
|
EnableUnprivilegedICMP bool `toml:"enable_unprivileged_icmp" json:"enableUnprivilegedICMP"`
|
|
// EnableCDI indicates to enable injection of the Container Device Interface Specifications
|
|
// into the OCI config
|
|
// For more details about CDI and the syntax of CDI Spec files please refer to
|
|
// https://tags.cncf.io/container-device-interface.
|
|
EnableCDI bool `toml:"enable_cdi" json:"enableCDI"`
|
|
// CDISpecDirs is the list of directories to scan for Container Device Interface Specifications
|
|
// For more details about CDI configuration please refer to
|
|
// https://tags.cncf.io/container-device-interface#containerd-configuration
|
|
CDISpecDirs []string `toml:"cdi_spec_dirs" json:"cdiSpecDirs"`
|
|
|
|
// DrainExecSyncIOTimeout is the maximum duration to wait for ExecSync
|
|
// API' IO EOF event after exec init process exits. A zero value means
|
|
// there is no timeout.
|
|
//
|
|
// The string is in the golang duration format, see:
|
|
// https://golang.org/pkg/time/#ParseDuration
|
|
//
|
|
// For example, the value can be '5h', '2h30m', '10s'.
|
|
DrainExecSyncIOTimeout string `toml:"drain_exec_sync_io_timeout" json:"drainExecSyncIOTimeout"`
|
|
|
|
// IgnoreDeprecationWarnings is the list of the deprecation IDs (such as "io.containerd.deprecation/pull-schema-1-image")
|
|
// that should be ignored for checking "ContainerdHasNoDeprecationWarnings" condition.
|
|
IgnoreDeprecationWarnings []string `toml:"ignore_deprecation_warnings" json:"ignoreDeprecationWarnings"`
|
|
}
|
|
|
|
// X509KeyPairStreaming contains the x509 configuration for streaming
|
|
type X509KeyPairStreaming struct {
|
|
// TLSCertFile is the path to a certificate file
|
|
TLSCertFile string `toml:"tls_cert_file" json:"tlsCertFile"`
|
|
// TLSKeyFile is the path to a private key file
|
|
TLSKeyFile string `toml:"tls_key_file" json:"tlsKeyFile"`
|
|
}
|
|
|
|
// Config contains all configurations for CRI runtime plugin.
|
|
type Config struct {
|
|
// RuntimeConfig is the config for CRI runtime.
|
|
RuntimeConfig
|
|
// ContainerdRootDir is the root directory path for containerd.
|
|
ContainerdRootDir string `json:"containerdRootDir"`
|
|
// ContainerdEndpoint is the containerd endpoint path.
|
|
ContainerdEndpoint string `json:"containerdEndpoint"`
|
|
// RootDir is the root directory path for managing cri plugin files
|
|
// (metadata checkpoint etc.)
|
|
RootDir string `json:"rootDir"`
|
|
// StateDir is the root directory path for managing volatile pod/container data
|
|
StateDir string `json:"stateDir"`
|
|
}
|
|
|
|
// ServerConfig contains all the configuration for the CRI API server.
|
|
type ServerConfig struct {
|
|
// DisableTCPService disables serving CRI on the TCP server.
|
|
DisableTCPService bool `toml:"disable_tcp_service" json:"disableTCPService"`
|
|
// StreamServerAddress is the ip address streaming server is listening on.
|
|
StreamServerAddress string `toml:"stream_server_address" json:"streamServerAddress"`
|
|
// StreamServerPort is the port streaming server is listening on.
|
|
StreamServerPort string `toml:"stream_server_port" json:"streamServerPort"`
|
|
// StreamIdleTimeout is the maximum time a streaming connection
|
|
// can be idle before the connection is automatically closed.
|
|
// The string is in the golang duration format, see:
|
|
// https://golang.org/pkg/time/#ParseDuration
|
|
StreamIdleTimeout string `toml:"stream_idle_timeout" json:"streamIdleTimeout"`
|
|
// EnableTLSStreaming indicates to enable the TLS streaming support.
|
|
EnableTLSStreaming bool `toml:"enable_tls_streaming" json:"enableTLSStreaming"`
|
|
// X509KeyPairStreaming is a x509 key pair used for TLS streaming
|
|
X509KeyPairStreaming `toml:"x509_key_pair_streaming" json:"x509KeyPairStreaming"`
|
|
}
|
|
|
|
const (
|
|
// RuntimeUntrusted is the implicit runtime defined for ContainerdConfig.UntrustedWorkloadRuntime
|
|
RuntimeUntrusted = "untrusted"
|
|
// RuntimeDefault is the implicit runtime defined for ContainerdConfig.DefaultRuntime
|
|
RuntimeDefault = "default"
|
|
// KeyModelNode is the key model where key for encrypted images reside
|
|
// on the worker nodes
|
|
KeyModelNode = "node"
|
|
)
|
|
|
|
// ValidateImageConfig validates the given image configuration
|
|
func ValidateImageConfig(ctx context.Context, c *ImageConfig) ([]deprecation.Warning, error) {
|
|
var warnings []deprecation.Warning
|
|
|
|
useConfigPath := c.Registry.ConfigPath != ""
|
|
if len(c.Registry.Mirrors) > 0 {
|
|
if useConfigPath {
|
|
return warnings, errors.New("`mirrors` cannot be set when `config_path` is provided")
|
|
}
|
|
warnings = append(warnings, deprecation.CRIRegistryMirrors)
|
|
log.G(ctx).Warning("`mirrors` is deprecated, please use `config_path` instead")
|
|
}
|
|
|
|
if len(c.Registry.Configs) != 0 {
|
|
warnings = append(warnings, deprecation.CRIRegistryConfigs)
|
|
log.G(ctx).Warning("`configs` is deprecated, please use `config_path` instead")
|
|
}
|
|
|
|
// Validation for deprecated auths options and mapping it to configs.
|
|
if len(c.Registry.Auths) != 0 {
|
|
if c.Registry.Configs == nil {
|
|
c.Registry.Configs = make(map[string]RegistryConfig)
|
|
}
|
|
for endpoint, auth := range c.Registry.Auths {
|
|
auth := auth
|
|
u, err := url.Parse(endpoint)
|
|
if err != nil {
|
|
return warnings, fmt.Errorf("failed to parse registry url %q from `registry.auths`: %w", endpoint, err)
|
|
}
|
|
if u.Scheme != "" {
|
|
// Do not include the scheme in the new registry config.
|
|
endpoint = u.Host
|
|
}
|
|
config := c.Registry.Configs[endpoint]
|
|
config.Auth = &auth
|
|
c.Registry.Configs[endpoint] = config
|
|
}
|
|
warnings = append(warnings, deprecation.CRIRegistryAuths)
|
|
log.G(ctx).Warning("`auths` is deprecated, please use `ImagePullSecrets` instead")
|
|
}
|
|
|
|
// Validation for image_pull_progress_timeout
|
|
if c.ImagePullProgressTimeout != "" {
|
|
if _, err := time.ParseDuration(c.ImagePullProgressTimeout); err != nil {
|
|
return warnings, fmt.Errorf("invalid image pull progress timeout: %w", err)
|
|
}
|
|
}
|
|
|
|
return warnings, nil
|
|
}
|
|
|
|
// ValidateRuntimeConfig validates the given runtime configuration.
|
|
func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig) ([]deprecation.Warning, error) {
|
|
var warnings []deprecation.Warning
|
|
if c.ContainerdConfig.Runtimes == nil {
|
|
c.ContainerdConfig.Runtimes = make(map[string]Runtime)
|
|
}
|
|
|
|
// Validation for default_runtime_name
|
|
if c.ContainerdConfig.DefaultRuntimeName == "" {
|
|
return warnings, errors.New("`default_runtime_name` is empty")
|
|
}
|
|
if _, ok := c.ContainerdConfig.Runtimes[c.ContainerdConfig.DefaultRuntimeName]; !ok {
|
|
return warnings, fmt.Errorf("no corresponding runtime configured in `containerd.runtimes` for `containerd` `default_runtime_name = \"%s\"", c.ContainerdConfig.DefaultRuntimeName)
|
|
}
|
|
|
|
for k, r := range c.ContainerdConfig.Runtimes {
|
|
if !r.PrivilegedWithoutHostDevices && r.PrivilegedWithoutHostDevicesAllDevicesAllowed {
|
|
return warnings, errors.New("`privileged_without_host_devices_all_devices_allowed` requires `privileged_without_host_devices` to be enabled")
|
|
}
|
|
// If empty, use default podSandbox mode
|
|
if len(r.Sandboxer) == 0 {
|
|
r.Sandboxer = string(ModePodSandbox)
|
|
c.ContainerdConfig.Runtimes[k] = r
|
|
}
|
|
|
|
if len(r.IOType) == 0 {
|
|
r.IOType = IOTypeFifo
|
|
}
|
|
if r.IOType != IOTypeStreaming && r.IOType != IOTypeFifo {
|
|
return warnings, errors.New("`io_type` can only be `streaming` or `named_pipe`")
|
|
}
|
|
}
|
|
|
|
// Validation for drain_exec_sync_io_timeout
|
|
if c.DrainExecSyncIOTimeout != "" {
|
|
if _, err := time.ParseDuration(c.DrainExecSyncIOTimeout); err != nil {
|
|
return warnings, fmt.Errorf("invalid `drain_exec_sync_io_timeout`: %w", err)
|
|
}
|
|
}
|
|
if err := ValidateEnableUnprivileged(ctx, c); err != nil {
|
|
return warnings, err
|
|
}
|
|
return warnings, nil
|
|
}
|
|
|
|
// ValidateServerConfig validates the given server configuration.
|
|
func ValidateServerConfig(ctx context.Context, c *ServerConfig) ([]deprecation.Warning, error) {
|
|
var warnings []deprecation.Warning
|
|
// Validation for stream_idle_timeout
|
|
if c.StreamIdleTimeout != "" {
|
|
if _, err := time.ParseDuration(c.StreamIdleTimeout); err != nil {
|
|
return warnings, fmt.Errorf("invalid stream idle timeout: %w", err)
|
|
}
|
|
}
|
|
return warnings, nil
|
|
}
|
|
|
|
func (config *Config) GetSandboxRuntime(podSandboxConfig *runtime.PodSandboxConfig, runtimeHandler string) (Runtime, error) {
|
|
if untrustedWorkload(podSandboxConfig) {
|
|
// If the untrusted annotation is provided, runtimeHandler MUST be empty.
|
|
if runtimeHandler != "" && runtimeHandler != RuntimeUntrusted {
|
|
return Runtime{}, errors.New("untrusted workload with explicit runtime handler is not allowed")
|
|
}
|
|
|
|
// If the untrusted workload is requesting access to the host/node, this request will fail.
|
|
//
|
|
// Note: If the workload is marked untrusted but requests privileged, this can be granted, as the
|
|
// runtime may support this. For example, in a virtual-machine isolated runtime, privileged
|
|
// is a supported option, granting the workload to access the entire guest VM instead of host.
|
|
// TODO(windows): Deprecate this so that we don't need to handle it for windows.
|
|
if hostAccessingSandbox(podSandboxConfig) {
|
|
return Runtime{}, errors.New("untrusted workload with host access is not allowed")
|
|
}
|
|
|
|
runtimeHandler = RuntimeUntrusted
|
|
}
|
|
|
|
if runtimeHandler == "" {
|
|
runtimeHandler = config.DefaultRuntimeName
|
|
}
|
|
|
|
r, ok := config.Runtimes[runtimeHandler]
|
|
if !ok {
|
|
return Runtime{}, fmt.Errorf("no runtime for %q is configured", runtimeHandler)
|
|
}
|
|
return r, nil
|
|
|
|
}
|
|
|
|
// untrustedWorkload returns true if the sandbox contains untrusted workload.
|
|
func untrustedWorkload(config *runtime.PodSandboxConfig) bool {
|
|
return config.GetAnnotations()[annotations.UntrustedWorkload] == "true"
|
|
}
|
|
|
|
// hostAccessingSandbox returns true if the sandbox configuration
|
|
// requires additional host access for the sandbox.
|
|
func hostAccessingSandbox(config *runtime.PodSandboxConfig) bool {
|
|
securityContext := config.GetLinux().GetSecurityContext()
|
|
|
|
namespaceOptions := securityContext.GetNamespaceOptions()
|
|
if namespaceOptions.GetNetwork() == runtime.NamespaceMode_NODE ||
|
|
namespaceOptions.GetPid() == runtime.NamespaceMode_NODE ||
|
|
namespaceOptions.GetIpc() == runtime.NamespaceMode_NODE {
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// GenerateRuntimeOptions generates runtime options from cri plugin config.
|
|
func GenerateRuntimeOptions(r Runtime) (interface{}, error) {
|
|
if r.Options == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
b, err := toml.Marshal(r.Options)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to marshal TOML blob for runtime %q: %w", r.Type, err)
|
|
}
|
|
|
|
options := getRuntimeOptionsType(r.Type)
|
|
if err := toml.Unmarshal(b, options); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// For generic configuration, if no config path specified (preserving old behavior), pass
|
|
// the whole TOML configuration section to the runtime.
|
|
if runtimeOpts, ok := options.(*runtimeoptions.Options); ok && runtimeOpts.ConfigPath == "" {
|
|
runtimeOpts.ConfigBody = b
|
|
}
|
|
|
|
return options, nil
|
|
}
|
|
|
|
// getRuntimeOptionsType gets empty runtime options by the runtime type name.
|
|
func getRuntimeOptionsType(t string) interface{} {
|
|
switch t {
|
|
case plugins.RuntimeRuncV2:
|
|
return &runcoptions.Options{}
|
|
case plugins.RuntimeRunhcsV1:
|
|
return &runhcsoptions.Options{}
|
|
default:
|
|
return &runtimeoptions.Options{}
|
|
}
|
|
}
|
|
|
|
func DefaultServerConfig() ServerConfig {
|
|
return ServerConfig{
|
|
DisableTCPService: true,
|
|
StreamServerAddress: "127.0.0.1",
|
|
StreamServerPort: "0",
|
|
StreamIdleTimeout: streaming.DefaultConfig.StreamIdleTimeout.String(), // 4 hour
|
|
EnableTLSStreaming: false,
|
|
X509KeyPairStreaming: X509KeyPairStreaming{
|
|
TLSKeyFile: "",
|
|
TLSCertFile: "",
|
|
},
|
|
}
|
|
}
|