Move CRI from pkg/ to internal/
Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
This commit is contained in:
806
internal/cri/config/config.go
Normal file
806
internal/cri/config/config.go
Normal file
@@ -0,0 +1,806 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/url"
|
||||
goruntime "runtime"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
introspectionapi "github.com/containerd/containerd/v2/api/services/introspection/v1"
|
||||
apitypes "github.com/containerd/containerd/v2/api/types"
|
||||
"github.com/containerd/containerd/v2/protobuf"
|
||||
"github.com/containerd/log"
|
||||
"github.com/containerd/typeurl/v2"
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
"k8s.io/kubelet/pkg/cri/streaming"
|
||||
|
||||
runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options"
|
||||
runcoptions "github.com/containerd/containerd/v2/core/runtime/v2/runc/options"
|
||||
"github.com/containerd/containerd/v2/internal/cri/annotations"
|
||||
"github.com/containerd/containerd/v2/pkg/deprecation"
|
||||
runtimeoptions "github.com/containerd/containerd/v2/pkg/runtimeoptions/v1"
|
||||
"github.com/containerd/containerd/v2/plugins"
|
||||
"github.com/opencontainers/image-spec/specs-go"
|
||||
"github.com/opencontainers/runtime-spec/specs-go/features"
|
||||
)
|
||||
|
||||
func init() {
|
||||
const prefix = "types.containerd.io"
|
||||
major := strconv.Itoa(specs.VersionMajor)
|
||||
typeurl.Register(&features.Features{}, prefix, "opencontainers/runtime-spec", major, "features", "Features")
|
||||
}
|
||||
|
||||
// defaultImagePullProgressTimeoutDuration is the default value of
// imagePullProgressTimeout.
//
// Background:
//
// The ImagePullProgressTimeout feature was ported from kubelet/dockershim's
// --image-pull-progress-deadline, whose original value was 1m0. Unlike the
// docker daemon, containerd has no global limit on concurrent downloads
// before migrating to the Transfer Service. When kubelet pulls images
// concurrently the node runs under IO pressure, and a pull of a large image
// with many layers can slow itself down. Container writable layers and image
// storage also share one disk, and the pull commits blobs to the content
// store with fsync, which might flush unrelated files' dirty pages to disk in
// the same transaction [1]. For these reasons 1m0 isn't good enough. Based on
// the #9347 case and the kubernetes community's usage [2], the default was
// raised to 5m0. End-users who still see unexpected cancellations need to
// tune the value for their environment.
//
// [1]: Fast commits for ext4 - https://lwn.net/Articles/842385/
// [2]: https://github.com/kubernetes/kubernetes/blob/1635c380b26a1d8cc25d36e9feace9797f4bae3c/cluster/gce/util.sh#L882
const defaultImagePullProgressTimeoutDuration = 5 * time.Minute
|
||||
|
||||
// SandboxControllerMode names the sandbox controller implementation to use.
type SandboxControllerMode string

// Supported sandbox controller modes.
const (
	// ModePodSandbox selects the Controller implementation from the sbserver
	// podsandbox package. This is the default mode.
	ModePodSandbox SandboxControllerMode = "podsandbox"
	// ModeShim selects whatever Controller implementation the shim provides.
	ModeShim SandboxControllerMode = "shim"
)

// DefaultSandboxImage is the image used for sandboxes when none is configured
// and in default configurations.
const DefaultSandboxImage = "registry.k8s.io/pause:3.9"
|
||||
|
||||
// Ternary is a three-state setting encoded as a string.
// A string is used because TOML does not accept "null" for boolean values.
type Ternary = string

// Accepted Ternary values.
const (
	// TernaryEmpty is the unset value; it is an alias for TernaryIfPossible.
	TernaryEmpty Ternary = ""
	// TernaryEnabled is the "Enabled" value.
	TernaryEnabled Ternary = "Enabled"
	// TernaryIfPossible is the "IfPossible" value.
	TernaryIfPossible Ternary = "IfPossible"
	// TernaryDisabled is the "Disabled" value.
	TernaryDisabled Ternary = "Disabled"
)
|
||||
|
||||
// Runtime struct to contain the type(ID), engine, and root variables for a default runtime
|
||||
// and a runtime for untrusted workload.
|
||||
type Runtime struct {
|
||||
// Type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
|
||||
Type string `toml:"runtime_type" json:"runtimeType"`
|
||||
// Path is an optional field that can be used to overwrite path to a shim runtime binary.
|
||||
// When specified, containerd will ignore runtime name field when resolving shim location.
|
||||
// Path must be abs.
|
||||
Path string `toml:"runtime_path" json:"runtimePath"`
|
||||
// PodAnnotations is a list of pod annotations passed to both pod sandbox as well as
|
||||
// container OCI annotations.
|
||||
PodAnnotations []string `toml:"pod_annotations" json:"PodAnnotations"`
|
||||
// ContainerAnnotations is a list of container annotations passed through to the OCI config of the containers.
|
||||
// Container annotations in CRI are usually generated by other Kubernetes node components (i.e., not users).
|
||||
// Currently, only device plugins populate the annotations.
|
||||
ContainerAnnotations []string `toml:"container_annotations" json:"ContainerAnnotations"`
|
||||
// Options are config options for the runtime.
|
||||
Options map[string]interface{} `toml:"options" json:"options"`
|
||||
// PrivilegedWithoutHostDevices overloads the default behaviour for adding host devices to the
|
||||
// runtime spec when the container is privileged. Defaults to false.
|
||||
PrivilegedWithoutHostDevices bool `toml:"privileged_without_host_devices" json:"privileged_without_host_devices"`
|
||||
// PrivilegedWithoutHostDevicesAllDevicesAllowed overloads the default behaviour device allowlisting when
|
||||
// to the runtime spec when the container when PrivilegedWithoutHostDevices is already enabled. Requires
|
||||
// PrivilegedWithoutHostDevices to be enabled. Defaults to false.
|
||||
PrivilegedWithoutHostDevicesAllDevicesAllowed bool `toml:"privileged_without_host_devices_all_devices_allowed" json:"privileged_without_host_devices_all_devices_allowed"`
|
||||
// BaseRuntimeSpec is a json file with OCI spec to use as base spec that all container's will be created from.
|
||||
BaseRuntimeSpec string `toml:"base_runtime_spec" json:"baseRuntimeSpec"`
|
||||
// NetworkPluginConfDir is a directory containing the CNI network information for the runtime class.
|
||||
NetworkPluginConfDir string `toml:"cni_conf_dir" json:"cniConfDir"`
|
||||
// NetworkPluginMaxConfNum is the max number of plugin config files that will
|
||||
// be loaded from the cni config directory by go-cni. Set the value to 0 to
|
||||
// load all config files (no arbitrary limit). The legacy default value is 1.
|
||||
NetworkPluginMaxConfNum int `toml:"cni_max_conf_num" json:"cniMaxConfNum"`
|
||||
// Snapshotter setting snapshotter at runtime level instead of making it as a global configuration.
|
||||
// An example use case is to use devmapper or other snapshotters in Kata containers for performance and security
|
||||
// while using default snapshotters for operational simplicity.
|
||||
// See https://github.com/containerd/containerd/issues/6657 for details.
|
||||
Snapshotter string `toml:"snapshotter" json:"snapshotter"`
|
||||
// Sandboxer defines which sandbox runtime to use when scheduling pods
|
||||
// This features requires the new CRI server implementation (enabled by default in 2.0)
|
||||
// shim - means use whatever Controller implementation provided by shim (e.g. use RemoteController).
|
||||
// podsandbox - means use Controller implementation from sbserver podsandbox package.
|
||||
Sandboxer string `toml:"sandboxer" json:"sandboxer"`
|
||||
|
||||
// TreatRoMountsAsRro ("Enabled"|"IfPossible"|"Disabled")
|
||||
// treats read-only mounts as recursive read-only mounts.
|
||||
// An empty string means "IfPossible".
|
||||
// "Enabled" requires Linux kernel v5.12 or later.
|
||||
// Introduced in containerd v2.0.
|
||||
// This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
|
||||
TreatRoMountsAsRro Ternary `toml:"treat_ro_mount_as_rro" json:"treatRoMountsAsRro"`
|
||||
TreatRoMountsAsRroResolved bool `toml:"-" json:"-"` // Do not set manually
|
||||
}
|
||||
|
||||
// ContainerdConfig contains toml config related to containerd
|
||||
type ContainerdConfig struct {
|
||||
// DefaultRuntimeName is the default runtime name to use from the runtimes table.
|
||||
DefaultRuntimeName string `toml:"default_runtime_name" json:"defaultRuntimeName"`
|
||||
|
||||
// Runtimes is a map from CRI RuntimeHandler strings, which specify types of runtime
|
||||
// configurations, to the matching configurations.
|
||||
Runtimes map[string]Runtime `toml:"runtimes" json:"runtimes"`
|
||||
|
||||
// IgnoreBlockIONotEnabledErrors is a boolean flag to ignore
|
||||
// blockio related errors when blockio support has not been
|
||||
// enabled.
|
||||
IgnoreBlockIONotEnabledErrors bool `toml:"ignore_blockio_not_enabled_errors" json:"ignoreBlockIONotEnabledErrors"`
|
||||
|
||||
// IgnoreRdtNotEnabledErrors is a boolean flag to ignore RDT related errors
|
||||
// when RDT support has not been enabled.
|
||||
IgnoreRdtNotEnabledErrors bool `toml:"ignore_rdt_not_enabled_errors" json:"ignoreRdtNotEnabledErrors"`
|
||||
}
|
||||
|
||||
// CniConfig is the CNI-related section of the TOML config.
type CniConfig struct {
	// NetworkPluginBinDir is the directory in which the plugin binaries are
	// kept.
	NetworkPluginBinDir string `toml:"bin_dir" json:"binDir"`

	// NetworkPluginConfDir is the directory in which the admin places a CNI
	// conf.
	NetworkPluginConfDir string `toml:"conf_dir" json:"confDir"`

	// NetworkPluginMaxConfNum is the max number of plugin config files that
	// go-cni loads from the cni config directory. Set the value to 0 to load
	// all config files (no arbitrary limit). The legacy default value is 1.
	NetworkPluginMaxConfNum int `toml:"max_conf_num" json:"maxConfNum"`

	// NetworkPluginSetupSerially specifies whether containerd sets up networks
	// serially when multiple CNI plugin config files exist and
	// NetworkPluginMaxConfNum is larger than 1.
	//
	// NOTE: On the Linux platform, containerd provides loopback network
	// configuration by default, so there are at least two network plugins.
	// The default value is false, which means the loopback and eth0 are
	// handled in parallel mode. Since the loopback device is created as the
	// net namespace is created, it's safe to run in parallel mode as the
	// default setting.
	NetworkPluginSetupSerially bool `toml:"setup_serially" json:"setupSerially"`

	// NetworkPluginConfTemplate is the file path of the golang template used
	// to generate the cni config.
	// When it is set, containerd will get cidr(s) from kubelet to replace
	// {{.PodCIDR}}, {{.PodCIDRRanges}} or {{.Routes}} in the template, and
	// write the config into NetworkPluginConfDir.
	// Ideally the cni config should be placed by the system admin or a cni
	// daemon like calico, weaveworks etc. However, this is useful for the
	// cases when there is no cni daemonset to place the cni config.
	// This allowed for very simple generic networking using the Kubernetes
	// built in node pod CIDR IPAM, avoiding the need to fetch the node object
	// through some external process (which has scalability, auth, complexity
	// issues).
	// It is currently heavily used in kubernetes-containerd CI testing.
	// NetworkPluginConfTemplate was once deprecated in containerd v1.7.0,
	// but its deprecation was cancelled in v1.7.3.
	NetworkPluginConfTemplate string `toml:"conf_template" json:"confTemplate"`

	// IPPreference specifies the strategy to use when selecting the main IP
	// address for a pod.
	//
	// Options include:
	// * ipv4, "" - (default) select the first ipv4 address
	// * ipv6 - select the first ipv6 address
	// * cni - use the order returned by the CNI plugins, returning the first
	//   IP address from the results
	IPPreference string `toml:"ip_pref" json:"ipPref"`
}
|
||||
|
||||
// Mirror is the configuration of a registry mirror.
type Mirror struct {
	// Endpoints are the endpoints for a namespace. The CRI plugin tries them
	// one by one until a working one is found. Each endpoint must be a valid
	// url with host specified.
	// The scheme, host and path from the endpoint URL will be used.
	Endpoints []string `toml:"endpoint" json:"endpoint"`
}
|
||||
|
||||
// AuthConfig holds the credentials used to authenticate to a specific
// registry.
type AuthConfig struct {
	// Username is the username used to log in to the registry.
	Username string `toml:"username" json:"username"`

	// Password is the password used to log in to the registry.
	Password string `toml:"password" json:"password"`

	// Auth is a base64 encoded string made from the concatenation of the
	// username, a colon, and the password.
	Auth string `toml:"auth" json:"auth"`

	// IdentityToken is used to authenticate the user and get an access token
	// for the registry.
	IdentityToken string `toml:"identitytoken" json:"identitytoken"`
}
|
||||
|
||||
// Registry is registry settings configured
|
||||
type Registry struct {
|
||||
// ConfigPath is a path to the root directory containing registry-specific
|
||||
// configurations.
|
||||
// If ConfigPath is set, the rest of the registry specific options are ignored.
|
||||
ConfigPath string `toml:"config_path" json:"configPath"`
|
||||
// Mirrors are namespace to mirror mapping for all namespaces.
|
||||
// This option will not be used when ConfigPath is provided.
|
||||
// DEPRECATED: Use ConfigPath instead. Remove in containerd 2.0.
|
||||
Mirrors map[string]Mirror `toml:"mirrors" json:"mirrors"`
|
||||
// Configs are configs for each registry.
|
||||
// The key is the domain name or IP of the registry.
|
||||
// DEPRECATED: Use ConfigPath instead.
|
||||
Configs map[string]RegistryConfig `toml:"configs" json:"configs"`
|
||||
// Auths are registry endpoint to auth config mapping. The registry endpoint must
|
||||
// be a valid url with host specified.
|
||||
// DEPRECATED: Use ConfigPath instead. Remove in containerd 2.0, supported in 1.x releases.
|
||||
Auths map[string]AuthConfig `toml:"auths" json:"auths"`
|
||||
// Headers adds additional HTTP headers that get sent to all registries
|
||||
Headers map[string][]string `toml:"headers" json:"headers"`
|
||||
}
|
||||
|
||||
// RegistryConfig contains configuration used to communicate with the registry.
|
||||
type RegistryConfig struct {
|
||||
// Auth contains information to authenticate to the registry.
|
||||
Auth *AuthConfig `toml:"auth" json:"auth"`
|
||||
}
|
||||
|
||||
// ImageDecryption configures how encrypted container images are decrypted.
type ImageDecryption struct {
	// KeyModel specifies the trust model of where keys should reside.
	//
	// Details of field usage can be found in:
	// https://github.com/containerd/containerd/tree/main/docs/cri/config.md
	//
	// Details of key models can be found in:
	// https://github.com/containerd/containerd/tree/main/docs/cri/decryption.md
	KeyModel string `toml:"key_model" json:"keyModel"`
}
|
||||
|
||||
// ImagePlatform describes the platform to use for an image, including the
// snapshotter to use. When the snapshotter is not provided, the platform
// default can be assumed. When the platform is not provided, the default
// platform can be assumed.
type ImagePlatform struct {
	// Platform is the platform to use for the image.
	Platform string `toml:"platform" json:"platform"`

	// Snapshotter sets the snapshotter at runtime level instead of as a global
	// configuration.
	// An example use case is to use devmapper or other snapshotters in Kata
	// containers for performance and security while using default snapshotters
	// for operational simplicity.
	// See https://github.com/containerd/containerd/issues/6657 for details.
	Snapshotter string `toml:"snapshotter" json:"snapshotter"`
}
|
||||
|
||||
type ImageConfig struct {
|
||||
// Snapshotter is the snapshotter used by containerd.
|
||||
Snapshotter string `toml:"snapshotter" json:"snapshotter"`
|
||||
|
||||
// DisableSnapshotAnnotations disables to pass additional annotations (image
|
||||
// related information) to snapshotters. These annotations are required by
|
||||
// stargz snapshotter (https://github.com/containerd/stargz-snapshotter).
|
||||
DisableSnapshotAnnotations bool `toml:"disable_snapshot_annotations" json:"disableSnapshotAnnotations"`
|
||||
|
||||
// DiscardUnpackedLayers is a boolean flag to specify whether to allow GC to
|
||||
// remove layers from the content store after successfully unpacking these
|
||||
// layers to the snapshotter.
|
||||
DiscardUnpackedLayers bool `toml:"discard_unpacked_layers" json:"discardUnpackedLayers"`
|
||||
|
||||
// PinnedImages are images which the CRI plugin uses and should not be
|
||||
// removed by the CRI client. The images have a key which can be used
|
||||
// by other plugins to lookup the current image name.
|
||||
// Image names should be full names including domain and tag
|
||||
// Examples:
|
||||
// "sandbox": "k8s.gcr.io/pause:3.9"
|
||||
// "base": "docker.io/library/ubuntu:latest"
|
||||
// Migrated from:
|
||||
// (PluginConfig).SandboxImage string `toml:"sandbox_image" json:"sandboxImage"`
|
||||
PinnedImages map[string]string
|
||||
|
||||
// RuntimePlatforms is map between the runtime and the image platform to
|
||||
// use for that runtime. When resolving an image for a runtime, this
|
||||
// mapping will be used to select the image for the platform and the
|
||||
// snapshotter for unpacking.
|
||||
RuntimePlatforms map[string]ImagePlatform `toml:"runtime_platforms" json:"runtimePlatforms"`
|
||||
|
||||
// Registry contains config related to the registry
|
||||
Registry Registry `toml:"registry" json:"registry"`
|
||||
|
||||
// ImageDecryption contains config related to handling decryption of encrypted container images
|
||||
ImageDecryption `toml:"image_decryption" json:"imageDecryption"`
|
||||
|
||||
// MaxConcurrentDownloads restricts the number of concurrent downloads for each image.
|
||||
// TODO: Migrate to transfer service
|
||||
MaxConcurrentDownloads int `toml:"max_concurrent_downloads" json:"maxConcurrentDownloads"`
|
||||
|
||||
// ImagePullProgressTimeout is the maximum duration that there is no
|
||||
// image data read from image registry in the open connection. It will
|
||||
// be reset whatever a new byte has been read. If timeout, the image
|
||||
// pulling will be cancelled. A zero value means there is no timeout.
|
||||
//
|
||||
// The string is in the golang duration format, see:
|
||||
// https://golang.org/pkg/time/#ParseDuration
|
||||
ImagePullProgressTimeout string `toml:"image_pull_progress_timeout" json:"imagePullProgressTimeout"`
|
||||
|
||||
// ImagePullWithSyncFs is an experimental setting. It's to force sync
|
||||
// filesystem during unpacking to ensure that data integrity.
|
||||
// TODO: Migrate to transfer service
|
||||
ImagePullWithSyncFs bool `toml:"image_pull_with_sync_fs" json:"imagePullWithSyncFs"`
|
||||
|
||||
// StatsCollectPeriod is the period (in seconds) of snapshots stats collection.
|
||||
StatsCollectPeriod int `toml:"stats_collect_period" json:"statsCollectPeriod"`
|
||||
}
|
||||
|
||||
// RuntimeConfig contains toml config related to CRI plugin,
|
||||
// it is a subset of Config.
|
||||
type RuntimeConfig struct {
|
||||
// ContainerdConfig contains config related to containerd
|
||||
ContainerdConfig `toml:"containerd" json:"containerd"`
|
||||
// CniConfig contains config related to cni
|
||||
CniConfig `toml:"cni" json:"cni"`
|
||||
// EnableSelinux indicates to enable the selinux support.
|
||||
EnableSelinux bool `toml:"enable_selinux" json:"enableSelinux"`
|
||||
// SelinuxCategoryRange allows the upper bound on the category range to be set.
|
||||
// If not specified or set to 0, defaults to 1024 from the selinux package.
|
||||
SelinuxCategoryRange int `toml:"selinux_category_range" json:"selinuxCategoryRange"`
|
||||
// MaxContainerLogLineSize is the maximum log line size in bytes for a container.
|
||||
// Log line longer than the limit will be split into multiple lines. Non-positive
|
||||
// value means no limit.
|
||||
MaxContainerLogLineSize int `toml:"max_container_log_line_size" json:"maxContainerLogSize"`
|
||||
// DisableCgroup indicates to disable the cgroup support.
|
||||
// This is useful when the containerd does not have permission to access cgroup.
|
||||
DisableCgroup bool `toml:"disable_cgroup" json:"disableCgroup"`
|
||||
// DisableApparmor indicates to disable the apparmor support.
|
||||
// This is useful when the containerd does not have permission to access Apparmor.
|
||||
DisableApparmor bool `toml:"disable_apparmor" json:"disableApparmor"`
|
||||
// RestrictOOMScoreAdj indicates to limit the lower bound of OOMScoreAdj to the containerd's
|
||||
// current OOMScoreADj.
|
||||
// This is useful when the containerd does not have permission to decrease OOMScoreAdj.
|
||||
RestrictOOMScoreAdj bool `toml:"restrict_oom_score_adj" json:"restrictOOMScoreAdj"`
|
||||
// DisableProcMount disables Kubernetes ProcMount support. This MUST be set to `true`
|
||||
// when using containerd with Kubernetes <=1.11.
|
||||
DisableProcMount bool `toml:"disable_proc_mount" json:"disableProcMount"`
|
||||
// UnsetSeccompProfile is the profile containerd/cri will use If the provided seccomp profile is
|
||||
// unset (`""`) for a container (default is `unconfined`)
|
||||
UnsetSeccompProfile string `toml:"unset_seccomp_profile" json:"unsetSeccompProfile"`
|
||||
// TolerateMissingHugetlbController if set to false will error out on create/update
|
||||
// container requests with huge page limits if the cgroup controller for hugepages is not present.
|
||||
// This helps with supporting Kubernetes <=1.18 out of the box. (default is `true`)
|
||||
TolerateMissingHugetlbController bool `toml:"tolerate_missing_hugetlb_controller" json:"tolerateMissingHugetlbController"`
|
||||
// DisableHugetlbController indicates to silently disable the hugetlb controller, even when it is
|
||||
// present in /sys/fs/cgroup/cgroup.controllers.
|
||||
// This helps with running rootless mode + cgroup v2 + systemd but without hugetlb delegation.
|
||||
DisableHugetlbController bool `toml:"disable_hugetlb_controller" json:"disableHugetlbController"`
|
||||
// DeviceOwnershipFromSecurityContext changes the default behavior of setting container devices uid/gid
|
||||
// from CRI's SecurityContext (RunAsUser/RunAsGroup) instead of taking host's uid/gid. Defaults to false.
|
||||
DeviceOwnershipFromSecurityContext bool `toml:"device_ownership_from_security_context" json:"device_ownership_from_security_context"`
|
||||
// IgnoreImageDefinedVolumes ignores volumes defined by the image. Useful for better resource
|
||||
// isolation, security and early detection of issues in the mount configuration when using
|
||||
// ReadOnlyRootFilesystem since containers won't silently mount a temporary volume.
|
||||
IgnoreImageDefinedVolumes bool `toml:"ignore_image_defined_volumes" json:"ignoreImageDefinedVolumes"`
|
||||
// NetNSMountsUnderStateDir places all mounts for network namespaces under StateDir/netns instead
|
||||
// of being placed under the hardcoded directory /var/run/netns. Changing this setting requires
|
||||
// that all containers are deleted.
|
||||
NetNSMountsUnderStateDir bool `toml:"netns_mounts_under_state_dir" json:"netnsMountsUnderStateDir"`
|
||||
// EnableUnprivilegedPorts configures net.ipv4.ip_unprivileged_port_start=0
|
||||
// for all containers which are not using host network
|
||||
// and if it is not overwritten by PodSandboxConfig
|
||||
// Note that currently default is set to disabled but target change it in future, see:
|
||||
// https://github.com/kubernetes/kubernetes/issues/102612
|
||||
EnableUnprivilegedPorts bool `toml:"enable_unprivileged_ports" json:"enableUnprivilegedPorts"`
|
||||
// EnableUnprivilegedICMP configures net.ipv4.ping_group_range="0 2147483647"
|
||||
// for all containers which are not using host network, are not running in user namespace
|
||||
// and if it is not overwritten by PodSandboxConfig
|
||||
// Note that currently default is set to disabled but target change it in future together with EnableUnprivilegedPorts
|
||||
EnableUnprivilegedICMP bool `toml:"enable_unprivileged_icmp" json:"enableUnprivilegedICMP"`
|
||||
// EnableCDI indicates to enable injection of the Container Device Interface Specifications
|
||||
// into the OCI config
|
||||
// For more details about CDI and the syntax of CDI Spec files please refer to
|
||||
// https://tags.cncf.io/container-device-interface.
|
||||
EnableCDI bool `toml:"enable_cdi" json:"enableCDI"`
|
||||
// CDISpecDirs is the list of directories to scan for Container Device Interface Specifications
|
||||
// For more details about CDI configuration please refer to
|
||||
// https://tags.cncf.io/container-device-interface#containerd-configuration
|
||||
CDISpecDirs []string `toml:"cdi_spec_dirs" json:"cdiSpecDirs"`
|
||||
|
||||
// DrainExecSyncIOTimeout is the maximum duration to wait for ExecSync
|
||||
// API' IO EOF event after exec init process exits. A zero value means
|
||||
// there is no timeout.
|
||||
//
|
||||
// The string is in the golang duration format, see:
|
||||
// https://golang.org/pkg/time/#ParseDuration
|
||||
//
|
||||
// For example, the value can be '5h', '2h30m', '10s'.
|
||||
DrainExecSyncIOTimeout string `toml:"drain_exec_sync_io_timeout" json:"drainExecSyncIOTimeout"`
|
||||
}
|
||||
|
||||
// X509KeyPairStreaming is the x509 configuration used for streaming.
type X509KeyPairStreaming struct {
	// TLSCertFile is the path to a certificate file.
	TLSCertFile string `toml:"tls_cert_file" json:"tlsCertFile"`

	// TLSKeyFile is the path to a private key file.
	TLSKeyFile string `toml:"tls_key_file" json:"tlsKeyFile"`
}
|
||||
|
||||
// Config contains all configurations for CRI runtime plugin.
|
||||
type Config struct {
|
||||
// RuntimeConfig is the config for CRI runtime.
|
||||
RuntimeConfig
|
||||
// ContainerdRootDir is the root directory path for containerd.
|
||||
ContainerdRootDir string `json:"containerdRootDir"`
|
||||
// ContainerdEndpoint is the containerd endpoint path.
|
||||
ContainerdEndpoint string `json:"containerdEndpoint"`
|
||||
// RootDir is the root directory path for managing cri plugin files
|
||||
// (metadata checkpoint etc.)
|
||||
RootDir string `json:"rootDir"`
|
||||
// StateDir is the root directory path for managing volatile pod/container data
|
||||
StateDir string `json:"stateDir"`
|
||||
}
|
||||
|
||||
// ServerConfig contains all the configuration for the CRI API server.
|
||||
type ServerConfig struct {
|
||||
// DisableTCPService disables serving CRI on the TCP server.
|
||||
DisableTCPService bool `toml:"disable_tcp_service" json:"disableTCPService"`
|
||||
// StreamServerAddress is the ip address streaming server is listening on.
|
||||
StreamServerAddress string `toml:"stream_server_address" json:"streamServerAddress"`
|
||||
// StreamServerPort is the port streaming server is listening on.
|
||||
StreamServerPort string `toml:"stream_server_port" json:"streamServerPort"`
|
||||
// StreamIdleTimeout is the maximum time a streaming connection
|
||||
// can be idle before the connection is automatically closed.
|
||||
// The string is in the golang duration format, see:
|
||||
// https://golang.org/pkg/time/#ParseDuration
|
||||
StreamIdleTimeout string `toml:"stream_idle_timeout" json:"streamIdleTimeout"`
|
||||
// EnableTLSStreaming indicates to enable the TLS streaming support.
|
||||
EnableTLSStreaming bool `toml:"enable_tls_streaming" json:"enableTLSStreaming"`
|
||||
// X509KeyPairStreaming is a x509 key pair used for TLS streaming
|
||||
X509KeyPairStreaming `toml:"x509_key_pair_streaming" json:"x509KeyPairStreaming"`
|
||||
}
|
||||
|
||||
// Implicit runtime names.
//
// NOTE(review): the ContainerdConfig struct declared in this file has no
// UntrustedWorkloadRuntime or DefaultRuntime field; the references below
// appear to describe an older layout — confirm before relying on them.
const (
	// RuntimeUntrusted is the implicit runtime defined for
	// ContainerdConfig.UntrustedWorkloadRuntime.
	RuntimeUntrusted = "untrusted"
	// RuntimeDefault is the implicit runtime defined for
	// ContainerdConfig.DefaultRuntime.
	RuntimeDefault = "default"
)

// KeyModelNode is the key model where the keys for encrypted images reside on
// the worker nodes.
const KeyModelNode = "node"
|
||||
|
||||
// ValidateImageConfig validates the given image configuration
|
||||
func ValidateImageConfig(ctx context.Context, c *ImageConfig) ([]deprecation.Warning, error) {
|
||||
var warnings []deprecation.Warning
|
||||
|
||||
useConfigPath := c.Registry.ConfigPath != ""
|
||||
if len(c.Registry.Mirrors) > 0 {
|
||||
if useConfigPath {
|
||||
return warnings, errors.New("`mirrors` cannot be set when `config_path` is provided")
|
||||
}
|
||||
warnings = append(warnings, deprecation.CRIRegistryMirrors)
|
||||
log.G(ctx).Warning("`mirrors` is deprecated, please use `config_path` instead")
|
||||
}
|
||||
|
||||
if len(c.Registry.Configs) != 0 {
|
||||
warnings = append(warnings, deprecation.CRIRegistryConfigs)
|
||||
log.G(ctx).Warning("`configs` is deprecated, please use `config_path` instead")
|
||||
}
|
||||
|
||||
// Validation for deprecated auths options and mapping it to configs.
|
||||
if len(c.Registry.Auths) != 0 {
|
||||
if c.Registry.Configs == nil {
|
||||
c.Registry.Configs = make(map[string]RegistryConfig)
|
||||
}
|
||||
for endpoint, auth := range c.Registry.Auths {
|
||||
auth := auth
|
||||
u, err := url.Parse(endpoint)
|
||||
if err != nil {
|
||||
return warnings, fmt.Errorf("failed to parse registry url %q from `registry.auths`: %w", endpoint, err)
|
||||
}
|
||||
if u.Scheme != "" {
|
||||
// Do not include the scheme in the new registry config.
|
||||
endpoint = u.Host
|
||||
}
|
||||
config := c.Registry.Configs[endpoint]
|
||||
config.Auth = &auth
|
||||
c.Registry.Configs[endpoint] = config
|
||||
}
|
||||
warnings = append(warnings, deprecation.CRIRegistryAuths)
|
||||
log.G(ctx).Warning("`auths` is deprecated, please use `ImagePullSecrets` instead")
|
||||
}
|
||||
|
||||
// Validation for image_pull_progress_timeout
|
||||
if c.ImagePullProgressTimeout != "" {
|
||||
if _, err := time.ParseDuration(c.ImagePullProgressTimeout); err != nil {
|
||||
return warnings, fmt.Errorf("invalid image pull progress timeout: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return warnings, nil
|
||||
}
|
||||
|
||||
func introspectRuntimeFeatures(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (*features.Features, error) {
|
||||
if introspectionClient == nil { // happens for unit tests
|
||||
return nil, errors.New("introspectionClient is nil")
|
||||
}
|
||||
infoReq := &introspectionapi.PluginInfoRequest{
|
||||
Type: string(plugins.RuntimePluginV2),
|
||||
ID: "task",
|
||||
}
|
||||
rr := &apitypes.RuntimeRequest{
|
||||
RuntimePath: r.Type,
|
||||
}
|
||||
if r.Path != "" {
|
||||
rr.RuntimePath = r.Path
|
||||
}
|
||||
options, err := GenerateRuntimeOptions(r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rr.Options, err = protobuf.MarshalAnyToProto(options)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal %T: %w", options, err)
|
||||
}
|
||||
infoReq.Options, err = protobuf.MarshalAnyToProto(rr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal %T: %w", rr, err)
|
||||
}
|
||||
infoResp, err := introspectionClient.PluginInfo(ctx, infoReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to call PluginInfo: %w", err)
|
||||
}
|
||||
var info apitypes.RuntimeInfo
|
||||
if err := typeurl.UnmarshalTo(infoResp.Extra, &info); err != nil {
|
||||
return nil, fmt.Errorf("failed to get runtime info from plugin info: %w", err)
|
||||
}
|
||||
featuresX, err := typeurl.UnmarshalAny(info.Features)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal Features (%T): %w", info.Features, err)
|
||||
}
|
||||
features, ok := featuresX.(*features.Features)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown features type %T", featuresX)
|
||||
}
|
||||
return features, nil
|
||||
}
|
||||
|
||||
// resolveTreatRoMountsAsRro resolves r.TreatRoMountsAsRro string into a boolean.
|
||||
func resolveTreatRoMountsAsRro(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (bool, error) {
|
||||
debugPrefix := "treat_ro_mounts_as_rro"
|
||||
if r.Type != "" {
|
||||
debugPrefix += fmt.Sprintf("[%s]", r.Type)
|
||||
}
|
||||
if binaryName := r.Options["BinaryName"]; binaryName != "" {
|
||||
debugPrefix += fmt.Sprintf("[%v]", binaryName)
|
||||
}
|
||||
debugPrefix += ": "
|
||||
|
||||
var runtimeSupportsRro bool
|
||||
if r.Type == plugins.RuntimeRuncV2 {
|
||||
features, err := introspectRuntimeFeatures(ctx, introspectionClient, r)
|
||||
if err != nil {
|
||||
log.G(ctx).WithError(err).Warnf(debugPrefix + "failed to introspect runtime features (binary is not compatible with runc v1.1?)")
|
||||
} else {
|
||||
log.G(ctx).Debugf(debugPrefix+"Features: %+v", features)
|
||||
for _, s := range features.MountOptions {
|
||||
if s == "rro" {
|
||||
runtimeSupportsRro = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch r.TreatRoMountsAsRro {
|
||||
case TernaryDisabled:
|
||||
log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly disabled")
|
||||
return false, nil
|
||||
case TernaryEnabled:
|
||||
log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly enabled")
|
||||
if !kernelSupportsRro {
|
||||
return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs Linux kernel v5.12 or later", TernaryEnabled)
|
||||
}
|
||||
if !runtimeSupportsRro {
|
||||
return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs a runtime that is compatible with runc v1.1", TernaryEnabled)
|
||||
}
|
||||
return true, nil
|
||||
case TernaryEmpty, TernaryIfPossible:
|
||||
if r.Type != plugins.RuntimeRuncV2 {
|
||||
log.G(ctx).Debugf(debugPrefix+"rro mounts are not supported by runtime %q, disabling rro mounts", r.Type)
|
||||
return false, nil
|
||||
}
|
||||
if !kernelSupportsRro {
|
||||
msg := debugPrefix + "rro mounts are not supported by kernel, disabling rro mounts"
|
||||
if goruntime.GOOS == "linux" {
|
||||
msg += " (Hint: upgrade the kernel to v5.12 or later)"
|
||||
log.G(ctx).Warn(msg)
|
||||
} else {
|
||||
log.G(ctx).Debug(msg)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
if !runtimeSupportsRro {
|
||||
log.G(ctx).Warn(debugPrefix + "rro mounts are not supported by runtime, disabling rro mounts (Hint: use a runtime that is compatible with runc v1.1)")
|
||||
return false, nil
|
||||
}
|
||||
log.G(ctx).Debug(debugPrefix + "rro mounts are implicitly enabled")
|
||||
return true, nil
|
||||
default:
|
||||
return false, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q (must be %q, %q, or %q)",
|
||||
r.TreatRoMountsAsRro, TernaryDisabled, TernaryEnabled, TernaryIfPossible)
|
||||
}
|
||||
}
|
||||
|
||||
// ValidateRuntimeConfig validates the given runtime configuration.
|
||||
func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig, introspectionClient introspectionapi.IntrospectionClient) ([]deprecation.Warning, error) {
|
||||
var warnings []deprecation.Warning
|
||||
if c.ContainerdConfig.Runtimes == nil {
|
||||
c.ContainerdConfig.Runtimes = make(map[string]Runtime)
|
||||
}
|
||||
|
||||
// Validation for default_runtime_name
|
||||
if c.ContainerdConfig.DefaultRuntimeName == "" {
|
||||
return warnings, errors.New("`default_runtime_name` is empty")
|
||||
}
|
||||
if _, ok := c.ContainerdConfig.Runtimes[c.ContainerdConfig.DefaultRuntimeName]; !ok {
|
||||
return warnings, fmt.Errorf("no corresponding runtime configured in `containerd.runtimes` for `containerd` `default_runtime_name = \"%s\"", c.ContainerdConfig.DefaultRuntimeName)
|
||||
}
|
||||
|
||||
for k, r := range c.ContainerdConfig.Runtimes {
|
||||
if !r.PrivilegedWithoutHostDevices && r.PrivilegedWithoutHostDevicesAllDevicesAllowed {
|
||||
return warnings, errors.New("`privileged_without_host_devices_all_devices_allowed` requires `privileged_without_host_devices` to be enabled")
|
||||
}
|
||||
// If empty, use default podSandbox mode
|
||||
if len(r.Sandboxer) == 0 {
|
||||
r.Sandboxer = string(ModePodSandbox)
|
||||
}
|
||||
|
||||
// Resolve r.TreatRoMountsAsRro (string; empty value must not be ignored) into r.TreatRoMountsAsRroResolved (bool)
|
||||
var err error
|
||||
r.TreatRoMountsAsRroResolved, err = resolveTreatRoMountsAsRro(ctx, introspectionClient, r)
|
||||
if err != nil {
|
||||
return warnings, err
|
||||
}
|
||||
c.ContainerdConfig.Runtimes[k] = r
|
||||
}
|
||||
|
||||
// Validation for drain_exec_sync_io_timeout
|
||||
if c.DrainExecSyncIOTimeout != "" {
|
||||
if _, err := time.ParseDuration(c.DrainExecSyncIOTimeout); err != nil {
|
||||
return warnings, fmt.Errorf("invalid `drain_exec_sync_io_timeout`: %w", err)
|
||||
}
|
||||
}
|
||||
if err := ValidateEnableUnprivileged(ctx, c); err != nil {
|
||||
return warnings, err
|
||||
}
|
||||
return warnings, nil
|
||||
}
|
||||
|
||||
// ValidateServerConfig validates the given server configuration.
|
||||
func ValidateServerConfig(ctx context.Context, c *ServerConfig) ([]deprecation.Warning, error) {
|
||||
var warnings []deprecation.Warning
|
||||
// Validation for stream_idle_timeout
|
||||
if c.StreamIdleTimeout != "" {
|
||||
if _, err := time.ParseDuration(c.StreamIdleTimeout); err != nil {
|
||||
return warnings, fmt.Errorf("invalid stream idle timeout: %w", err)
|
||||
}
|
||||
}
|
||||
return warnings, nil
|
||||
}
|
||||
|
||||
func (config *Config) GetSandboxRuntime(podSandboxConfig *runtime.PodSandboxConfig, runtimeHandler string) (Runtime, error) {
|
||||
if untrustedWorkload(podSandboxConfig) {
|
||||
// If the untrusted annotation is provided, runtimeHandler MUST be empty.
|
||||
if runtimeHandler != "" && runtimeHandler != RuntimeUntrusted {
|
||||
return Runtime{}, errors.New("untrusted workload with explicit runtime handler is not allowed")
|
||||
}
|
||||
|
||||
// If the untrusted workload is requesting access to the host/node, this request will fail.
|
||||
//
|
||||
// Note: If the workload is marked untrusted but requests privileged, this can be granted, as the
|
||||
// runtime may support this. For example, in a virtual-machine isolated runtime, privileged
|
||||
// is a supported option, granting the workload to access the entire guest VM instead of host.
|
||||
// TODO(windows): Deprecate this so that we don't need to handle it for windows.
|
||||
if hostAccessingSandbox(podSandboxConfig) {
|
||||
return Runtime{}, errors.New("untrusted workload with host access is not allowed")
|
||||
}
|
||||
|
||||
runtimeHandler = RuntimeUntrusted
|
||||
}
|
||||
|
||||
if runtimeHandler == "" {
|
||||
runtimeHandler = config.DefaultRuntimeName
|
||||
}
|
||||
|
||||
r, ok := config.Runtimes[runtimeHandler]
|
||||
if !ok {
|
||||
return Runtime{}, fmt.Errorf("no runtime for %q is configured", runtimeHandler)
|
||||
}
|
||||
return r, nil
|
||||
|
||||
}
|
||||
|
||||
// untrustedWorkload returns true if the sandbox contains untrusted workload.
|
||||
func untrustedWorkload(config *runtime.PodSandboxConfig) bool {
|
||||
return config.GetAnnotations()[annotations.UntrustedWorkload] == "true"
|
||||
}
|
||||
|
||||
// hostAccessingSandbox returns true if the sandbox configuration
|
||||
// requires additional host access for the sandbox.
|
||||
func hostAccessingSandbox(config *runtime.PodSandboxConfig) bool {
|
||||
securityContext := config.GetLinux().GetSecurityContext()
|
||||
|
||||
namespaceOptions := securityContext.GetNamespaceOptions()
|
||||
if namespaceOptions.GetNetwork() == runtime.NamespaceMode_NODE ||
|
||||
namespaceOptions.GetPid() == runtime.NamespaceMode_NODE ||
|
||||
namespaceOptions.GetIpc() == runtime.NamespaceMode_NODE {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// GenerateRuntimeOptions generates runtime options from cri plugin config.
|
||||
func GenerateRuntimeOptions(r Runtime) (interface{}, error) {
|
||||
if r.Options == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
b, err := toml.Marshal(r.Options)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal TOML blob for runtime %q: %w", r.Type, err)
|
||||
}
|
||||
|
||||
options := getRuntimeOptionsType(r.Type)
|
||||
if err := toml.Unmarshal(b, options); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// For generic configuration, if no config path specified (preserving old behavior), pass
|
||||
// the whole TOML configuration section to the runtime.
|
||||
if runtimeOpts, ok := options.(*runtimeoptions.Options); ok && runtimeOpts.ConfigPath == "" {
|
||||
runtimeOpts.ConfigBody = b
|
||||
}
|
||||
|
||||
return options, nil
|
||||
}
|
||||
|
||||
// getRuntimeOptionsType gets empty runtime options by the runtime type name.
|
||||
func getRuntimeOptionsType(t string) interface{} {
|
||||
switch t {
|
||||
case plugins.RuntimeRuncV2:
|
||||
return &runcoptions.Options{}
|
||||
case plugins.RuntimeRunhcsV1:
|
||||
return &runhcsoptions.Options{}
|
||||
default:
|
||||
return &runtimeoptions.Options{}
|
||||
}
|
||||
}
|
||||
|
||||
func DefaultServerConfig() ServerConfig {
|
||||
return ServerConfig{
|
||||
DisableTCPService: true,
|
||||
StreamServerAddress: "127.0.0.1",
|
||||
StreamServerPort: "0",
|
||||
StreamIdleTimeout: streaming.DefaultConfig.StreamIdleTimeout.String(), // 4 hour
|
||||
EnableTLSStreaming: false,
|
||||
X509KeyPairStreaming: X509KeyPairStreaming{
|
||||
TLSKeyFile: "",
|
||||
TLSCertFile: "",
|
||||
},
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user