Revert "cri: make read-only mounts recursively read-only"
Revert PR 9713, as it appeared to break compatibility too much. See:
https://github.com/kubernetes/enhancements/pull/3858#issuecomment-1925441072
This reverts commit b2f254fff0.
> Conflicts:
> internal/cri/opts/spec_linux_opts.go
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
@@ -21,15 +21,9 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/url"
|
||||
goruntime "runtime"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
introspectionapi "github.com/containerd/containerd/v2/api/services/introspection/v1"
|
||||
apitypes "github.com/containerd/containerd/v2/api/types"
|
||||
"github.com/containerd/containerd/v2/protobuf"
|
||||
"github.com/containerd/log"
|
||||
"github.com/containerd/typeurl/v2"
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
"k8s.io/kubelet/pkg/cri/streaming"
|
||||
@@ -40,16 +34,8 @@ import (
|
||||
"github.com/containerd/containerd/v2/pkg/deprecation"
|
||||
runtimeoptions "github.com/containerd/containerd/v2/pkg/runtimeoptions/v1"
|
||||
"github.com/containerd/containerd/v2/plugins"
|
||||
"github.com/opencontainers/image-spec/specs-go"
|
||||
"github.com/opencontainers/runtime-spec/specs-go/features"
|
||||
)
|
||||
|
||||
func init() {
|
||||
const prefix = "types.containerd.io"
|
||||
major := strconv.Itoa(specs.VersionMajor)
|
||||
typeurl.Register(&features.Features{}, prefix, "opencontainers/runtime-spec", major, "features", "Features")
|
||||
}
|
||||
|
||||
const (
|
||||
// defaultImagePullProgressTimeoutDuration is the default value of imagePullProgressTimeout.
|
||||
//
|
||||
@@ -87,17 +73,6 @@ const (
|
||||
DefaultSandboxImage = "registry.k8s.io/pause:3.9"
|
||||
)
|
||||
|
||||
// Ternary represents a ternary value.
|
||||
// Ternary is needed because TOML does not accept "null" for boolean values.
// It is a type alias of string, so plain string values can be used wherever a
// Ternary is expected.
|
||||
type Ternary = string
|
||||
|
||||
// Accepted Ternary values.
const (
|
||||
// TernaryEmpty is the unset value; it is handled the same way as TernaryIfPossible.
TernaryEmpty Ternary = "" // alias for IfPossible
|
||||
// TernaryEnabled requests that the feature be turned on.
TernaryEnabled Ternary = "Enabled"
|
||||
// TernaryIfPossible requests that the feature be turned on only when it is supported.
TernaryIfPossible Ternary = "IfPossible"
|
||||
// TernaryDisabled requests that the feature be turned off.
TernaryDisabled Ternary = "Disabled"
|
||||
)
|
||||
|
||||
// Runtime struct to contain the type(ID), engine, and root variables for a default runtime
|
||||
// and a runtime for untrusted workload.
|
||||
type Runtime struct {
|
||||
@@ -141,15 +116,6 @@ type Runtime struct {
|
||||
// shim - means use whatever Controller implementation provided by shim (e.g. use RemoteController).
|
||||
// podsandbox - means use Controller implementation from sbserver podsandbox package.
|
||||
Sandboxer string `toml:"sandboxer" json:"sandboxer"`
|
||||
|
||||
// TreatRoMountsAsRro ("Enabled"|"IfPossible"|"Disabled")
|
||||
// treats read-only mounts as recursive read-only mounts.
|
||||
// An empty string means "IfPossible".
|
||||
// "Enabled" requires Linux kernel v5.12 or later.
|
||||
// Introduced in containerd v2.0.
|
||||
// This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
|
||||
TreatRoMountsAsRro Ternary `toml:"treat_ro_mount_as_rro" json:"treatRoMountsAsRro"`
|
||||
TreatRoMountsAsRroResolved bool `toml:"-" json:"-"` // Do not set manually
|
||||
}
|
||||
|
||||
// ContainerdConfig contains toml config related to containerd
|
||||
@@ -533,120 +499,8 @@ func ValidateImageConfig(ctx context.Context, c *ImageConfig) ([]deprecation.War
|
||||
return warnings, nil
|
||||
}
|
||||
|
||||
func introspectRuntimeFeatures(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (*features.Features, error) {
|
||||
if introspectionClient == nil { // happens for unit tests
|
||||
return nil, errors.New("introspectionClient is nil")
|
||||
}
|
||||
infoReq := &introspectionapi.PluginInfoRequest{
|
||||
Type: string(plugins.RuntimePluginV2),
|
||||
ID: "task",
|
||||
}
|
||||
rr := &apitypes.RuntimeRequest{
|
||||
RuntimePath: r.Type,
|
||||
}
|
||||
if r.Path != "" {
|
||||
rr.RuntimePath = r.Path
|
||||
}
|
||||
options, err := GenerateRuntimeOptions(r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rr.Options, err = protobuf.MarshalAnyToProto(options)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal %T: %w", options, err)
|
||||
}
|
||||
infoReq.Options, err = protobuf.MarshalAnyToProto(rr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal %T: %w", rr, err)
|
||||
}
|
||||
infoResp, err := introspectionClient.PluginInfo(ctx, infoReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to call PluginInfo: %w", err)
|
||||
}
|
||||
var info apitypes.RuntimeInfo
|
||||
if err := typeurl.UnmarshalTo(infoResp.Extra, &info); err != nil {
|
||||
return nil, fmt.Errorf("failed to get runtime info from plugin info: %w", err)
|
||||
}
|
||||
featuresX, err := typeurl.UnmarshalAny(info.Features)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal Features (%T): %w", info.Features, err)
|
||||
}
|
||||
features, ok := featuresX.(*features.Features)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown features type %T", featuresX)
|
||||
}
|
||||
return features, nil
|
||||
}
|
||||
|
||||
// resolveTreatRoMountsAsRro resolves r.TreatRoMountsAsRro string into a boolean.
|
||||
func resolveTreatRoMountsAsRro(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (bool, error) {
|
||||
debugPrefix := "treat_ro_mounts_as_rro"
|
||||
if r.Type != "" {
|
||||
debugPrefix += fmt.Sprintf("[%s]", r.Type)
|
||||
}
|
||||
if binaryName := r.Options["BinaryName"]; binaryName != "" {
|
||||
debugPrefix += fmt.Sprintf("[%v]", binaryName)
|
||||
}
|
||||
debugPrefix += ": "
|
||||
|
||||
var runtimeSupportsRro bool
|
||||
if r.Type == plugins.RuntimeRuncV2 {
|
||||
features, err := introspectRuntimeFeatures(ctx, introspectionClient, r)
|
||||
if err != nil {
|
||||
log.G(ctx).WithError(err).Warnf(debugPrefix + "failed to introspect runtime features (binary is not compatible with runc v1.1?)")
|
||||
} else {
|
||||
log.G(ctx).Debugf(debugPrefix+"Features: %+v", features)
|
||||
for _, s := range features.MountOptions {
|
||||
if s == "rro" {
|
||||
runtimeSupportsRro = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch r.TreatRoMountsAsRro {
|
||||
case TernaryDisabled:
|
||||
log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly disabled")
|
||||
return false, nil
|
||||
case TernaryEnabled:
|
||||
log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly enabled")
|
||||
if !kernelSupportsRro {
|
||||
return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs Linux kernel v5.12 or later", TernaryEnabled)
|
||||
}
|
||||
if !runtimeSupportsRro {
|
||||
return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs a runtime that is compatible with runc v1.1", TernaryEnabled)
|
||||
}
|
||||
return true, nil
|
||||
case TernaryEmpty, TernaryIfPossible:
|
||||
if r.Type != plugins.RuntimeRuncV2 {
|
||||
log.G(ctx).Debugf(debugPrefix+"rro mounts are not supported by runtime %q, disabling rro mounts", r.Type)
|
||||
return false, nil
|
||||
}
|
||||
if !kernelSupportsRro {
|
||||
msg := debugPrefix + "rro mounts are not supported by kernel, disabling rro mounts"
|
||||
if goruntime.GOOS == "linux" {
|
||||
msg += " (Hint: upgrade the kernel to v5.12 or later)"
|
||||
log.G(ctx).Warn(msg)
|
||||
} else {
|
||||
log.G(ctx).Debug(msg)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
if !runtimeSupportsRro {
|
||||
log.G(ctx).Warn(debugPrefix + "rro mounts are not supported by runtime, disabling rro mounts (Hint: use a runtime that is compatible with runc v1.1)")
|
||||
return false, nil
|
||||
}
|
||||
log.G(ctx).Debug(debugPrefix + "rro mounts are implicitly enabled")
|
||||
return true, nil
|
||||
default:
|
||||
return false, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q (must be %q, %q, or %q)",
|
||||
r.TreatRoMountsAsRro, TernaryDisabled, TernaryEnabled, TernaryIfPossible)
|
||||
}
|
||||
}
|
||||
|
||||
// ValidateRuntimeConfig validates the given runtime configuration.
|
||||
func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig, introspectionClient introspectionapi.IntrospectionClient) ([]deprecation.Warning, error) {
|
||||
func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig) ([]deprecation.Warning, error) {
|
||||
var warnings []deprecation.Warning
|
||||
if c.ContainerdConfig.Runtimes == nil {
|
||||
c.ContainerdConfig.Runtimes = make(map[string]Runtime)
|
||||
@@ -667,15 +521,8 @@ func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig, introspectionC
|
||||
// If empty, use default podSandbox mode
|
||||
if len(r.Sandboxer) == 0 {
|
||||
r.Sandboxer = string(ModePodSandbox)
|
||||
c.ContainerdConfig.Runtimes[k] = r
|
||||
}
|
||||
|
||||
// Resolve r.TreatRoMountsAsRro (string; empty value must not be ignored) into r.TreatRoMountsAsRroResolved (bool)
|
||||
var err error
|
||||
r.TreatRoMountsAsRroResolved, err = resolveTreatRoMountsAsRro(ctx, introspectionClient, r)
|
||||
if err != nil {
|
||||
return warnings, err
|
||||
}
|
||||
c.ContainerdConfig.Runtimes[k] = r
|
||||
}
|
||||
|
||||
// Validation for drain_exec_sync_io_timeout
|
||||
|
||||
@@ -41,13 +41,3 @@ func ValidateEnableUnprivileged(ctx context.Context, c *RuntimeConfig) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var kernelSupportsRro bool
|
||||
|
||||
func init() {
|
||||
var err error
|
||||
kernelSupportsRro, err = kernelGreaterEqualThan(kernel.KernelVersion{Kernel: 5, Major: 12})
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("check current system kernel version error: %w", err))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,5 +25,3 @@ import (
|
||||
// ValidateEnableUnprivileged performs no checks in this variant of the function
// and always returns nil; ctx and c are not used.
func ValidateEnableUnprivileged(ctx context.Context, c *RuntimeConfig) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
var kernelSupportsRro bool
|
||||
|
||||
@@ -222,7 +222,7 @@ func TestValidateConfig(t *testing.T) {
|
||||
t.Run(desc, func(t *testing.T) {
|
||||
var warnings []deprecation.Warning
|
||||
if test.runtimeConfig != nil {
|
||||
w, err := ValidateRuntimeConfig(context.Background(), test.runtimeConfig, nil)
|
||||
w, err := ValidateRuntimeConfig(context.Background(), test.runtimeConfig)
|
||||
if test.runtimeExpectedErr != "" {
|
||||
assert.Contains(t, err.Error(), test.runtimeExpectedErr)
|
||||
} else {
|
||||
|
||||
@@ -38,14 +38,8 @@ import (
|
||||
"github.com/containerd/log"
|
||||
)
|
||||
|
||||
// RuntimeConfig is a subset of [github.com/containerd/containerd/v2/internal/cri/config].
|
||||
// Needed for avoiding circular imports.
|
||||
type RuntimeConfig struct {
|
||||
// TreatRoMountsAsRro makes WithMounts use the "rro" mount option instead of "ro"
// for read-only mounts whose source is a directory.
TreatRoMountsAsRro bool // only applies to volumes
|
||||
}
|
||||
|
||||
// WithMounts sorts and adds runtime and CRI mounts to the spec
|
||||
func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string, rtConfig *RuntimeConfig) oci.SpecOpts {
|
||||
func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
// mergeMounts merge CRI mounts with extra mounts. If a mount destination
|
||||
// is mounted by both a CRI mount and an extra mount, the CRI mount will
|
||||
@@ -73,7 +67,6 @@ func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*ru
|
||||
sort.Sort(orderedMounts(mounts))
|
||||
|
||||
// Mount cgroup into the container as readonly, which inherits docker's behavior.
|
||||
// TreatRoMountsAsRro does not apply here, as /sys/fs/cgroup is not a volume.
|
||||
s.Mounts = append(s.Mounts, runtimespec.Mount{
|
||||
Source: "cgroup",
|
||||
Destination: "/sys/fs/cgroup",
|
||||
@@ -155,25 +148,10 @@ func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*ru
|
||||
options = append(options, "rprivate")
|
||||
}
|
||||
|
||||
var srcIsDir bool
|
||||
if srcSt, err := osi.Stat(src); err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) { // happens when osi is FakeOS
|
||||
srcIsDir = true // assume src to be dir
|
||||
} else {
|
||||
return fmt.Errorf("failed to stat mount source %q: %w", src, err)
|
||||
}
|
||||
} else if srcSt != nil { // srcSt can be nil when osi is FakeOS
|
||||
srcIsDir = srcSt.IsDir()
|
||||
}
|
||||
|
||||
// NOTE(random-liu): we don't change all mounts to `ro` when root filesystem
|
||||
// is readonly. This is different from docker's behavior, but make more sense.
|
||||
if mount.GetReadonly() {
|
||||
if rtConfig != nil && rtConfig.TreatRoMountsAsRro && srcIsDir {
|
||||
options = append(options, "rro")
|
||||
} else {
|
||||
options = append(options, "ro")
|
||||
}
|
||||
options = append(options, "ro")
|
||||
} else {
|
||||
options = append(options, "rw")
|
||||
}
|
||||
|
||||
@@ -683,9 +683,7 @@ func (c *criService) buildLinuxSpec(
|
||||
}
|
||||
}()
|
||||
|
||||
specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel, &customopts.RuntimeConfig{
|
||||
TreatRoMountsAsRro: ociRuntime.TreatRoMountsAsRroResolved,
|
||||
}))
|
||||
specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel))
|
||||
|
||||
if !c.config.DisableProcMount {
|
||||
// Change the default masked/readonly paths to empty slices
|
||||
|
||||
@@ -597,7 +597,7 @@ func TestMountPropagation(t *testing.T) {
|
||||
var spec runtimespec.Spec
|
||||
spec.Linux = &runtimespec.Linux{}
|
||||
|
||||
err := opts.WithMounts(c.os, config, []*runtime.Mount{test.criMount}, "", nil)(context.Background(), nil, nil, &spec)
|
||||
err := opts.WithMounts(c.os, config, []*runtime.Mount{test.criMount}, "")(context.Background(), nil, nil, &spec)
|
||||
if test.expectErr {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user