Revert "cri: make read-only mounts recursively read-only"

Revert PR 9713, as it appeared to break compatibility too much:
https://github.com/kubernetes/enhancements/pull/3858#issuecomment-1925441072

This reverts commit b2f254fff0.

> Conflicts:
>	internal/cri/opts/spec_linux_opts.go

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
2024-02-04 01:13:33 +09:00
parent 96bf529cbf
commit 6670695836
11 changed files with 8 additions and 387 deletions
--- a/internal/cri/config/config.go
+++ b/internal/cri/config/config.go
@@ -21,15 +21,9 @@ import (
 	"errors"
 	"fmt"
 	"net/url"
-	goruntime "runtime"
-	"strconv"
 	"time"

-	introspectionapi "github.com/containerd/containerd/v2/api/services/introspection/v1"
-	apitypes "github.com/containerd/containerd/v2/api/types"
-	"github.com/containerd/containerd/v2/protobuf"
 	"github.com/containerd/log"
-	"github.com/containerd/typeurl/v2"
 	"github.com/pelletier/go-toml/v2"
 	runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
 	"k8s.io/kubelet/pkg/cri/streaming"
@@ -40,16 +34,8 @@ import (
 	"github.com/containerd/containerd/v2/pkg/deprecation"
 	runtimeoptions "github.com/containerd/containerd/v2/pkg/runtimeoptions/v1"
 	"github.com/containerd/containerd/v2/plugins"
-	"github.com/opencontainers/image-spec/specs-go"
-	"github.com/opencontainers/runtime-spec/specs-go/features"
 )

-func init() {
-	const prefix = "types.containerd.io"
-	major := strconv.Itoa(specs.VersionMajor)
-	typeurl.Register(&features.Features{}, prefix, "opencontainers/runtime-spec", major, "features", "Features")
-}
-
 const (
 	// defaultImagePullProgressTimeoutDuration is the default value of imagePullProgressTimeout.
 	//
@@ -87,17 +73,6 @@ const (
 	DefaultSandboxImage = "registry.k8s.io/pause:3.9"
 )

-// Ternary represents a ternary value.
-// Ternary is needed because TOML does not accept "null" for boolean values.
-type Ternary = string
-
-const (
-	TernaryEmpty      Ternary = "" // alias for IfPossible
-	TernaryEnabled    Ternary = "Enabled"
-	TernaryIfPossible Ternary = "IfPossible"
-	TernaryDisabled   Ternary = "Disabled"
-)
-
 // Runtime struct to contain the type(ID), engine, and root variables for a default runtime
 // and a runtime for untrusted workload.
 type Runtime struct {
@@ -141,15 +116,6 @@ type Runtime struct {
 	// shim - means use whatever Controller implementation provided by shim (e.g. use RemoteController).
 	// podsandbox - means use Controller implementation from sbserver podsandbox package.
 	Sandboxer string `toml:"sandboxer" json:"sandboxer"`
-
-	// TreatRoMountsAsRro ("Enabled"|"IfPossible"|"Disabled")
-	// treats read-only mounts as recursive read-only mounts.
-	// An empty string means "IfPossible".
-	// "Enabled" requires Linux kernel v5.12 or later.
-	// Introduced in containerd v2.0.
-	// This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
-	TreatRoMountsAsRro         Ternary `toml:"treat_ro_mount_as_rro" json:"treatRoMountsAsRro"`
-	TreatRoMountsAsRroResolved bool    `toml:"-" json:"-"` // Do not set manually
 }

 // ContainerdConfig contains toml config related to containerd
@@ -533,120 +499,8 @@ func ValidateImageConfig(ctx context.Context, c *ImageConfig) ([]deprecation.War
 	return warnings, nil
 }

-func introspectRuntimeFeatures(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (*features.Features, error) {
-	if introspectionClient == nil { // happens for unit tests
-		return nil, errors.New("introspectionClient is nil")
-	}
-	infoReq := &introspectionapi.PluginInfoRequest{
-		Type: string(plugins.RuntimePluginV2),
-		ID:   "task",
-	}
-	rr := &apitypes.RuntimeRequest{
-		RuntimePath: r.Type,
-	}
-	if r.Path != "" {
-		rr.RuntimePath = r.Path
-	}
-	options, err := GenerateRuntimeOptions(r)
-	if err != nil {
-		return nil, err
-	}
-	rr.Options, err = protobuf.MarshalAnyToProto(options)
-	if err != nil {
-		return nil, fmt.Errorf("failed to marshal %T: %w", options, err)
-	}
-	infoReq.Options, err = protobuf.MarshalAnyToProto(rr)
-	if err != nil {
-		return nil, fmt.Errorf("failed to marshal %T: %w", rr, err)
-	}
-	infoResp, err := introspectionClient.PluginInfo(ctx, infoReq)
-	if err != nil {
-		return nil, fmt.Errorf("failed to call PluginInfo: %w", err)
-	}
-	var info apitypes.RuntimeInfo
-	if err := typeurl.UnmarshalTo(infoResp.Extra, &info); err != nil {
-		return nil, fmt.Errorf("failed to get runtime info from plugin info: %w", err)
-	}
-	featuresX, err := typeurl.UnmarshalAny(info.Features)
-	if err != nil {
-		return nil, fmt.Errorf("failed to unmarshal Features (%T): %w", info.Features, err)
-	}
-	features, ok := featuresX.(*features.Features)
-	if !ok {
-		return nil, fmt.Errorf("unknown features type %T", featuresX)
-	}
-	return features, nil
-}
-
-// resolveTreatRoMountsAsRro resolves r.TreatRoMountsAsRro string into a boolean.
-func resolveTreatRoMountsAsRro(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (bool, error) {
-	debugPrefix := "treat_ro_mounts_as_rro"
-	if r.Type != "" {
-		debugPrefix += fmt.Sprintf("[%s]", r.Type)
-	}
-	if binaryName := r.Options["BinaryName"]; binaryName != "" {
-		debugPrefix += fmt.Sprintf("[%v]", binaryName)
-	}
-	debugPrefix += ": "
-
-	var runtimeSupportsRro bool
-	if r.Type == plugins.RuntimeRuncV2 {
-		features, err := introspectRuntimeFeatures(ctx, introspectionClient, r)
-		if err != nil {
-			log.G(ctx).WithError(err).Warnf(debugPrefix + "failed to introspect runtime features (binary is not compatible with runc v1.1?)")
-		} else {
-			log.G(ctx).Debugf(debugPrefix+"Features: %+v", features)
-			for _, s := range features.MountOptions {
-				if s == "rro" {
-					runtimeSupportsRro = true
-					break
-				}
-			}
-		}
-	}
-
-	switch r.TreatRoMountsAsRro {
-	case TernaryDisabled:
-		log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly disabled")
-		return false, nil
-	case TernaryEnabled:
-		log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly enabled")
-		if !kernelSupportsRro {
-			return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs Linux kernel v5.12 or later", TernaryEnabled)
-		}
-		if !runtimeSupportsRro {
-			return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs a runtime that is compatible with runc v1.1", TernaryEnabled)
-		}
-		return true, nil
-	case TernaryEmpty, TernaryIfPossible:
-		if r.Type != plugins.RuntimeRuncV2 {
-			log.G(ctx).Debugf(debugPrefix+"rro mounts are not supported by runtime %q, disabling rro mounts", r.Type)
-			return false, nil
-		}
-		if !kernelSupportsRro {
-			msg := debugPrefix + "rro mounts are not supported by kernel, disabling rro mounts"
-			if goruntime.GOOS == "linux" {
-				msg += " (Hint: upgrade the kernel to v5.12 or later)"
-				log.G(ctx).Warn(msg)
-			} else {
-				log.G(ctx).Debug(msg)
-			}
-			return false, nil
-		}
-		if !runtimeSupportsRro {
-			log.G(ctx).Warn(debugPrefix + "rro mounts are not supported by runtime, disabling rro mounts (Hint: use a runtime that is compatible with runc v1.1)")
-			return false, nil
-		}
-		log.G(ctx).Debug(debugPrefix + "rro mounts are implicitly enabled")
-		return true, nil
-	default:
-		return false, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q (must be %q, %q, or %q)",
-			r.TreatRoMountsAsRro, TernaryDisabled, TernaryEnabled, TernaryIfPossible)
-	}
-}
-
 // ValidateRuntimeConfig validates the given runtime configuration.
-func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig, introspectionClient introspectionapi.IntrospectionClient) ([]deprecation.Warning, error) {
+func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig) ([]deprecation.Warning, error) {
 	var warnings []deprecation.Warning
 	if c.ContainerdConfig.Runtimes == nil {
 		c.ContainerdConfig.Runtimes = make(map[string]Runtime)
@@ -667,15 +521,8 @@ func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig, introspectionC
 		// If empty, use default podSandbox mode
 		if len(r.Sandboxer) == 0 {
 			r.Sandboxer = string(ModePodSandbox)
+			c.ContainerdConfig.Runtimes[k] = r
 		}
-
-		// Resolve r.TreatRoMountsAsRro (string; empty value must not be ignored) into r.TreatRoMountsAsRroResolved (bool)
-		var err error
-		r.TreatRoMountsAsRroResolved, err = resolveTreatRoMountsAsRro(ctx, introspectionClient, r)
-		if err != nil {
-			return warnings, err
-		}
-		c.ContainerdConfig.Runtimes[k] = r
 	}

 	// Validation for drain_exec_sync_io_timeout
--- a/internal/cri/config/config_kernel_linux.go
+++ b/internal/cri/config/config_kernel_linux.go
@@ -41,13 +41,3 @@ func ValidateEnableUnprivileged(ctx context.Context, c *RuntimeConfig) error {
 	}
 	return nil
 }
-
-var kernelSupportsRro bool
-
-func init() {
-	var err error
-	kernelSupportsRro, err = kernelGreaterEqualThan(kernel.KernelVersion{Kernel: 5, Major: 12})
-	if err != nil {
-		panic(fmt.Errorf("check current system kernel version error: %w", err))
-	}
-}
--- a/internal/cri/config/config_kernel_other.go
+++ b/internal/cri/config/config_kernel_other.go
@@ -25,5 +25,3 @@ import (
 func ValidateEnableUnprivileged(ctx context.Context, c *RuntimeConfig) error {
 	return nil
 }
-
-var kernelSupportsRro bool
--- a/internal/cri/config/config_test.go
+++ b/internal/cri/config/config_test.go
@@ -222,7 +222,7 @@ func TestValidateConfig(t *testing.T) {
 		t.Run(desc, func(t *testing.T) {
 			var warnings []deprecation.Warning
 			if test.runtimeConfig != nil {
-				w, err := ValidateRuntimeConfig(context.Background(), test.runtimeConfig, nil)
+				w, err := ValidateRuntimeConfig(context.Background(), test.runtimeConfig)
 				if test.runtimeExpectedErr != "" {
 					assert.Contains(t, err.Error(), test.runtimeExpectedErr)
 				} else {
--- a/internal/cri/opts/spec_linux_opts.go
+++ b/internal/cri/opts/spec_linux_opts.go
@@ -38,14 +38,8 @@ import (
 	"github.com/containerd/log"
 )

-// RuntimeConfig is a subset of [github.com/containerd/containerd/v2/internal/cri/config].
-// Needed for avoiding circular imports.
-type RuntimeConfig struct {
-	TreatRoMountsAsRro bool // only applies to volumes
-}
-
 // WithMounts sorts and adds runtime and CRI mounts to the spec
-func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string, rtConfig *RuntimeConfig) oci.SpecOpts {
+func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string) oci.SpecOpts {
 	return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) (err error) {
 		// mergeMounts merge CRI mounts with extra mounts. If a mount destination
 		// is mounted by both a CRI mount and an extra mount, the CRI mount will
@@ -73,7 +67,6 @@ func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*ru
 		sort.Sort(orderedMounts(mounts))

 		// Mount cgroup into the container as readonly, which inherits docker's behavior.
-		// TreatRoMountsAsRro does not apply here, as /sys/fs/cgroup is not a volume.
 		s.Mounts = append(s.Mounts, runtimespec.Mount{
 			Source:      "cgroup",
 			Destination: "/sys/fs/cgroup",
@@ -155,25 +148,10 @@ func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*ru
 				options = append(options, "rprivate")
 			}

-			var srcIsDir bool
-			if srcSt, err := osi.Stat(src); err != nil {
-				if errors.Is(err, os.ErrNotExist) { // happens when osi is FakeOS
-					srcIsDir = true // assume src to be dir
-				} else {
-					return fmt.Errorf("failed to stat mount source %q: %w", src, err)
-				}
-			} else if srcSt != nil { // srcSt can be nil when osi is FakeOS
-				srcIsDir = srcSt.IsDir()
-			}
-
 			// NOTE(random-liu): we don't change all mounts to `ro` when root filesystem
 			// is readonly. This is different from docker's behavior, but make more sense.
 			if mount.GetReadonly() {
-				if rtConfig != nil && rtConfig.TreatRoMountsAsRro && srcIsDir {
-					options = append(options, "rro")
-				} else {
-					options = append(options, "ro")
-				}
+				options = append(options, "ro")
 			} else {
 				options = append(options, "rw")
 			}
--- a/internal/cri/server/container_create.go
+++ b/internal/cri/server/container_create.go
@@ -683,9 +683,7 @@ func (c *criService) buildLinuxSpec(
 		}
 	}()

-	specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel, &customopts.RuntimeConfig{
-		TreatRoMountsAsRro: ociRuntime.TreatRoMountsAsRroResolved,
-	}))
+	specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel))

 	if !c.config.DisableProcMount {
 		// Change the default masked/readonly paths to empty slices
--- a/internal/cri/server/container_create_linux_test.go
+++ b/internal/cri/server/container_create_linux_test.go
@@ -597,7 +597,7 @@ func TestMountPropagation(t *testing.T) {
 			var spec runtimespec.Spec
 			spec.Linux = &runtimespec.Linux{}

-			err := opts.WithMounts(c.os, config, []*runtime.Mount{test.criMount}, "", nil)(context.Background(), nil, nil, &spec)
+			err := opts.WithMounts(c.os, config, []*runtime.Mount{test.criMount}, "")(context.Background(), nil, nil, &spec)
 			if test.expectErr {
 				require.Error(t, err)
 			} else {