Revert "cri: make read-only mounts recursively read-only"

Revert PR 9713, as it appeared to break the compatibility too much
https://github.com/kubernetes/enhancements/pull/3858#issuecomment-1925441072

This reverts commit b2f254fff0.

> Conflicts:
>	internal/cri/opts/spec_linux_opts.go

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
Akihiro Suda
2024-02-04 01:13:33 +09:00
parent 96bf529cbf
commit 6670695836
11 changed files with 8 additions and 387 deletions

View File

@@ -21,15 +21,9 @@ import (
"errors"
"fmt"
"net/url"
goruntime "runtime"
"strconv"
"time"
introspectionapi "github.com/containerd/containerd/v2/api/services/introspection/v1"
apitypes "github.com/containerd/containerd/v2/api/types"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/log"
"github.com/containerd/typeurl/v2"
"github.com/pelletier/go-toml/v2"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubelet/pkg/cri/streaming"
@@ -40,16 +34,8 @@ import (
"github.com/containerd/containerd/v2/pkg/deprecation"
runtimeoptions "github.com/containerd/containerd/v2/pkg/runtimeoptions/v1"
"github.com/containerd/containerd/v2/plugins"
"github.com/opencontainers/image-spec/specs-go"
"github.com/opencontainers/runtime-spec/specs-go/features"
)
func init() {
const prefix = "types.containerd.io"
major := strconv.Itoa(specs.VersionMajor)
typeurl.Register(&features.Features{}, prefix, "opencontainers/runtime-spec", major, "features", "Features")
}
const (
// defaultImagePullProgressTimeoutDuration is the default value of imagePullProgressTimeout.
//
@@ -87,17 +73,6 @@ const (
DefaultSandboxImage = "registry.k8s.io/pause:3.9"
)
// Ternary represents a ternary value.
// Ternary is needed because TOML does not accept "null" for boolean values.
type Ternary = string
const (
TernaryEmpty Ternary = "" // alias for IfPossible
TernaryEnabled Ternary = "Enabled"
TernaryIfPossible Ternary = "IfPossible"
TernaryDisabled Ternary = "Disabled"
)
// Runtime struct to contain the type(ID), engine, and root variables for a default runtime
// and a runtime for untrusted workload.
type Runtime struct {
@@ -141,15 +116,6 @@ type Runtime struct {
// shim - means use whatever Controller implementation provided by shim (e.g. use RemoteController).
// podsandbox - means use Controller implementation from sbserver podsandbox package.
Sandboxer string `toml:"sandboxer" json:"sandboxer"`
// TreatRoMountsAsRro ("Enabled"|"IfPossible"|"Disabled")
// treats read-only mounts as recursive read-only mounts.
// An empty string means "IfPossible".
// "Enabled" requires Linux kernel v5.12 or later.
// Introduced in containerd v2.0.
// This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
TreatRoMountsAsRro Ternary `toml:"treat_ro_mount_as_rro" json:"treatRoMountsAsRro"`
TreatRoMountsAsRroResolved bool `toml:"-" json:"-"` // Do not set manually
}
// ContainerdConfig contains toml config related to containerd
@@ -533,120 +499,8 @@ func ValidateImageConfig(ctx context.Context, c *ImageConfig) ([]deprecation.War
return warnings, nil
}
func introspectRuntimeFeatures(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (*features.Features, error) {
if introspectionClient == nil { // happens for unit tests
return nil, errors.New("introspectionClient is nil")
}
infoReq := &introspectionapi.PluginInfoRequest{
Type: string(plugins.RuntimePluginV2),
ID: "task",
}
rr := &apitypes.RuntimeRequest{
RuntimePath: r.Type,
}
if r.Path != "" {
rr.RuntimePath = r.Path
}
options, err := GenerateRuntimeOptions(r)
if err != nil {
return nil, err
}
rr.Options, err = protobuf.MarshalAnyToProto(options)
if err != nil {
return nil, fmt.Errorf("failed to marshal %T: %w", options, err)
}
infoReq.Options, err = protobuf.MarshalAnyToProto(rr)
if err != nil {
return nil, fmt.Errorf("failed to marshal %T: %w", rr, err)
}
infoResp, err := introspectionClient.PluginInfo(ctx, infoReq)
if err != nil {
return nil, fmt.Errorf("failed to call PluginInfo: %w", err)
}
var info apitypes.RuntimeInfo
if err := typeurl.UnmarshalTo(infoResp.Extra, &info); err != nil {
return nil, fmt.Errorf("failed to get runtime info from plugin info: %w", err)
}
featuresX, err := typeurl.UnmarshalAny(info.Features)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal Features (%T): %w", info.Features, err)
}
features, ok := featuresX.(*features.Features)
if !ok {
return nil, fmt.Errorf("unknown features type %T", featuresX)
}
return features, nil
}
// resolveTreatRoMountsAsRro resolves r.TreatRoMountsAsRro string into a boolean.
func resolveTreatRoMountsAsRro(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (bool, error) {
debugPrefix := "treat_ro_mounts_as_rro"
if r.Type != "" {
debugPrefix += fmt.Sprintf("[%s]", r.Type)
}
if binaryName := r.Options["BinaryName"]; binaryName != "" {
debugPrefix += fmt.Sprintf("[%v]", binaryName)
}
debugPrefix += ": "
var runtimeSupportsRro bool
if r.Type == plugins.RuntimeRuncV2 {
features, err := introspectRuntimeFeatures(ctx, introspectionClient, r)
if err != nil {
log.G(ctx).WithError(err).Warnf(debugPrefix + "failed to introspect runtime features (binary is not compatible with runc v1.1?)")
} else {
log.G(ctx).Debugf(debugPrefix+"Features: %+v", features)
for _, s := range features.MountOptions {
if s == "rro" {
runtimeSupportsRro = true
break
}
}
}
}
switch r.TreatRoMountsAsRro {
case TernaryDisabled:
log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly disabled")
return false, nil
case TernaryEnabled:
log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly enabled")
if !kernelSupportsRro {
return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs Linux kernel v5.12 or later", TernaryEnabled)
}
if !runtimeSupportsRro {
return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs a runtime that is compatible with runc v1.1", TernaryEnabled)
}
return true, nil
case TernaryEmpty, TernaryIfPossible:
if r.Type != plugins.RuntimeRuncV2 {
log.G(ctx).Debugf(debugPrefix+"rro mounts are not supported by runtime %q, disabling rro mounts", r.Type)
return false, nil
}
if !kernelSupportsRro {
msg := debugPrefix + "rro mounts are not supported by kernel, disabling rro mounts"
if goruntime.GOOS == "linux" {
msg += " (Hint: upgrade the kernel to v5.12 or later)"
log.G(ctx).Warn(msg)
} else {
log.G(ctx).Debug(msg)
}
return false, nil
}
if !runtimeSupportsRro {
log.G(ctx).Warn(debugPrefix + "rro mounts are not supported by runtime, disabling rro mounts (Hint: use a runtime that is compatible with runc v1.1)")
return false, nil
}
log.G(ctx).Debug(debugPrefix + "rro mounts are implicitly enabled")
return true, nil
default:
return false, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q (must be %q, %q, or %q)",
r.TreatRoMountsAsRro, TernaryDisabled, TernaryEnabled, TernaryIfPossible)
}
}
// ValidateRuntimeConfig validates the given runtime configuration.
func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig, introspectionClient introspectionapi.IntrospectionClient) ([]deprecation.Warning, error) {
func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig) ([]deprecation.Warning, error) {
var warnings []deprecation.Warning
if c.ContainerdConfig.Runtimes == nil {
c.ContainerdConfig.Runtimes = make(map[string]Runtime)
@@ -667,15 +521,8 @@ func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig, introspectionC
// If empty, use default podSandbox mode
if len(r.Sandboxer) == 0 {
r.Sandboxer = string(ModePodSandbox)
c.ContainerdConfig.Runtimes[k] = r
}
// Resolve r.TreatRoMountsAsRro (string; empty value must not be ignored) into r.TreatRoMountsAsRroResolved (bool)
var err error
r.TreatRoMountsAsRroResolved, err = resolveTreatRoMountsAsRro(ctx, introspectionClient, r)
if err != nil {
return warnings, err
}
c.ContainerdConfig.Runtimes[k] = r
}
// Validation for drain_exec_sync_io_timeout

View File

@@ -41,13 +41,3 @@ func ValidateEnableUnprivileged(ctx context.Context, c *RuntimeConfig) error {
}
return nil
}
var kernelSupportsRro bool
func init() {
var err error
kernelSupportsRro, err = kernelGreaterEqualThan(kernel.KernelVersion{Kernel: 5, Major: 12})
if err != nil {
panic(fmt.Errorf("check current system kernel version error: %w", err))
}
}

View File

@@ -25,5 +25,3 @@ import (
func ValidateEnableUnprivileged(ctx context.Context, c *RuntimeConfig) error {
return nil
}
var kernelSupportsRro bool

View File

@@ -222,7 +222,7 @@ func TestValidateConfig(t *testing.T) {
t.Run(desc, func(t *testing.T) {
var warnings []deprecation.Warning
if test.runtimeConfig != nil {
w, err := ValidateRuntimeConfig(context.Background(), test.runtimeConfig, nil)
w, err := ValidateRuntimeConfig(context.Background(), test.runtimeConfig)
if test.runtimeExpectedErr != "" {
assert.Contains(t, err.Error(), test.runtimeExpectedErr)
} else {

View File

@@ -38,14 +38,8 @@ import (
"github.com/containerd/log"
)
// RuntimeConfig is a subset of [github.com/containerd/containerd/v2/internal/cri/config].
// Needed for avoiding circular imports.
type RuntimeConfig struct {
TreatRoMountsAsRro bool // only applies to volumes
}
// WithMounts sorts and adds runtime and CRI mounts to the spec
func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string, rtConfig *RuntimeConfig) oci.SpecOpts {
func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) (err error) {
// mergeMounts merge CRI mounts with extra mounts. If a mount destination
// is mounted by both a CRI mount and an extra mount, the CRI mount will
@@ -73,7 +67,6 @@ func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*ru
sort.Sort(orderedMounts(mounts))
// Mount cgroup into the container as readonly, which inherits docker's behavior.
// TreatRoMountsAsRro does not apply here, as /sys/fs/cgroup is not a volume.
s.Mounts = append(s.Mounts, runtimespec.Mount{
Source: "cgroup",
Destination: "/sys/fs/cgroup",
@@ -155,25 +148,10 @@ func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*ru
options = append(options, "rprivate")
}
var srcIsDir bool
if srcSt, err := osi.Stat(src); err != nil {
if errors.Is(err, os.ErrNotExist) { // happens when osi is FakeOS
srcIsDir = true // assume src to be dir
} else {
return fmt.Errorf("failed to stat mount source %q: %w", src, err)
}
} else if srcSt != nil { // srcSt can be nil when osi is FakeOS
srcIsDir = srcSt.IsDir()
}
// NOTE(random-liu): we don't change all mounts to `ro` when root filesystem
// is readonly. This is different from docker's behavior, but make more sense.
if mount.GetReadonly() {
if rtConfig != nil && rtConfig.TreatRoMountsAsRro && srcIsDir {
options = append(options, "rro")
} else {
options = append(options, "ro")
}
options = append(options, "ro")
} else {
options = append(options, "rw")
}

View File

@@ -683,9 +683,7 @@ func (c *criService) buildLinuxSpec(
}
}()
specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel, &customopts.RuntimeConfig{
TreatRoMountsAsRro: ociRuntime.TreatRoMountsAsRroResolved,
}))
specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel))
if !c.config.DisableProcMount {
// Change the default masked/readonly paths to empty slices

View File

@@ -597,7 +597,7 @@ func TestMountPropagation(t *testing.T) {
var spec runtimespec.Spec
spec.Linux = &runtimespec.Linux{}
err := opts.WithMounts(c.os, config, []*runtime.Mount{test.criMount}, "", nil)(context.Background(), nil, nil, &spec)
err := opts.WithMounts(c.os, config, []*runtime.Mount{test.criMount}, "")(context.Background(), nil, nil, &spec)
if test.expectErr {
require.Error(t, err)
} else {