Revert "cri: make read-only mounts recursively read-only"

Revert PR 9713, as it appeared to break the compatibility too much
https://github.com/kubernetes/enhancements/pull/3858#issuecomment-1925441072

This reverts commit b2f254fff0.

> Conflicts:
>	internal/cri/opts/spec_linux_opts.go

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
Akihiro Suda 2024-02-04 01:13:33 +09:00
parent 96bf529cbf
commit 6670695836
No known key found for this signature in database
GPG Key ID: 49524C6F9F638F1A
11 changed files with 8 additions and 387 deletions

View File

@ -461,25 +461,6 @@ version = 2
</p></details>
## Other breaking changes
### containerd v2.0
#### CRI plugin treats read-only mounts recursively read-only
Starting with containerd v2.0, the CRI plugin treats read-only mounts
as recursively read-only mounts when running on Linux kernel v5.12 or later.
To rollback to the legacy behavior that corresponds to containerd v1.x,
set the following config:
```toml
version = 2
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
# treat_ro_mounts_as_rro ("Enabled"|"IfPossible"|"Disabled")
# treats read-only mounts as recursive read-only mounts.
# An empty string means "IfPossible".
# "Enabled" requires Linux kernel v5.12 or later.
# This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
treat_ro_mounts_as_rro = "Disabled"
```
## Experimental features
Experimental features are new features added to containerd which do not have the

View File

@ -369,14 +369,6 @@ version = 2
# See https://github.com/containerd/containerd/issues/6657 for context.
snapshotter = ""
# treat_ro_mounts_as_rro ("Enabled"|"IfPossible"|"Disabled")
# treats read-only mounts as recursive read-only mounts.
# An empty string means "IfPossible".
# "Enabled" requires Linux kernel v5.12 or later.
# Introduced in containerd v2.0.
# This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
treat_ro_mounts_as_rro = ""
# 'plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options' is options specific to
# "io.containerd.runc.v1" and "io.containerd.runc.v2". Its corresponding options type is:
# https://github.com/containerd/containerd/blob/v1.3.2/runtime/v2/runc/options/oci.pb.go#L26 .

View File

@ -1,149 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package integration
import (
"fmt"
"os"
"path/filepath"
"syscall"
"testing"
"time"
"github.com/containerd/containerd/v2/core/mount"
"github.com/containerd/containerd/v2/integration/images"
"github.com/containerd/containerd/v2/pkg/kernelversion"
"github.com/opencontainers/selinux/go-selinux"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func testReadonlyMounts(t *testing.T, mode string, expectRRO bool) {
workDir := t.TempDir()
mntSrcDir := filepath.Join(workDir, "mnt") // "/mnt" in the container
require.NoError(t, os.MkdirAll(mntSrcDir, 0755))
tmpfsDir := filepath.Join(mntSrcDir, "tmpfs") // "/mnt/tmpfs" in the container
require.NoError(t, os.MkdirAll(tmpfsDir, 0755))
tmpfsMount := mount.Mount{
Type: "tmpfs",
Source: "none",
}
require.NoError(t, tmpfsMount.Mount(tmpfsDir))
t.Cleanup(func() {
require.NoError(t, mount.UnmountAll(tmpfsDir, 0))
})
podLogDir := filepath.Join(workDir, "podLogDir")
require.NoError(t, os.MkdirAll(podLogDir, 0755))
config := `version = 2
`
if mode != "" {
config += fmt.Sprintf(`
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
treat_ro_mount_as_rro = %q
`, mode)
}
require.NoError(t, os.WriteFile(filepath.Join(workDir, "config.toml"),
[]byte(config), 0644))
ctrdProc := newCtrdProc(t, "containerd", workDir)
t.Cleanup(func() {
cleanupPods(t, ctrdProc.criRuntimeService(t))
require.NoError(t, ctrdProc.kill(syscall.SIGTERM))
require.NoError(t, ctrdProc.wait(5*time.Minute))
if t.Failed() {
dumpFileContent(t, ctrdProc.logPath())
}
})
runtimeServiceOrig, imageServiceOrig := runtimeService, imageService
runtimeService, imageService = ctrdProc.criRuntimeService(t), ctrdProc.criImageService(t)
t.Cleanup(func() {
runtimeService, imageService = runtimeServiceOrig, imageServiceOrig
})
require.NoError(t, ctrdProc.isReady())
sb, sbConfig := PodSandboxConfigWithCleanup(t, "sandbox", "test-ro-mounts",
WithPodLogDirectory(podLogDir),
)
testImage := images.Get(images.BusyBox)
EnsureImageExists(t, testImage)
containerName := "test-container"
cnConfig := ContainerConfig(
containerName,
testImage,
WithCommand("/bin/touch", "/mnt/tmpfs/file"),
WithLogPath(containerName),
func(c *runtime.ContainerConfig) {
c.Mounts = append(c.Mounts, &runtime.Mount{
HostPath: mntSrcDir,
ContainerPath: "/mnt",
SelinuxRelabel: selinux.GetEnabled(),
Readonly: true,
})
},
)
cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
require.NoError(t, err)
t.Log("Start the container")
require.NoError(t, runtimeService.StartContainer(cn))
t.Log("Wait for container to finish running")
exitCode := -1
require.NoError(t, Eventually(func() (bool, error) {
s, err := runtimeService.ContainerStatus(cn)
if err != nil {
return false, err
}
if s.GetState() == runtime.ContainerState_CONTAINER_EXITED {
exitCode = int(s.ExitCode)
return true, nil
}
return false, nil
}, time.Second, 30*time.Second))
output, err := os.ReadFile(filepath.Join(podLogDir, containerName))
assert.NoError(t, err)
t.Logf("exitCode=%d, output=%q", exitCode, output)
if expectRRO {
require.NotEqual(t, 0, exitCode)
require.Contains(t, string(output), "stderr F touch: /mnt/tmpfs/file: Read-only file system\n")
} else {
require.Equal(t, 0, exitCode)
}
}
func TestReadonlyMounts(t *testing.T) {
kernelSupportsRRO, err := kernelversion.GreaterEqualThan(kernelversion.KernelVersion{Kernel: 5, Major: 12})
require.NoError(t, err)
t.Run("Default", func(t *testing.T) {
testReadonlyMounts(t, "", kernelSupportsRRO)
})
t.Run("Disabled", func(t *testing.T) {
testReadonlyMounts(t, "Disabled", false)
})
if kernelSupportsRRO {
t.Run("Enabled", func(t *testing.T) {
testReadonlyMounts(t, "Enabled", true)
})
}
}

View File

@ -21,15 +21,9 @@ import (
"errors"
"fmt"
"net/url"
goruntime "runtime"
"strconv"
"time"
introspectionapi "github.com/containerd/containerd/v2/api/services/introspection/v1"
apitypes "github.com/containerd/containerd/v2/api/types"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/log"
"github.com/containerd/typeurl/v2"
"github.com/pelletier/go-toml/v2"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubelet/pkg/cri/streaming"
@ -40,16 +34,8 @@ import (
"github.com/containerd/containerd/v2/pkg/deprecation"
runtimeoptions "github.com/containerd/containerd/v2/pkg/runtimeoptions/v1"
"github.com/containerd/containerd/v2/plugins"
"github.com/opencontainers/image-spec/specs-go"
"github.com/opencontainers/runtime-spec/specs-go/features"
)
func init() {
const prefix = "types.containerd.io"
major := strconv.Itoa(specs.VersionMajor)
typeurl.Register(&features.Features{}, prefix, "opencontainers/runtime-spec", major, "features", "Features")
}
const (
// defaultImagePullProgressTimeoutDuration is the default value of imagePullProgressTimeout.
//
@ -87,17 +73,6 @@ const (
DefaultSandboxImage = "registry.k8s.io/pause:3.9"
)
// Ternary represents a ternary value.
// Ternary is needed because TOML does not accept "null" for boolean values.
type Ternary = string
const (
TernaryEmpty Ternary = "" // alias for IfPossible
TernaryEnabled Ternary = "Enabled"
TernaryIfPossible Ternary = "IfPossible"
TernaryDisabled Ternary = "Disabled"
)
// Runtime struct to contain the type(ID), engine, and root variables for a default runtime
// and a runtime for untrusted workload.
type Runtime struct {
@ -141,15 +116,6 @@ type Runtime struct {
// shim - means use whatever Controller implementation provided by shim (e.g. use RemoteController).
// podsandbox - means use Controller implementation from sbserver podsandbox package.
Sandboxer string `toml:"sandboxer" json:"sandboxer"`
// TreatRoMountsAsRro ("Enabled"|"IfPossible"|"Disabled")
// treats read-only mounts as recursive read-only mounts.
// An empty string means "IfPossible".
// "Enabled" requires Linux kernel v5.12 or later.
// Introduced in containerd v2.0.
// This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
TreatRoMountsAsRro Ternary `toml:"treat_ro_mount_as_rro" json:"treatRoMountsAsRro"`
TreatRoMountsAsRroResolved bool `toml:"-" json:"-"` // Do not set manually
}
// ContainerdConfig contains toml config related to containerd
@ -533,120 +499,8 @@ func ValidateImageConfig(ctx context.Context, c *ImageConfig) ([]deprecation.War
return warnings, nil
}
func introspectRuntimeFeatures(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (*features.Features, error) {
if introspectionClient == nil { // happens for unit tests
return nil, errors.New("introspectionClient is nil")
}
infoReq := &introspectionapi.PluginInfoRequest{
Type: string(plugins.RuntimePluginV2),
ID: "task",
}
rr := &apitypes.RuntimeRequest{
RuntimePath: r.Type,
}
if r.Path != "" {
rr.RuntimePath = r.Path
}
options, err := GenerateRuntimeOptions(r)
if err != nil {
return nil, err
}
rr.Options, err = protobuf.MarshalAnyToProto(options)
if err != nil {
return nil, fmt.Errorf("failed to marshal %T: %w", options, err)
}
infoReq.Options, err = protobuf.MarshalAnyToProto(rr)
if err != nil {
return nil, fmt.Errorf("failed to marshal %T: %w", rr, err)
}
infoResp, err := introspectionClient.PluginInfo(ctx, infoReq)
if err != nil {
return nil, fmt.Errorf("failed to call PluginInfo: %w", err)
}
var info apitypes.RuntimeInfo
if err := typeurl.UnmarshalTo(infoResp.Extra, &info); err != nil {
return nil, fmt.Errorf("failed to get runtime info from plugin info: %w", err)
}
featuresX, err := typeurl.UnmarshalAny(info.Features)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal Features (%T): %w", info.Features, err)
}
features, ok := featuresX.(*features.Features)
if !ok {
return nil, fmt.Errorf("unknown features type %T", featuresX)
}
return features, nil
}
// resolveTreatRoMountsAsRro resolves r.TreatRoMountsAsRro string into a boolean.
func resolveTreatRoMountsAsRro(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (bool, error) {
debugPrefix := "treat_ro_mounts_as_rro"
if r.Type != "" {
debugPrefix += fmt.Sprintf("[%s]", r.Type)
}
if binaryName := r.Options["BinaryName"]; binaryName != "" {
debugPrefix += fmt.Sprintf("[%v]", binaryName)
}
debugPrefix += ": "
var runtimeSupportsRro bool
if r.Type == plugins.RuntimeRuncV2 {
features, err := introspectRuntimeFeatures(ctx, introspectionClient, r)
if err != nil {
log.G(ctx).WithError(err).Warnf(debugPrefix + "failed to introspect runtime features (binary is not compatible with runc v1.1?)")
} else {
log.G(ctx).Debugf(debugPrefix+"Features: %+v", features)
for _, s := range features.MountOptions {
if s == "rro" {
runtimeSupportsRro = true
break
}
}
}
}
switch r.TreatRoMountsAsRro {
case TernaryDisabled:
log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly disabled")
return false, nil
case TernaryEnabled:
log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly enabled")
if !kernelSupportsRro {
return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs Linux kernel v5.12 or later", TernaryEnabled)
}
if !runtimeSupportsRro {
return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs a runtime that is compatible with runc v1.1", TernaryEnabled)
}
return true, nil
case TernaryEmpty, TernaryIfPossible:
if r.Type != plugins.RuntimeRuncV2 {
log.G(ctx).Debugf(debugPrefix+"rro mounts are not supported by runtime %q, disabling rro mounts", r.Type)
return false, nil
}
if !kernelSupportsRro {
msg := debugPrefix + "rro mounts are not supported by kernel, disabling rro mounts"
if goruntime.GOOS == "linux" {
msg += " (Hint: upgrade the kernel to v5.12 or later)"
log.G(ctx).Warn(msg)
} else {
log.G(ctx).Debug(msg)
}
return false, nil
}
if !runtimeSupportsRro {
log.G(ctx).Warn(debugPrefix + "rro mounts are not supported by runtime, disabling rro mounts (Hint: use a runtime that is compatible with runc v1.1)")
return false, nil
}
log.G(ctx).Debug(debugPrefix + "rro mounts are implicitly enabled")
return true, nil
default:
return false, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q (must be %q, %q, or %q)",
r.TreatRoMountsAsRro, TernaryDisabled, TernaryEnabled, TernaryIfPossible)
}
}
// ValidateRuntimeConfig validates the given runtime configuration.
func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig, introspectionClient introspectionapi.IntrospectionClient) ([]deprecation.Warning, error) {
func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig) ([]deprecation.Warning, error) {
var warnings []deprecation.Warning
if c.ContainerdConfig.Runtimes == nil {
c.ContainerdConfig.Runtimes = make(map[string]Runtime)
@ -667,16 +521,9 @@ func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig, introspectionC
// If empty, use default podSandbox mode
if len(r.Sandboxer) == 0 {
r.Sandboxer = string(ModePodSandbox)
}
// Resolve r.TreatRoMountsAsRro (string; empty value must not be ignored) into r.TreatRoMountsAsRroResolved (bool)
var err error
r.TreatRoMountsAsRroResolved, err = resolveTreatRoMountsAsRro(ctx, introspectionClient, r)
if err != nil {
return warnings, err
}
c.ContainerdConfig.Runtimes[k] = r
}
}
// Validation for drain_exec_sync_io_timeout
if c.DrainExecSyncIOTimeout != "" {

View File

@ -41,13 +41,3 @@ func ValidateEnableUnprivileged(ctx context.Context, c *RuntimeConfig) error {
}
return nil
}
var kernelSupportsRro bool
func init() {
var err error
kernelSupportsRro, err = kernelGreaterEqualThan(kernel.KernelVersion{Kernel: 5, Major: 12})
if err != nil {
panic(fmt.Errorf("check current system kernel version error: %w", err))
}
}

View File

@ -25,5 +25,3 @@ import (
func ValidateEnableUnprivileged(ctx context.Context, c *RuntimeConfig) error {
return nil
}
var kernelSupportsRro bool

View File

@ -222,7 +222,7 @@ func TestValidateConfig(t *testing.T) {
t.Run(desc, func(t *testing.T) {
var warnings []deprecation.Warning
if test.runtimeConfig != nil {
w, err := ValidateRuntimeConfig(context.Background(), test.runtimeConfig, nil)
w, err := ValidateRuntimeConfig(context.Background(), test.runtimeConfig)
if test.runtimeExpectedErr != "" {
assert.Contains(t, err.Error(), test.runtimeExpectedErr)
} else {

View File

@ -38,14 +38,8 @@ import (
"github.com/containerd/log"
)
// RuntimeConfig is a subset of [github.com/containerd/containerd/v2/internal/cri/config].
// Needed for avoiding circular imports.
type RuntimeConfig struct {
TreatRoMountsAsRro bool // only applies to volumes
}
// WithMounts sorts and adds runtime and CRI mounts to the spec
func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string, rtConfig *RuntimeConfig) oci.SpecOpts {
func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) (err error) {
// mergeMounts merge CRI mounts with extra mounts. If a mount destination
// is mounted by both a CRI mount and an extra mount, the CRI mount will
@ -73,7 +67,6 @@ func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*ru
sort.Sort(orderedMounts(mounts))
// Mount cgroup into the container as readonly, which inherits docker's behavior.
// TreatRoMountsAsRro does not apply here, as /sys/fs/cgroup is not a volume.
s.Mounts = append(s.Mounts, runtimespec.Mount{
Source: "cgroup",
Destination: "/sys/fs/cgroup",
@ -155,25 +148,10 @@ func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*ru
options = append(options, "rprivate")
}
var srcIsDir bool
if srcSt, err := osi.Stat(src); err != nil {
if errors.Is(err, os.ErrNotExist) { // happens when osi is FakeOS
srcIsDir = true // assume src to be dir
} else {
return fmt.Errorf("failed to stat mount source %q: %w", src, err)
}
} else if srcSt != nil { // srcSt can be nil when osi is FakeOS
srcIsDir = srcSt.IsDir()
}
// NOTE(random-liu): we don't change all mounts to `ro` when root filesystem
// is readonly. This is different from docker's behavior, but make more sense.
if mount.GetReadonly() {
if rtConfig != nil && rtConfig.TreatRoMountsAsRro && srcIsDir {
options = append(options, "rro")
} else {
options = append(options, "ro")
}
} else {
options = append(options, "rw")
}

View File

@ -683,9 +683,7 @@ func (c *criService) buildLinuxSpec(
}
}()
specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel, &customopts.RuntimeConfig{
TreatRoMountsAsRro: ociRuntime.TreatRoMountsAsRroResolved,
}))
specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel))
if !c.config.DisableProcMount {
// Change the default masked/readonly paths to empty slices

View File

@ -597,7 +597,7 @@ func TestMountPropagation(t *testing.T) {
var spec runtimespec.Spec
spec.Linux = &runtimespec.Linux{}
err := opts.WithMounts(c.os, config, []*runtime.Mount{test.criMount}, "", nil)(context.Background(), nil, nil, &spec)
err := opts.WithMounts(c.os, config, []*runtime.Mount{test.criMount}, "")(context.Background(), nil, nil, &spec)
if test.expectErr {
require.Error(t, err)
} else {

View File

@ -24,7 +24,6 @@ import (
"os"
"path/filepath"
introspectionapi "github.com/containerd/containerd/v2/api/services/introspection/v1"
"github.com/containerd/log"
"github.com/containerd/plugin"
"github.com/containerd/plugin/registry"
@ -36,7 +35,6 @@ import (
"github.com/containerd/containerd/v2/internal/cri/constants"
"github.com/containerd/containerd/v2/pkg/oci"
"github.com/containerd/containerd/v2/plugins"
"github.com/containerd/containerd/v2/plugins/services"
"github.com/containerd/containerd/v2/plugins/services/warning"
"github.com/containerd/errdefs"
"github.com/containerd/platforms"
@ -52,7 +50,6 @@ func init() {
Config: &config,
Requires: []plugin.Type{
plugins.WarningPlugin,
plugins.ServicePlugin,
},
ConfigMigration: func(ctx context.Context, version int, pluginConfigs map[string]interface{}) error {
if version >= srvconfig.CurrentConfigVersion {
@ -76,18 +73,7 @@ func initCRIRuntime(ic *plugin.InitContext) (interface{}, error) {
ic.Meta.Exports = map[string]string{"CRIVersion": constants.CRIVersion}
ctx := ic.Context
pluginConfig := ic.Config.(*criconfig.RuntimeConfig)
introspectionService, err := ic.GetByID(plugins.ServicePlugin, services.IntrospectionService)
if err != nil {
return nil, fmt.Errorf("failed to get plugin (%q, %q): %w",
plugins.ServicePlugin, services.IntrospectionService, err)
}
introspectionClient, ok := introspectionService.(introspectionapi.IntrospectionClient)
if !ok {
return nil, fmt.Errorf("%+v does not implement IntrospectionClient interfae", introspectionService)
}
if warnings, err := criconfig.ValidateRuntimeConfig(ctx, pluginConfig, introspectionClient); err != nil {
if warnings, err := criconfig.ValidateRuntimeConfig(ctx, pluginConfig); err != nil {
return nil, fmt.Errorf("invalid plugin config: %w", err)
} else if len(warnings) > 0 {
ws, err := ic.GetSingle(plugins.WarningPlugin)