Prior to this commit, `readOnly` volumes were not recursively read-only and
could result in compromise of data;
e.g., even if `/mnt` was mounted as read-only, its submounts such as
`/mnt/usbstorage` were not read-only.
This commit utilizes runc's "rro" bind mount option to make read-only bind
mounts recursively read-only. The "rro" bind mount option is implemented by
calling `mount_setattr(2)` with `MOUNT_ATTR_RDONLY` and `AT_RECURSIVE`.
The "rro" bind mount option requires kernel >= 5.12, with runc >= 1.1 or
a compatible runtime such as crun >= 1.4.
When the "rro" bind mount option is not available, containerd falls back
to the legacy non-recursive read-only mounts by default.
The behavior is configurable via `/etc/containerd/config.toml`:
```toml
version = 2
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
  # treat_ro_mounts_as_rro ("Enabled"|"IfPossible"|"Disabled")
  # treats read-only mounts as recursive read-only mounts.
  # An empty string means "IfPossible".
  # "Enabled" requires Linux kernel v5.12 or later.
  # This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
  treat_ro_mounts_as_rro = ""
```
Replaces:
- kubernetes/enhancements issue 3857
- kubernetes/enhancements PR 3858
Note: this change does not affect non-CRI clients such as ctr, nerdctl, and Docker/Moby.
RRO mounts have been supported since nerdctl v0.14 (containerd/nerdctl PR 511)
and Docker v25 (moby/moby PR 45278).
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
		
	
		
			
				
	
	
		
			150 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			150 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
/*
 | 
						|
   Copyright The containerd Authors.
 | 
						|
 | 
						|
   Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
   you may not use this file except in compliance with the License.
 | 
						|
   You may obtain a copy of the License at
 | 
						|
 | 
						|
       http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 | 
						|
   Unless required by applicable law or agreed to in writing, software
 | 
						|
   distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
   See the License for the specific language governing permissions and
 | 
						|
   limitations under the License.
 | 
						|
*/
 | 
						|
 | 
						|
package integration
 | 
						|
 | 
						|
import (
 | 
						|
	"fmt"
 | 
						|
	"os"
 | 
						|
	"path/filepath"
 | 
						|
	"syscall"
 | 
						|
	"testing"
 | 
						|
	"time"
 | 
						|
 | 
						|
	"github.com/containerd/containerd/v2/core/mount"
 | 
						|
	"github.com/containerd/containerd/v2/integration/images"
 | 
						|
	"github.com/containerd/containerd/v2/pkg/kernelversion"
 | 
						|
	"github.com/opencontainers/selinux/go-selinux"
 | 
						|
	"github.com/stretchr/testify/assert"
 | 
						|
	"github.com/stretchr/testify/require"
 | 
						|
	runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
 | 
						|
)
 | 
						|
 | 
						|
func testReadonlyMounts(t *testing.T, mode string, expectRRO bool) {
 | 
						|
	workDir := t.TempDir()
 | 
						|
	mntSrcDir := filepath.Join(workDir, "mnt") // "/mnt" in the container
 | 
						|
	require.NoError(t, os.MkdirAll(mntSrcDir, 0755))
 | 
						|
	tmpfsDir := filepath.Join(mntSrcDir, "tmpfs") // "/mnt/tmpfs" in the container
 | 
						|
	require.NoError(t, os.MkdirAll(tmpfsDir, 0755))
 | 
						|
	tmpfsMount := mount.Mount{
 | 
						|
		Type:   "tmpfs",
 | 
						|
		Source: "none",
 | 
						|
	}
 | 
						|
	require.NoError(t, tmpfsMount.Mount(tmpfsDir))
 | 
						|
	t.Cleanup(func() {
 | 
						|
		require.NoError(t, mount.UnmountAll(tmpfsDir, 0))
 | 
						|
	})
 | 
						|
 | 
						|
	podLogDir := filepath.Join(workDir, "podLogDir")
 | 
						|
	require.NoError(t, os.MkdirAll(podLogDir, 0755))
 | 
						|
 | 
						|
	config := `version = 2
 | 
						|
`
 | 
						|
	if mode != "" {
 | 
						|
		config += fmt.Sprintf(`
 | 
						|
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
 | 
						|
  treat_ro_mount_as_rro = %q
 | 
						|
`, mode)
 | 
						|
	}
 | 
						|
	require.NoError(t, os.WriteFile(filepath.Join(workDir, "config.toml"),
 | 
						|
		[]byte(config), 0644))
 | 
						|
	ctrdProc := newCtrdProc(t, "containerd", workDir)
 | 
						|
	t.Cleanup(func() {
 | 
						|
		cleanupPods(t, ctrdProc.criRuntimeService(t))
 | 
						|
		require.NoError(t, ctrdProc.kill(syscall.SIGTERM))
 | 
						|
		require.NoError(t, ctrdProc.wait(5*time.Minute))
 | 
						|
		if t.Failed() {
 | 
						|
			dumpFileContent(t, ctrdProc.logPath())
 | 
						|
		}
 | 
						|
	})
 | 
						|
	runtimeServiceOrig, imageServiceOrig := runtimeService, imageService
 | 
						|
	runtimeService, imageService = ctrdProc.criRuntimeService(t), ctrdProc.criImageService(t)
 | 
						|
	t.Cleanup(func() {
 | 
						|
		runtimeService, imageService = runtimeServiceOrig, imageServiceOrig
 | 
						|
	})
 | 
						|
	require.NoError(t, ctrdProc.isReady())
 | 
						|
 | 
						|
	sb, sbConfig := PodSandboxConfigWithCleanup(t, "sandbox", "test-ro-mounts",
 | 
						|
		WithPodLogDirectory(podLogDir),
 | 
						|
	)
 | 
						|
 | 
						|
	testImage := images.Get(images.BusyBox)
 | 
						|
	EnsureImageExists(t, testImage)
 | 
						|
 | 
						|
	containerName := "test-container"
 | 
						|
	cnConfig := ContainerConfig(
 | 
						|
		containerName,
 | 
						|
		testImage,
 | 
						|
		WithCommand("/bin/touch", "/mnt/tmpfs/file"),
 | 
						|
		WithLogPath(containerName),
 | 
						|
		func(c *runtime.ContainerConfig) {
 | 
						|
			c.Mounts = append(c.Mounts, &runtime.Mount{
 | 
						|
				HostPath:       mntSrcDir,
 | 
						|
				ContainerPath:  "/mnt",
 | 
						|
				SelinuxRelabel: selinux.GetEnabled(),
 | 
						|
				Readonly:       true,
 | 
						|
			})
 | 
						|
		},
 | 
						|
	)
 | 
						|
 | 
						|
	cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
 | 
						|
	require.NoError(t, err)
 | 
						|
 | 
						|
	t.Log("Start the container")
 | 
						|
	require.NoError(t, runtimeService.StartContainer(cn))
 | 
						|
 | 
						|
	t.Log("Wait for container to finish running")
 | 
						|
	exitCode := -1
 | 
						|
	require.NoError(t, Eventually(func() (bool, error) {
 | 
						|
		s, err := runtimeService.ContainerStatus(cn)
 | 
						|
		if err != nil {
 | 
						|
			return false, err
 | 
						|
		}
 | 
						|
		if s.GetState() == runtime.ContainerState_CONTAINER_EXITED {
 | 
						|
			exitCode = int(s.ExitCode)
 | 
						|
			return true, nil
 | 
						|
		}
 | 
						|
		return false, nil
 | 
						|
	}, time.Second, 30*time.Second))
 | 
						|
 | 
						|
	output, err := os.ReadFile(filepath.Join(podLogDir, containerName))
 | 
						|
	assert.NoError(t, err)
 | 
						|
	t.Logf("exitCode=%d, output=%q", exitCode, output)
 | 
						|
 | 
						|
	if expectRRO {
 | 
						|
		require.NotEqual(t, 0, exitCode)
 | 
						|
		require.Contains(t, string(output), "stderr F touch: /mnt/tmpfs/file: Read-only file system\n")
 | 
						|
	} else {
 | 
						|
		require.Equal(t, 0, exitCode)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func TestReadonlyMounts(t *testing.T) {
 | 
						|
	kernelSupportsRRO, err := kernelversion.GreaterEqualThan(kernelversion.KernelVersion{Kernel: 5, Major: 12})
 | 
						|
	require.NoError(t, err)
 | 
						|
	t.Run("Default", func(t *testing.T) {
 | 
						|
		testReadonlyMounts(t, "", kernelSupportsRRO)
 | 
						|
	})
 | 
						|
	t.Run("Disabled", func(t *testing.T) {
 | 
						|
		testReadonlyMounts(t, "Disabled", false)
 | 
						|
	})
 | 
						|
	if kernelSupportsRRO {
 | 
						|
		t.Run("Enabled", func(t *testing.T) {
 | 
						|
			testReadonlyMounts(t, "Enabled", true)
 | 
						|
		})
 | 
						|
	}
 | 
						|
}
 |