cri: make read-only mounts recursively read-only

Prior to this commit, `readOnly` volumes were not recursively read-only and
could result in compromise of data;
e.g., even if `/mnt` was mounted as read-only, its submounts such as
`/mnt/usbstorage` were not read-only.

This commit utilizes runc's "rro" bind mount option to make read-only bind
mounts literally read-only. The "rro" bind mount options is implemented by
calling `mount_setattr(2)` with `MOUNT_ATTR_RDONLY` and `AT_RECURSIVE`.

The "rro" bind mount options requires kernel >= 5.12, with runc >= 1.1 or
a compatible runtime such as crun >= 1.4.

When the "rro" bind mount options is not available, containerd falls back
to the legacy non-recursive read-only mounts by default.

The behavior is configurable via `/etc/containerd/config.toml`:
```toml
version = 2
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
  # treat_ro_mounts_as_rro ("Enabled"|"IfPossible"|"Disabled")
  # treats read-only mounts as recursive read-only mounts.
  # An empty string means "IfPossible".
  # "Enabled" requires Linux kernel v5.12 or later.
  # This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
  treat_ro_mounts_as_rro = ""
```

Replaces:
- kubernetes/enhancements issue 3857
- kubernetes/enhancements PR 3858

Note: this change does not affect non-CRI clients such as ctr, nerdctl, and Docker/Moby.
RRO mounts have been supported since nerdctl v0.14 (containerd/nerdctl PR 511)
and Docker v25 (moby/moby PR 45278).

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
Akihiro Suda
2024-01-30 23:57:53 +09:00
parent 0dbe758833
commit b2f254fff0
11 changed files with 387 additions and 8 deletions

View File

@@ -24,6 +24,7 @@ import (
"os"
"path/filepath"
introspectionapi "github.com/containerd/containerd/v2/api/services/introspection/v1"
"github.com/containerd/log"
"github.com/containerd/plugin"
"github.com/containerd/plugin/registry"
@@ -35,6 +36,7 @@ import (
"github.com/containerd/containerd/v2/pkg/cri/constants"
"github.com/containerd/containerd/v2/pkg/oci"
"github.com/containerd/containerd/v2/plugins"
"github.com/containerd/containerd/v2/plugins/services"
"github.com/containerd/containerd/v2/plugins/services/warning"
"github.com/containerd/errdefs"
"github.com/containerd/platforms"
@@ -50,6 +52,7 @@ func init() {
Config: &config,
Requires: []plugin.Type{
plugins.WarningPlugin,
plugins.ServicePlugin,
},
ConfigMigration: func(ctx context.Context, version int, pluginConfigs map[string]interface{}) error {
if version >= srvconfig.CurrentConfigVersion {
@@ -73,7 +76,18 @@ func initCRIRuntime(ic *plugin.InitContext) (interface{}, error) {
ic.Meta.Exports = map[string]string{"CRIVersion": constants.CRIVersion}
ctx := ic.Context
pluginConfig := ic.Config.(*criconfig.RuntimeConfig)
if warnings, err := criconfig.ValidateRuntimeConfig(ctx, pluginConfig); err != nil {
introspectionService, err := ic.GetByID(plugins.ServicePlugin, services.IntrospectionService)
if err != nil {
return nil, fmt.Errorf("failed to get plugin (%q, %q): %w",
plugins.ServicePlugin, services.IntrospectionService, err)
}
introspectionClient, ok := introspectionService.(introspectionapi.IntrospectionClient)
if !ok {
return nil, fmt.Errorf("%+v does not implement IntrospectionClient interfae", introspectionService)
}
if warnings, err := criconfig.ValidateRuntimeConfig(ctx, pluginConfig, introspectionClient); err != nil {
return nil, fmt.Errorf("invalid plugin config: %w", err)
} else if len(warnings) > 0 {
ws, err := ic.GetSingle(plugins.WarningPlugin)