containerd/plugins/cri/runtime/plugin.go
Akihiro Suda b2f254fff0
cri: make read-only mounts recursively read-only
Prior to this commit, `readOnly` volumes were not recursively read-only and
could result in compromise of data;
e.g., even if `/mnt` was mounted as read-only, its submounts such as
`/mnt/usbstorage` were not read-only.

This commit utilizes runc's "rro" bind mount option to make read-only bind
mounts literally read-only. The "rro" bind mount options is implemented by
calling `mount_setattr(2)` with `MOUNT_ATTR_RDONLY` and `AT_RECURSIVE`.

The "rro" bind mount options requires kernel >= 5.12, with runc >= 1.1 or
a compatible runtime such as crun >= 1.4.

When the "rro" bind mount options is not available, containerd falls back
to the legacy non-recursive read-only mounts by default.

The behavior is configurable via `/etc/containerd/config.toml`:
```toml
version = 2
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
  # treat_ro_mounts_as_rro ("Enabled"|"IfPossible"|"Disabled")
  # treats read-only mounts as recursive read-only mounts.
  # An empty string means "IfPossible".
  # "Enabled" requires Linux kernel v5.12 or later.
  # This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
  treat_ro_mounts_as_rro = ""
```

Replaces:
- kubernetes/enhancements issue 3857
- kubernetes/enhancements PR 3858

Note: this change does not affect non-CRI clients such as ctr, nerdctl, and Docker/Moby.
RRO mounts have been supported since nerdctl v0.14 (containerd/nerdctl PR 511)
and Docker v25 (moby/moby PR 45278).

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
2024-02-01 09:39:36 +09:00

231 lines
6.6 KiB
Go

/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"context"
"encoding/json"
"flag"
"fmt"
"os"
"path/filepath"
introspectionapi "github.com/containerd/containerd/v2/api/services/introspection/v1"
"github.com/containerd/log"
"github.com/containerd/plugin"
"github.com/containerd/plugin/registry"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
"k8s.io/klog/v2"
srvconfig "github.com/containerd/containerd/v2/cmd/containerd/server/config"
criconfig "github.com/containerd/containerd/v2/pkg/cri/config"
"github.com/containerd/containerd/v2/pkg/cri/constants"
"github.com/containerd/containerd/v2/pkg/oci"
"github.com/containerd/containerd/v2/plugins"
"github.com/containerd/containerd/v2/plugins/services"
"github.com/containerd/containerd/v2/plugins/services/warning"
"github.com/containerd/errdefs"
"github.com/containerd/platforms"
)
func init() {
config := criconfig.DefaultRuntimeConfig()
// Base plugin that other CRI services depend on.
registry.Register(&plugin.Registration{
Type: plugins.CRIServicePlugin,
ID: "runtime",
Config: &config,
Requires: []plugin.Type{
plugins.WarningPlugin,
plugins.ServicePlugin,
},
ConfigMigration: func(ctx context.Context, version int, pluginConfigs map[string]interface{}) error {
if version >= srvconfig.CurrentConfigVersion {
return nil
}
c, ok := pluginConfigs[string(plugins.GRPCPlugin)+".cri"]
if !ok {
return nil
}
conf := c.(map[string]interface{})
migrateConfig(conf)
pluginConfigs[string(plugins.CRIServicePlugin)+".runtime"] = conf
return nil
},
InitFn: initCRIRuntime,
})
}
func initCRIRuntime(ic *plugin.InitContext) (interface{}, error) {
ic.Meta.Platforms = []imagespec.Platform{platforms.DefaultSpec()}
ic.Meta.Exports = map[string]string{"CRIVersion": constants.CRIVersion}
ctx := ic.Context
pluginConfig := ic.Config.(*criconfig.RuntimeConfig)
introspectionService, err := ic.GetByID(plugins.ServicePlugin, services.IntrospectionService)
if err != nil {
return nil, fmt.Errorf("failed to get plugin (%q, %q): %w",
plugins.ServicePlugin, services.IntrospectionService, err)
}
introspectionClient, ok := introspectionService.(introspectionapi.IntrospectionClient)
if !ok {
return nil, fmt.Errorf("%+v does not implement IntrospectionClient interfae", introspectionService)
}
if warnings, err := criconfig.ValidateRuntimeConfig(ctx, pluginConfig, introspectionClient); err != nil {
return nil, fmt.Errorf("invalid plugin config: %w", err)
} else if len(warnings) > 0 {
ws, err := ic.GetSingle(plugins.WarningPlugin)
if err != nil {
return nil, err
}
warn := ws.(warning.Service)
for _, w := range warnings {
warn.Emit(ctx, w)
}
}
// For backward compatibility, we have to keep the rootDir and stateDir the same as before.
containerdRootDir := filepath.Dir(ic.Properties[plugins.PropertyRootDir])
rootDir := filepath.Join(containerdRootDir, "io.containerd.grpc.v1.cri")
containerdStateDir := filepath.Dir(ic.Properties[plugins.PropertyStateDir])
stateDir := filepath.Join(containerdStateDir, "io.containerd.grpc.v1.cri")
c := criconfig.Config{
RuntimeConfig: *pluginConfig,
ContainerdRootDir: containerdRootDir,
ContainerdEndpoint: ic.Properties[plugins.PropertyGRPCAddress],
RootDir: rootDir,
StateDir: stateDir,
}
log.G(ctx).Infof("Start cri plugin with config %+v", c)
if err := setGLogLevel(); err != nil {
return nil, fmt.Errorf("failed to set glog level: %w", err)
}
ociSpec, err := loadBaseOCISpecs(&c)
if err != nil {
return nil, fmt.Errorf("failed to create load basic oci spec: %w", err)
}
return &runtime{
config: c,
baseOCISpecs: ociSpec,
}, nil
}
// runtime contains common dependencies for CRI's runtime, image, and podsandbox services.
type runtime struct {
// Config contains all configurations.
config criconfig.Config
// BaseOCISpecs contains cached OCI specs loaded via `Runtime.BaseRuntimeSpec`
baseOCISpecs map[string]*oci.Spec
}
func (r *runtime) Config() criconfig.Config {
return r.config
}
func (r *runtime) LoadOCISpec(filename string) (*oci.Spec, error) {
spec, ok := r.baseOCISpecs[filename]
if !ok {
// TODO: Load here or only allow preloading...
return nil, errdefs.ErrNotFound
}
return spec, nil
}
func loadBaseOCISpecs(config *criconfig.Config) (map[string]*oci.Spec, error) {
specs := map[string]*oci.Spec{}
for _, cfg := range config.Runtimes {
if cfg.BaseRuntimeSpec == "" {
continue
}
// Don't load same file twice
if _, ok := specs[cfg.BaseRuntimeSpec]; ok {
continue
}
spec, err := loadOCISpec(cfg.BaseRuntimeSpec)
if err != nil {
return nil, fmt.Errorf("failed to load base OCI spec from file: %s: %w", cfg.BaseRuntimeSpec, err)
}
specs[cfg.BaseRuntimeSpec] = spec
}
return specs, nil
}
func loadOCISpec(filename string) (*oci.Spec, error) {
file, err := os.Open(filename)
if err != nil {
return nil, fmt.Errorf("failed to open base OCI spec: %s: %w", filename, err)
}
defer file.Close()
spec := oci.Spec{}
if err := json.NewDecoder(file).Decode(&spec); err != nil {
return nil, fmt.Errorf("failed to parse base OCI spec file: %w", err)
}
return &spec, nil
}
// Set glog level.
func setGLogLevel() error {
l := log.GetLevel()
fs := flag.NewFlagSet("klog", flag.PanicOnError)
klog.InitFlags(fs)
if err := fs.Set("logtostderr", "true"); err != nil {
return err
}
switch l {
case log.TraceLevel:
return fs.Set("v", "5")
case log.DebugLevel:
return fs.Set("v", "4")
case log.InfoLevel:
return fs.Set("v", "2")
default:
// glog doesn't support other filters. Defaults to v=0.
}
return nil
}
func migrateConfig(conf map[string]interface{}) {
containerdConf, ok := conf["containerd"]
if !ok {
return
}
runtimesConf, ok := containerdConf.(map[string]interface{})["runtimes"]
if !ok {
return
}
for _, v := range runtimesConf.(map[string]interface{}) {
runtimeConf := v.(map[string]interface{})
if sandboxMode, ok := runtimeConf["sandbox_mode"]; ok {
if _, ok := runtimeConf["sandboxer"]; !ok {
runtimeConf["sandboxer"] = sandboxMode
}
}
}
}