Merge pull request #9117 from kinvolk/rata/userns-chown-opt-in

Require opt-in for rootfs chown when idmap mounts is not supported
This commit is contained in:
Akihiro Suda 2023-09-28 02:34:41 +09:00 committed by GitHub
commit 9ca6fd9e6e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 79 additions and 10 deletions

View File

@ -86,22 +86,22 @@ Different containerd versions have different limitations too, those are highligh
### containerd 1.7
One limitation present in containerd 1.7 is that it needs to change the ownership of every file and
directory inside the container image, during Pod startup. This means it has a storage overhead (the
size of the container image is duplicated each time a pod is created) and can significantly impact
the container startup latency.
directory inside the container image, during Pod startup. This means it has a storage overhead, as
**the size of the container image is duplicated each time a pod is created**, and can significantly
impact the container startup latency, as doing such a copy takes time too.
You can mitigate this limitation by switching `/sys/module/overlay/parameters/metacopy` to `Y`. This
will significantly reduce the storage and performance overhead, as only the inode for each file of
the container image will be duplicated, but not the content of the file. This means it will use less
storage and it will be faster. However, it is not a panacea.
If you change the metacopy param, make sure to do it in a way that is persistant across reboots. You
If you change the metacopy param, make sure to do it in a way that is persistent across reboots. You
should also be aware that this setting will be used for all containers, not just containers with
user namespaces enabled. This will affect all the snapshots that you take manually (if you happen to
do that). In that case, make sure to use the same value of `/sys/module/overlay/parameters/metacopy`
when creating and restoring the snapshot.
### containerd 2.0
### containerd 2.0 and above
The storage and latency limitations from containerd 1.7 are not present in containerd 2.0 and above,
if you use the overlay snapshotter (this is used by default). It will not use more storage at all,
@ -111,8 +111,36 @@ This is achieved by using the kernel feature idmap mounts with the container roo
image). This allows an overlay file-system to expose the image with different UID/GID without copying
the files nor the inodes, just using a bind-mount.
You can check if you are using idmap mounts for the container image if you create a pod with user
namespaces, exec into it and run:
By default, containerd will refuse to create a container with user namespaces if overlayfs is the
snapshotter and the running kernel doesn't support idmap mounts for overlayfs. This is to make sure
that, before falling back to the expensive chown (in terms of storage and pod startup latency), you
understand the implications and decide to opt in. Please read the containerd 1.7 limitations for an
explanation of those.
If your kernel doesn't support idmap mounts for the overlayfs snapshotter, you will see an error
like:
```
failed to create containerd container: snapshotter "overlayfs" doesn't support idmap mounts on this host, configure `slow_chown` to allow a slower and expensive fallback
```
Linux has supported idmap mounts on overlayfs since version 5.19.
You can opt in to the slow chown by adding the `slow_chown` field to your config in the overlayfs
snapshotter section, like this:
```
[plugins."io.containerd.snapshotter.v1.overlayfs"]
slow_chown = true
```
Note that only overlayfs users need to opt in to the slow chown, as it is the only snapshotter for
which containerd provides a better option (only the overlayfs snapshotter supports idmap mounts in
containerd). If you use another snapshotter, you will fall back to the expensive chown without the
need to opt in.
That being said, you can double check if your container is using idmap mounts for the container
image if you create a pod with user namespaces, exec into it and run:
```
mount | grep overlay

View File

@ -52,6 +52,13 @@ version=2
[plugins."io.containerd.grpc.v1.cri"]
drain_exec_sync_io_timeout = "10s"
# Userns requires idmap mount support for overlayfs (added in 5.19)
# Let's opt-in for a recursive chown, so we can always test this even in old distros.
# Note that if idmap mounts support is present, we will use that, so it is harmless to keep this
# here.
[plugins."io.containerd.snapshotter.v1.overlayfs"]
slow_chown = true
EOF
if command -v sestatus >/dev/null 2>&1; then

View File

@ -46,6 +46,7 @@ type SnapshotterConfig struct {
ms MetaStore
mountOptions []string
remapIds bool
slowChown bool
}
// Opt is an option to configure the overlay snapshotter
@ -98,6 +99,11 @@ func WithRemapIds(config *SnapshotterConfig) error {
return nil
}
// WithSlowChown is a snapshotter option that sets the slowChown flag on the
// given SnapshotterConfig. The overlay plugin passes this option when
// `slow_chown` is enabled in its configuration, which allows falling back to a
// recursive chown when fast options (like idmap mounts) are not available.
//
// It only records the flag on the config and always returns nil.
func WithSlowChown(config *SnapshotterConfig) error {
config.slowChown = true
return nil
}
type snapshotter struct {
root string
ms MetaStore
@ -105,6 +111,7 @@ type snapshotter struct {
upperdirLabel bool
options []string
remapIds bool
slowChown bool
}
// NewSnapshotter returns a Snapshotter which uses overlayfs. The overlayfs
@ -161,6 +168,7 @@ func NewSnapshotter(root string, opts ...Opt) (snapshots.Snapshotter, error) {
upperdirLabel: config.upperdirLabel,
options: config.mountOptions,
remapIds: config.remapIds,
slowChown: config.slowChown,
}, nil
}

View File

@ -29,6 +29,7 @@ import (
// Capability names the overlay plugin may append to its advertised
// capabilities.
const (
// capaRemapIds is advertised to indicate that the snapshotter supports ID
// remapping (the condition under which it is appended is not shown here).
capaRemapIds = "remap-ids"
// capaOnlyRemapIds is advertised when slow_chown is not enabled, to signal
// that only idmap mounts are allowed and no recursive chown fallback may be
// used.
capaOnlyRemapIds = "only-remap-ids"
)
// Config represents configuration for the overlay plugin.
@ -38,6 +39,11 @@ type Config struct {
UpperdirLabel bool `toml:"upperdir_label"`
SyncRemove bool `toml:"sync_remove"`
// SlowChown allows the plugin to fall back to a recursive chown if fast options (like
// idmap mounts) are not available. See more info about the overhead this can have in
// github.com/containerd/containerd/docs/user-namespaces/.
SlowChown bool `toml:"slow_chown"`
// MountOptions are options used for the overlay mount (not used on bind mounts)
MountOptions []string `toml:"mount_options"`
}
@ -76,6 +82,14 @@ func init() {
ic.Meta.Capabilities = append(ic.Meta.Capabilities, capaRemapIds)
}
if config.SlowChown {
oOpts = append(oOpts, overlay.WithSlowChown)
} else {
// If slowChown is false, we use capaOnlyRemapIds to signal we only
// allow idmap mounts.
ic.Meta.Capabilities = append(ic.Meta.Capabilities, capaOnlyRemapIds)
}
ic.Meta.Exports["root"] = root
return overlay.NewSnapshotter(root, oOpts...)
},

View File

@ -26,7 +26,8 @@ import (
)
const (
capabRemapIDs = "remap-ids"
capaRemapIDs = "remap-ids"
capaOnlyRemapIds = "only-remap-ids"
)
// WithRemapperLabels creates the labels used by any supporting snapshotter
@ -45,7 +46,7 @@ func resolveSnapshotOptions(ctx context.Context, client *Client, snapshotterName
}
for _, capab := range capabs {
if capab == capabRemapIDs {
if capab == capaRemapIDs {
// Snapshotter supports ID remapping, we don't need to do anything.
return parent, nil
}
@ -72,6 +73,17 @@ func resolveSnapshotOptions(ctx context.Context, client *Client, snapshotterName
return parent, nil
}
capaOnlyRemap := false
for _, capa := range capabs {
if capa == capaOnlyRemapIds {
capaOnlyRemap = true
}
}
if capaOnlyRemap {
return "", fmt.Errorf("snapshotter %q doesn't support idmap mounts on this host, configure `slow_chown` to allow a slower and expensive fallback", snapshotterName)
}
var ctrUID, hostUID, length uint32
_, err = fmt.Sscanf(uidMap, "%d:%d:%d", &ctrUID, &hostUID, &length)
if err != nil {