Merge pull request #9117 from kinvolk/rata/userns-chown-opt-in
Require opt-in for rootfs chown when idmap mounts is not supported
This commit is contained in:
commit
9ca6fd9e6e
@ -86,22 +86,22 @@ Different containerd versions have different limitations too, those are highligh
|
||||
### containerd 1.7
|
||||
|
||||
One limitation present in containerd 1.7 is that it needs to change the ownership of every file and
|
||||
directory inside the container image, during Pod startup. This means it has a storage overhead (the
|
||||
size of the container image is duplicated each time a pod is created) and can significantly impact
|
||||
the container startup latency.
|
||||
directory inside the container image, during Pod startup. This means it has a storage overhead, as
|
||||
**the size of the container image is duplicated each time a pod is created**, and can significantly
|
||||
impact the container startup latency, as doing such a copy takes time too.
|
||||
|
||||
You can mitigate this limitation by switching `/sys/module/overlay/parameters/metacopy` to `Y`. This
|
||||
will significantly reduce the storage and performance overhead, as only the inode for each file of
|
||||
the container image will be duplicated, but not the content of the file. This means it will use less
|
||||
storage and it will be faster. However, it is not a panacea.
|
||||
|
||||
If you change the metacopy param, make sure to do it in a way that is persistant across reboots. You
|
||||
If you change the metacopy param, make sure to do it in a way that is persistent across reboots. You
|
||||
should also be aware that this setting will be used for all containers, not just containers with
|
||||
user namespaces enabled. This will affect all the snapshots that you take manually (if you happen to
|
||||
do that). In that case, make sure to use the same value of `/sys/module/overlay/parameters/metacopy`
|
||||
when creating and restoring the snapshot.
|
||||
|
||||
### containerd 2.0
|
||||
### containerd 2.0 and above
|
||||
|
||||
The storage and latency limitation from containerd 1.7 are not present in container 2.0 and above,
|
||||
if you use the overlay snapshotter (this is used by default). It will not use more storage at all,
|
||||
@ -111,8 +111,36 @@ This is achieved by using the kernel feature idmap mounts with the container roo
|
||||
image). This allows an overlay file-system to expose the image with different UID/GID without copying
|
||||
the files nor the inodes, just using a bind-mount.
|
||||
|
||||
You can check if you are using idmap mounts for the container image if you create a pod with user
|
||||
namespaces, exec into it and run:
|
||||
Containerd by default will refuse to create a container with user namespaces, if overlayfs is the
|
||||
snapshotter and the kernel running doesn't support idmap mounts for overlayfs. This is to make sure
|
||||
before falling back to the expensive chown (in terms of storage and pod startup latency), you
|
||||
understand the implications and decide to opt-in. Please read the containerd 1.7 limitations for an
|
||||
explanation of those.
|
||||
|
||||
If your kernel doesn't support idmap mounts for the overlayfs snapshotter, you will see an error
|
||||
like:
|
||||
|
||||
```
|
||||
failed to create containerd container: snapshotter "overlayfs" doesn't support idmap mounts on this host, configure `slow_chown` to allow a slower and expensive fallback
|
||||
```
|
||||
|
||||
Linux supports idmap mounts on an overlayfs since version 5.19.
|
||||
|
||||
You can opt-in for the slow chown by adding the `slow_chown` field to your config in the overlayfs
|
||||
snapshotter section, like this:
|
||||
|
||||
```
|
||||
[plugins."io.containerd.snapshotter.v1.overlayfs"]
|
||||
slow_chown = true
|
||||
```
|
||||
|
||||
Note that only overlayfs users need to opt-in for the slow chown, as it as it is the only one that
|
||||
containerd provides a better option (only the overlayfs snapshotter supports idmap mounts in
|
||||
containerd). If you use another snapshotter, you will fall-back to the expensive chown without the
|
||||
need to opt-in.
|
||||
|
||||
That being said, you can double check if your container is using idmap mounts for the container
|
||||
image if you create a pod with user namespaces, exec into it and run:
|
||||
|
||||
```
|
||||
mount | grep overlay
|
||||
|
@ -52,6 +52,13 @@ version=2
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri"]
|
||||
drain_exec_sync_io_timeout = "10s"
|
||||
|
||||
# Userns requires idmap mount support for overlayfs (added in 5.19)
|
||||
# Let's opt-in for a recursive chown, so we can always test this even in old distros.
|
||||
# Note that if idmap mounts support is present, we will use that, so it is harmless to keep this
|
||||
# here.
|
||||
[plugins."io.containerd.snapshotter.v1.overlayfs"]
|
||||
slow_chown = true
|
||||
EOF
|
||||
|
||||
if command -v sestatus >/dev/null 2>&1; then
|
||||
|
@ -46,6 +46,7 @@ type SnapshotterConfig struct {
|
||||
ms MetaStore
|
||||
mountOptions []string
|
||||
remapIds bool
|
||||
slowChown bool
|
||||
}
|
||||
|
||||
// Opt is an option to configure the overlay snapshotter
|
||||
@ -98,6 +99,11 @@ func WithRemapIds(config *SnapshotterConfig) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func WithSlowChown(config *SnapshotterConfig) error {
|
||||
config.slowChown = true
|
||||
return nil
|
||||
}
|
||||
|
||||
type snapshotter struct {
|
||||
root string
|
||||
ms MetaStore
|
||||
@ -105,6 +111,7 @@ type snapshotter struct {
|
||||
upperdirLabel bool
|
||||
options []string
|
||||
remapIds bool
|
||||
slowChown bool
|
||||
}
|
||||
|
||||
// NewSnapshotter returns a Snapshotter which uses overlayfs. The overlayfs
|
||||
@ -161,6 +168,7 @@ func NewSnapshotter(root string, opts ...Opt) (snapshots.Snapshotter, error) {
|
||||
upperdirLabel: config.upperdirLabel,
|
||||
options: config.mountOptions,
|
||||
remapIds: config.remapIds,
|
||||
slowChown: config.slowChown,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -28,7 +28,8 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
capaRemapIds = "remap-ids"
|
||||
capaRemapIds = "remap-ids"
|
||||
capaOnlyRemapIds = "only-remap-ids"
|
||||
)
|
||||
|
||||
// Config represents configuration for the overlay plugin.
|
||||
@ -38,6 +39,11 @@ type Config struct {
|
||||
UpperdirLabel bool `toml:"upperdir_label"`
|
||||
SyncRemove bool `toml:"sync_remove"`
|
||||
|
||||
// slowChown allows the plugin to fallback to a recursive chown if fast options (like
|
||||
// idmap mounts) are not available. See more info about the overhead this can have in
|
||||
// github.com/containerd/containerd/docs/user-namespaces/.
|
||||
SlowChown bool `toml:"slow_chown"`
|
||||
|
||||
// MountOptions are options used for the overlay mount (not used on bind mounts)
|
||||
MountOptions []string `toml:"mount_options"`
|
||||
}
|
||||
@ -76,6 +82,14 @@ func init() {
|
||||
ic.Meta.Capabilities = append(ic.Meta.Capabilities, capaRemapIds)
|
||||
}
|
||||
|
||||
if config.SlowChown {
|
||||
oOpts = append(oOpts, overlay.WithSlowChown)
|
||||
} else {
|
||||
// If slowChown is false, we use capaOnlyRemapIds to signal we only
|
||||
// allow idmap mounts.
|
||||
ic.Meta.Capabilities = append(ic.Meta.Capabilities, capaOnlyRemapIds)
|
||||
}
|
||||
|
||||
ic.Meta.Exports["root"] = root
|
||||
return overlay.NewSnapshotter(root, oOpts...)
|
||||
},
|
||||
|
@ -26,7 +26,8 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
capabRemapIDs = "remap-ids"
|
||||
capaRemapIDs = "remap-ids"
|
||||
capaOnlyRemapIds = "only-remap-ids"
|
||||
)
|
||||
|
||||
// WithRemapperLabels creates the labels used by any supporting snapshotter
|
||||
@ -45,7 +46,7 @@ func resolveSnapshotOptions(ctx context.Context, client *Client, snapshotterName
|
||||
}
|
||||
|
||||
for _, capab := range capabs {
|
||||
if capab == capabRemapIDs {
|
||||
if capab == capaRemapIDs {
|
||||
// Snapshotter supports ID remapping, we don't need to do anything.
|
||||
return parent, nil
|
||||
}
|
||||
@ -72,6 +73,17 @@ func resolveSnapshotOptions(ctx context.Context, client *Client, snapshotterName
|
||||
return parent, nil
|
||||
}
|
||||
|
||||
capaOnlyRemap := false
|
||||
for _, capa := range capabs {
|
||||
if capa == capaOnlyRemapIds {
|
||||
capaOnlyRemap = true
|
||||
}
|
||||
}
|
||||
|
||||
if capaOnlyRemap {
|
||||
return "", fmt.Errorf("snapshotter %q doesn't support idmap mounts on this host, configure `slow_chown` to allow a slower and expensive fallback", snapshotterName)
|
||||
}
|
||||
|
||||
var ctrUID, hostUID, length uint32
|
||||
_, err = fmt.Sscanf(uidMap, "%d:%d:%d", &ctrUID, &hostUID, &length)
|
||||
if err != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user