Merge pull request #9117 from kinvolk/rata/userns-chown-opt-in
Require opt-in for rootfs chown when idmap mounts is not supported
This commit is contained in:
commit
9ca6fd9e6e
@ -86,22 +86,22 @@ Different containerd versions have different limitations too, those are highligh
|
|||||||
### containerd 1.7
|
### containerd 1.7
|
||||||
|
|
||||||
One limitation present in containerd 1.7 is that it needs to change the ownership of every file and
|
One limitation present in containerd 1.7 is that it needs to change the ownership of every file and
|
||||||
directory inside the container image, during Pod startup. This means it has a storage overhead (the
|
directory inside the container image, during Pod startup. This means it has a storage overhead, as
|
||||||
size of the container image is duplicated each time a pod is created) and can significantly impact
|
**the size of the container image is duplicated each time a pod is created**, and can significantly
|
||||||
the container startup latency.
|
impact the container startup latency, as doing such a copy takes time too.
|
||||||
|
|
||||||
You can mitigate this limitation by switching `/sys/module/overlay/parameters/metacopy` to `Y`. This
|
You can mitigate this limitation by switching `/sys/module/overlay/parameters/metacopy` to `Y`. This
|
||||||
will significantly reduce the storage and performance overhead, as only the inode for each file of
|
will significantly reduce the storage and performance overhead, as only the inode for each file of
|
||||||
the container image will be duplicated, but not the content of the file. This means it will use less
|
the container image will be duplicated, but not the content of the file. This means it will use less
|
||||||
storage and it will be faster. However, it is not a panacea.
|
storage and it will be faster. However, it is not a panacea.
|
||||||
|
|
||||||
If you change the metacopy param, make sure to do it in a way that is persistant across reboots. You
|
If you change the metacopy param, make sure to do it in a way that is persistent across reboots. You
|
||||||
should also be aware that this setting will be used for all containers, not just containers with
|
should also be aware that this setting will be used for all containers, not just containers with
|
||||||
user namespaces enabled. This will affect all the snapshots that you take manually (if you happen to
|
user namespaces enabled. This will affect all the snapshots that you take manually (if you happen to
|
||||||
do that). In that case, make sure to use the same value of `/sys/module/overlay/parameters/metacopy`
|
do that). In that case, make sure to use the same value of `/sys/module/overlay/parameters/metacopy`
|
||||||
when creating and restoring the snapshot.
|
when creating and restoring the snapshot.
|
||||||
|
|
||||||
### containerd 2.0
|
### containerd 2.0 and above
|
||||||
|
|
||||||
The storage and latency limitations from containerd 1.7 are not present in containerd 2.0 and above,
|
The storage and latency limitations from containerd 1.7 are not present in containerd 2.0 and above,
|
||||||
if you use the overlay snapshotter (this is used by default). It will not use more storage at all,
|
if you use the overlay snapshotter (this is used by default). It will not use more storage at all,
|
||||||
@ -111,8 +111,36 @@ This is achieved by using the kernel feature idmap mounts with the container roo
|
|||||||
image). This allows an overlay file-system to expose the image with different UID/GID without copying
|
image). This allows an overlay file-system to expose the image with different UID/GID without copying
|
||||||
the files nor the inodes, just using a bind-mount.
|
the files nor the inodes, just using a bind-mount.
|
||||||
|
|
||||||
You can check if you are using idmap mounts for the container image if you create a pod with user
|
Containerd by default will refuse to create a container with user namespaces, if overlayfs is the
|
||||||
namespaces, exec into it and run:
|
snapshotter and the running kernel doesn't support idmap mounts for overlayfs. This is to make sure that,
|
||||||
|
before falling back to the expensive chown (in terms of storage and pod startup latency), you
|
||||||
|
understand the implications and decide to opt-in. Please read the containerd 1.7 limitations for an
|
||||||
|
explanation of those.
|
||||||
|
|
||||||
|
If your kernel doesn't support idmap mounts for the overlayfs snapshotter, you will see an error
|
||||||
|
like:
|
||||||
|
|
||||||
|
```
|
||||||
|
failed to create containerd container: snapshotter "overlayfs" doesn't support idmap mounts on this host, configure `slow_chown` to allow a slower and expensive fallback
|
||||||
|
```
|
||||||
|
|
||||||
|
Linux supports idmap mounts on an overlayfs since version 5.19.
|
||||||
|
|
||||||
|
You can opt-in for the slow chown by adding the `slow_chown` field to your config in the overlayfs
|
||||||
|
snapshotter section, like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
[plugins."io.containerd.snapshotter.v1.overlayfs"]
|
||||||
|
slow_chown = true
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that only overlayfs users need to opt-in for the slow chown, as it is the only one for which
|
||||||
|
containerd provides a better option (only the overlayfs snapshotter supports idmap mounts in
|
||||||
|
containerd). If you use another snapshotter, you will fall back to the expensive chown without the
|
||||||
|
need to opt-in.
|
||||||
|
|
||||||
|
That being said, you can double check if your container is using idmap mounts for the container
|
||||||
|
image if you create a pod with user namespaces, exec into it and run:
|
||||||
|
|
||||||
```
|
```
|
||||||
mount | grep overlay
|
mount | grep overlay
|
||||||
|
@ -52,6 +52,13 @@ version=2
|
|||||||
|
|
||||||
[plugins."io.containerd.grpc.v1.cri"]
|
[plugins."io.containerd.grpc.v1.cri"]
|
||||||
drain_exec_sync_io_timeout = "10s"
|
drain_exec_sync_io_timeout = "10s"
|
||||||
|
|
||||||
|
# Userns requires idmap mount support for overlayfs (added in 5.19)
|
||||||
|
# Let's opt-in for a recursive chown, so we can always test this even in old distros.
|
||||||
|
# Note that if idmap mounts support is present, we will use that, so it is harmless to keep this
|
||||||
|
# here.
|
||||||
|
[plugins."io.containerd.snapshotter.v1.overlayfs"]
|
||||||
|
slow_chown = true
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
if command -v sestatus >/dev/null 2>&1; then
|
if command -v sestatus >/dev/null 2>&1; then
|
||||||
|
@ -46,6 +46,7 @@ type SnapshotterConfig struct {
|
|||||||
ms MetaStore
|
ms MetaStore
|
||||||
mountOptions []string
|
mountOptions []string
|
||||||
remapIds bool
|
remapIds bool
|
||||||
|
slowChown bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Opt is an option to configure the overlay snapshotter
|
// Opt is an option to configure the overlay snapshotter
|
||||||
@ -98,6 +99,11 @@ func WithRemapIds(config *SnapshotterConfig) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func WithSlowChown(config *SnapshotterConfig) error {
|
||||||
|
config.slowChown = true
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
type snapshotter struct {
|
type snapshotter struct {
|
||||||
root string
|
root string
|
||||||
ms MetaStore
|
ms MetaStore
|
||||||
@ -105,6 +111,7 @@ type snapshotter struct {
|
|||||||
upperdirLabel bool
|
upperdirLabel bool
|
||||||
options []string
|
options []string
|
||||||
remapIds bool
|
remapIds bool
|
||||||
|
slowChown bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewSnapshotter returns a Snapshotter which uses overlayfs. The overlayfs
|
// NewSnapshotter returns a Snapshotter which uses overlayfs. The overlayfs
|
||||||
@ -161,6 +168,7 @@ func NewSnapshotter(root string, opts ...Opt) (snapshots.Snapshotter, error) {
|
|||||||
upperdirLabel: config.upperdirLabel,
|
upperdirLabel: config.upperdirLabel,
|
||||||
options: config.mountOptions,
|
options: config.mountOptions,
|
||||||
remapIds: config.remapIds,
|
remapIds: config.remapIds,
|
||||||
|
slowChown: config.slowChown,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,7 +28,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
capaRemapIds = "remap-ids"
|
capaRemapIds = "remap-ids"
|
||||||
|
capaOnlyRemapIds = "only-remap-ids"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Config represents configuration for the overlay plugin.
|
// Config represents configuration for the overlay plugin.
|
||||||
@ -38,6 +39,11 @@ type Config struct {
|
|||||||
UpperdirLabel bool `toml:"upperdir_label"`
|
UpperdirLabel bool `toml:"upperdir_label"`
|
||||||
SyncRemove bool `toml:"sync_remove"`
|
SyncRemove bool `toml:"sync_remove"`
|
||||||
|
|
||||||
|
// SlowChown allows the plugin to fall back to a recursive chown if fast options (like
|
||||||
|
// idmap mounts) are not available. See more info about the overhead this can have in
|
||||||
|
// github.com/containerd/containerd/docs/user-namespaces/.
|
||||||
|
SlowChown bool `toml:"slow_chown"`
|
||||||
|
|
||||||
// MountOptions are options used for the overlay mount (not used on bind mounts)
|
// MountOptions are options used for the overlay mount (not used on bind mounts)
|
||||||
MountOptions []string `toml:"mount_options"`
|
MountOptions []string `toml:"mount_options"`
|
||||||
}
|
}
|
||||||
@ -76,6 +82,14 @@ func init() {
|
|||||||
ic.Meta.Capabilities = append(ic.Meta.Capabilities, capaRemapIds)
|
ic.Meta.Capabilities = append(ic.Meta.Capabilities, capaRemapIds)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if config.SlowChown {
|
||||||
|
oOpts = append(oOpts, overlay.WithSlowChown)
|
||||||
|
} else {
|
||||||
|
// If slowChown is false, we use capaOnlyRemapIds to signal we only
|
||||||
|
// allow idmap mounts.
|
||||||
|
ic.Meta.Capabilities = append(ic.Meta.Capabilities, capaOnlyRemapIds)
|
||||||
|
}
|
||||||
|
|
||||||
ic.Meta.Exports["root"] = root
|
ic.Meta.Exports["root"] = root
|
||||||
return overlay.NewSnapshotter(root, oOpts...)
|
return overlay.NewSnapshotter(root, oOpts...)
|
||||||
},
|
},
|
||||||
|
@ -26,7 +26,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
capabRemapIDs = "remap-ids"
|
capaRemapIDs = "remap-ids"
|
||||||
|
capaOnlyRemapIds = "only-remap-ids"
|
||||||
)
|
)
|
||||||
|
|
||||||
// WithRemapperLabels creates the labels used by any supporting snapshotter
|
// WithRemapperLabels creates the labels used by any supporting snapshotter
|
||||||
@ -45,7 +46,7 @@ func resolveSnapshotOptions(ctx context.Context, client *Client, snapshotterName
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, capab := range capabs {
|
for _, capab := range capabs {
|
||||||
if capab == capabRemapIDs {
|
if capab == capaRemapIDs {
|
||||||
// Snapshotter supports ID remapping, we don't need to do anything.
|
// Snapshotter supports ID remapping, we don't need to do anything.
|
||||||
return parent, nil
|
return parent, nil
|
||||||
}
|
}
|
||||||
@ -72,6 +73,17 @@ func resolveSnapshotOptions(ctx context.Context, client *Client, snapshotterName
|
|||||||
return parent, nil
|
return parent, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
capaOnlyRemap := false
|
||||||
|
for _, capa := range capabs {
|
||||||
|
if capa == capaOnlyRemapIds {
|
||||||
|
capaOnlyRemap = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if capaOnlyRemap {
|
||||||
|
return "", fmt.Errorf("snapshotter %q doesn't support idmap mounts on this host, configure `slow_chown` to allow a slower and expensive fallback", snapshotterName)
|
||||||
|
}
|
||||||
|
|
||||||
var ctrUID, hostUID, length uint32
|
var ctrUID, hostUID, length uint32
|
||||||
_, err = fmt.Sscanf(uidMap, "%d:%d:%d", &ctrUID, &hostUID, &length)
|
_, err = fmt.Sscanf(uidMap, "%d:%d:%d", &ctrUID, &hostUID, &length)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
Loading…
Reference in New Issue
Block a user