From 31a6449734f167b465a000e87e6cad2dd2a0abc5 Mon Sep 17 00:00:00 2001 From: David Leadbeater Date: Fri, 19 Aug 2022 05:22:44 +0000 Subject: [PATCH] Add capability for snapshotters to declare support for UID remapping This allows user namespace support to progress, either by allowing snapshotters to deal with ownership, or falling back to containerd doing a recursive chown. In the future, when snapshotters implement idmap mounts, they should report the "remap-ids" capability. Co-authored-by: Rodrigo Campos Signed-off-by: Rodrigo Campos Signed-off-by: David Leadbeater --- client.go | 18 +++++ container_opts.go | 10 +++ pkg/cri/server/container_create.go | 5 +- pkg/cri/server/container_create_linux.go | 5 +- pkg/cri/server/container_create_other.go | 4 +- pkg/cri/server/container_create_windows.go | 4 +- pkg/cri/server/helpers_linux.go | 91 ++++++++++++++++++++++ pkg/cri/server/sandbox_run.go | 11 ++- pkg/cri/server/sandbox_run_linux.go | 8 ++ pkg/cri/server/sandbox_run_other.go | 7 ++ pkg/cri/server/sandbox_run_windows.go | 6 ++ snapshots/snapshotter.go | 5 ++ snapshotter_opts_unix.go | 81 ++++++++++++++++++- snapshotter_opts_windows.go | 27 +++++++ 14 files changed, 270 insertions(+), 12 deletions(-) create mode 100644 snapshotter_opts_windows.go diff --git a/client.go b/client.go index 3786f8b57..4133476b1 100644 --- a/client.go +++ b/client.go @@ -866,3 +866,21 @@ func toPlatforms(pt []*apitypes.Platform) []ocispec.Platform { } return platforms } + +// GetSnapshotterCapabilities returns the capabilities of a snapshotter. +func (c *Client) GetSnapshotterCapabilities(ctx context.Context, snapshotterName string) ([]string, error) { + filters := []string{fmt.Sprintf("type==%s, id==%s", plugin.SnapshotPlugin, snapshotterName)} + in := c.IntrospectionService() + + resp, err := in.Plugins(ctx, filters) + if err != nil { + return nil, err + } + + if len(resp.Plugins) <= 0 { + return nil, fmt.Errorf("inspection service could not find snapshotter %s plugin", snapshotterName) + } + + sn := resp.Plugins[0] + return sn.Capabilities, nil +} diff --git a/container_opts.go b/container_opts.go index cf41d1aab..0719ed293 100644 --- a/container_opts.go +++ b/container_opts.go @@ -224,6 +224,11 @@ func WithNewSnapshot(id string, i Image, opts ...snapshots.Opt) NewContainerOpts if err != nil { return err } + + parent, err = resolveSnapshotOptions(ctx, client, c.Snapshotter, s, parent, opts...) + if err != nil { + return err + } if _, err := s.Prepare(ctx, id, parent, opts...); err != nil { return err } @@ -268,6 +273,11 @@ func WithNewSnapshotView(id string, i Image, opts ...snapshots.Opt) NewContainer if err != nil { return err } + + parent, err = resolveSnapshotOptions(ctx, client, c.Snapshotter, s, parent, opts...) + if err != nil { + return err + } if _, err := s.View(ctx, id, parent, opts...); err != nil { return err } diff --git a/pkg/cri/server/container_create.go b/pkg/cri/server/container_create.go index 72c4a6df1..ea4d5a02a 100644 --- a/pkg/cri/server/container_create.go +++ b/pkg/cri/server/container_create.go @@ -184,7 +184,10 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta log.G(ctx).Debugf("Container %q spec: %#+v", id, spew.NewFormatter(spec)) // Grab any platform specific snapshotter opts. - sOpts := snapshotterOpts(c.config.ContainerdConfig.Snapshotter, config) + sOpts, err := snapshotterOpts(c.config.ContainerdConfig.Snapshotter, config) + if err != nil { + return nil, err + } // Set snapshotter before any other options. opts := []containerd.NewContainerOpts{ diff --git a/pkg/cri/server/container_create_linux.go b/pkg/cri/server/container_create_linux.go index a74bf5d9a..93e7469ab 100644 --- a/pkg/cri/server/container_create_linux.go +++ b/pkg/cri/server/container_create_linux.go @@ -601,6 +601,7 @@ func generateUserString(username string, uid, gid *runtime.Int64Value) (string, } // snapshotterOpts returns any Linux specific snapshotter options for the rootfs snapshot -func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt { - return []snapshots.Opt{} +func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) { + nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions() + return snapshotterRemapOpts(nsOpts) } diff --git a/pkg/cri/server/container_create_other.go b/pkg/cri/server/container_create_other.go index 9cfb15a04..acab67c11 100644 --- a/pkg/cri/server/container_create_other.go +++ b/pkg/cri/server/container_create_other.go @@ -55,6 +55,6 @@ func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageCon } // snapshotterOpts returns snapshotter options for the rootfs snapshot -func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt { - return []snapshots.Opt{} +func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) { + return []snapshots.Opt{}, nil } diff --git a/pkg/cri/server/container_create_windows.go b/pkg/cri/server/container_create_windows.go index bd7ed0fa5..e11466545 100644 --- a/pkg/cri/server/container_create_windows.go +++ b/pkg/cri/server/container_create_windows.go @@ -145,7 +145,7 @@ func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageCon } // snapshotterOpts returns any Windows specific snapshotter options for the r/w layer -func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt { +func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) { var opts []snapshots.Opt switch snapshotterName { @@ -160,5 +160,5 @@ func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) [] } } - return opts + return opts, nil } diff --git a/pkg/cri/server/helpers_linux.go b/pkg/cri/server/helpers_linux.go index 42b2d99a1..d0cb13006 100644 --- a/pkg/cri/server/helpers_linux.go +++ b/pkg/cri/server/helpers_linux.go @@ -28,11 +28,13 @@ import ( "syscall" "time" + "github.com/containerd/containerd" "github.com/containerd/containerd/log" "github.com/containerd/containerd/mount" "github.com/containerd/containerd/pkg/apparmor" "github.com/containerd/containerd/pkg/seccomp" "github.com/containerd/containerd/pkg/seutil" + "github.com/containerd/containerd/snapshots" "github.com/moby/sys/mountinfo" "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/selinux/go-selinux/label" @@ -275,3 +277,92 @@ func modifyProcessLabel(runtimeType string, spec *specs.Spec) error { spec.Process.SelinuxLabel = l return nil } + +func parseUsernsIDMap(runtimeIDMap []*runtime.IDMapping) ([]specs.LinuxIDMapping, error) { + var m []specs.LinuxIDMapping + + if len(runtimeIDMap) == 0 { + return m, nil + } + + if len(runtimeIDMap) > 1 { + // We only accept 1 line, because containerd.WithRemappedSnapshot() only supports that. + return m, fmt.Errorf("only one mapping line supported, got %v mapping lines", len(runtimeIDMap)) + } + + // We know len is 1 now. + if runtimeIDMap[0] == nil { + return m, nil + } + uidMap := *runtimeIDMap[0] + + if uidMap.Length < 1 { + return m, fmt.Errorf("invalid mapping length: %v", uidMap.Length) + } + + m = []specs.LinuxIDMapping{ + { + ContainerID: uidMap.ContainerId, + HostID: uidMap.HostId, + Size: uidMap.Length, + }, + } + + return m, nil +} + +func parseUsernsIDs(userns *runtime.UserNamespace) (uids, gids []specs.LinuxIDMapping, retErr error) { + if userns == nil { + // If userns is not set, the kubelet doesn't support this option + // and we should just fallback to no userns. This is completely + // valid. + return nil, nil, nil + } + + uidRuntimeMap := userns.GetUids() + gidRuntimeMap := userns.GetGids() + + uids, err := parseUsernsIDMap(uidRuntimeMap) + if err != nil { + return nil, nil, fmt.Errorf("UID mapping: %w", err) + } + + gids, err = parseUsernsIDMap(gidRuntimeMap) + if err != nil { + return nil, nil, fmt.Errorf("GID mapping: %w", err) + } + + switch mode := userns.GetMode(); mode { + case runtime.NamespaceMode_NODE: + if len(uids) != 0 || len(gids) != 0 { + return nil, nil, fmt.Errorf("can't use user namespace mode %q with mappings. Got %v UID mappings and %v GID mappings", mode, len(uids), len(gids)) + } + case runtime.NamespaceMode_POD: + // This is valid, we will handle it in WithPodNamespaces(). + if len(uids) == 0 || len(gids) == 0 { + return nil, nil, fmt.Errorf("can't use user namespace mode %q without UID and GID mappings", mode) + } + default: + return nil, nil, fmt.Errorf("unsupported user namespace mode: %q", mode) + } + + return uids, gids, nil +} + +func snapshotterRemapOpts(nsOpts *runtime.NamespaceOption) ([]snapshots.Opt, error) { + snapshotOpt := []snapshots.Opt{} + usernsOpts := nsOpts.GetUsernsOptions() + if usernsOpts == nil { + return snapshotOpt, nil + } + + uids, gids, err := parseUsernsIDs(usernsOpts) + if err != nil { + return nil, fmt.Errorf("user namespace configuration: %w", err) + } + + if usernsOpts.GetMode() == runtime.NamespaceMode_POD { + snapshotOpt = append(snapshotOpt, containerd.WithRemapperLabels(0, uids[0].HostID, 0, gids[0].HostID, uids[0].Size)) + } + return snapshotOpt, nil +} diff --git a/pkg/cri/server/sandbox_run.go b/pkg/cri/server/sandbox_run.go index b7ed6c150..9419a476c 100644 --- a/pkg/cri/server/sandbox_run.go +++ b/pkg/cri/server/sandbox_run.go @@ -158,10 +158,17 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox if err != nil { return nil, fmt.Errorf("failed to generate runtime options: %w", err) } - snapshotterOpt := snapshots.WithLabels(snapshots.FilterInheritedLabels(config.Annotations)) + + sOpts := []snapshots.Opt{snapshots.WithLabels(snapshots.FilterInheritedLabels(config.Annotations))} + extraSOpts, err := sandboxSnapshotterOpts(config) + if err != nil { + return nil, err + } + sOpts = append(sOpts, extraSOpts...) + opts := []containerd.NewContainerOpts{ containerd.WithSnapshotter(c.runtimeSnapshotter(ctx, ociRuntime)), - customopts.WithNewSnapshot(id, containerdImage, snapshotterOpt), + customopts.WithNewSnapshot(id, containerdImage, sOpts...), containerd.WithSpec(spec, specOpts...), containerd.WithContainerLabels(sandboxLabels), containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata), diff --git a/pkg/cri/server/sandbox_run_linux.go b/pkg/cri/server/sandbox_run_linux.go index 21bbb1baf..5aacd76ff 100644 --- a/pkg/cri/server/sandbox_run_linux.go +++ b/pkg/cri/server/sandbox_run_linux.go @@ -25,6 +25,7 @@ import ( "github.com/containerd/containerd" "github.com/containerd/containerd/oci" "github.com/containerd/containerd/plugin" + "github.com/containerd/containerd/snapshots" imagespec "github.com/opencontainers/image-spec/specs-go/v1" runtimespec "github.com/opencontainers/runtime-spec/specs-go" selinux "github.com/opencontainers/selinux/go-selinux" @@ -358,3 +359,10 @@ func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath strin } } } + +// sandboxSnapshotterOpts generates any platform specific snapshotter options +// for a sandbox container. +func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) { + nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions() + return snapshotterRemapOpts(nsOpts) +} diff --git a/pkg/cri/server/sandbox_run_other.go b/pkg/cri/server/sandbox_run_other.go index 150cc917d..1676b4760 100644 --- a/pkg/cri/server/sandbox_run_other.go +++ b/pkg/cri/server/sandbox_run_other.go @@ -21,6 +21,7 @@ package server import ( "github.com/containerd/containerd" "github.com/containerd/containerd/oci" + "github.com/containerd/containerd/snapshots" imagespec "github.com/opencontainers/image-spec/specs-go/v1" runtimespec "github.com/opencontainers/runtime-spec/specs-go" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" @@ -56,3 +57,9 @@ func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts { func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) { } + +// sandboxSnapshotterOpts generates any platform specific snapshotter options +// for a sandbox container. +func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) { + return []snapshots.Opt{}, nil +} diff --git a/pkg/cri/server/sandbox_run_windows.go b/pkg/cri/server/sandbox_run_windows.go index 017007f66..10b9b2faf 100644 --- a/pkg/cri/server/sandbox_run_windows.go +++ b/pkg/cri/server/sandbox_run_windows.go @@ -22,6 +22,7 @@ import ( "github.com/containerd/containerd" "github.com/containerd/containerd/oci" + "github.com/containerd/containerd/snapshots" imagespec "github.com/opencontainers/image-spec/specs-go/v1" runtimespec "github.com/opencontainers/runtime-spec/specs-go" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" @@ -116,3 +117,8 @@ func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts { func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) { spec.Windows.Network.NetworkNamespace = nsPath } + +// No sandbox snapshotter options needed for windows. +func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) { + return []snapshots.Opt{}, nil +} diff --git a/snapshots/snapshotter.go b/snapshots/snapshotter.go index 9da28583b..5fa5aa530 100644 --- a/snapshots/snapshotter.go +++ b/snapshots/snapshotter.go @@ -33,6 +33,11 @@ const ( UnpackKeyFormat = UnpackKeyPrefix + "-%s %s" inheritedLabelsPrefix = "containerd.io/snapshot/" labelSnapshotRef = "containerd.io/snapshot.ref" + + // LabelSnapshotUIDMapping is the label used for UID mappings + LabelSnapshotUIDMapping = "containerd.io/snapshot/uidmapping" + // LabelSnapshotGIDMapping is the label used for GID mappings + LabelSnapshotGIDMapping = "containerd.io/snapshot/gidmapping" ) // Kind identifies the kind of snapshot. diff --git a/snapshotter_opts_unix.go b/snapshotter_opts_unix.go index e25588b09..4739e192f 100644 --- a/snapshotter_opts_unix.go +++ b/snapshotter_opts_unix.go @@ -19,17 +19,92 @@ package containerd import ( + "context" "fmt" "github.com/containerd/containerd/snapshots" ) +const ( + capabRemapIDs = "remap-ids" +) + // WithRemapperLabels creates the labels used by any supporting snapshotter // to shift the filesystem ownership (user namespace mapping) automatically; currently // supported by the fuse-overlayfs snapshotter func WithRemapperLabels(ctrUID, hostUID, ctrGID, hostGID, length uint32) snapshots.Opt { return snapshots.WithLabels(map[string]string{ - "containerd.io/snapshot/uidmapping": fmt.Sprintf("%d:%d:%d", ctrUID, hostUID, length), - "containerd.io/snapshot/gidmapping": fmt.Sprintf("%d:%d:%d", ctrGID, hostGID, length), - }) + snapshots.LabelSnapshotUIDMapping: fmt.Sprintf("%d:%d:%d", ctrUID, hostUID, length), + snapshots.LabelSnapshotGIDMapping: fmt.Sprintf("%d:%d:%d", ctrGID, hostGID, length)}) +} + +func resolveSnapshotOptions(ctx context.Context, client *Client, snapshotterName string, snapshotter snapshots.Snapshotter, parent string, opts ...snapshots.Opt) (string, error) { + capabs, err := client.GetSnapshotterCapabilities(ctx, snapshotterName) + if err != nil { + return "", err + } + + for _, capab := range capabs { + if capab == capabRemapIDs { + // Snapshotter supports ID remapping, we don't need to do anything. + return parent, nil + } + } + + var local snapshots.Info + for _, opt := range opts { + opt(&local) + } + + needsRemap := false + var uidMap, gidMap string + + if value, ok := local.Labels[snapshots.LabelSnapshotUIDMapping]; ok { + needsRemap = true + uidMap = value + } + if value, ok := local.Labels[snapshots.LabelSnapshotGIDMapping]; ok { + needsRemap = true + gidMap = value + } + + if !needsRemap { + return parent, nil + } + + var ctrUID, hostUID, length uint32 + _, err = fmt.Sscanf(uidMap, "%d:%d:%d", &ctrUID, &hostUID, &length) + if err != nil { + return "", fmt.Errorf("uidMap unparsable: %w", err) + } + + var ctrGID, hostGID, lengthGID uint32 + _, err = fmt.Sscanf(gidMap, "%d:%d:%d", &ctrGID, &hostGID, &lengthGID) + if err != nil { + return "", fmt.Errorf("gidMap unparsable: %w", err) + } + + if ctrUID != 0 || ctrGID != 0 { + return "", fmt.Errorf("Container UID/GID of 0 only supported currently (%d/%d)", ctrUID, ctrGID) + } + + // TODO(dgl): length isn't taken into account for the intermediate snapshot id. + usernsID := fmt.Sprintf("%s-%d-%d", parent, hostUID, hostGID) + if _, err := snapshotter.Stat(ctx, usernsID); err == nil { + return usernsID, nil + } + mounts, err := snapshotter.Prepare(ctx, usernsID+"-remap", parent) + if err != nil { + return "", err + } + // TODO(dgl): length isn't taken into account here yet either. + if err := remapRootFS(ctx, mounts, hostUID, hostGID); err != nil { + snapshotter.Remove(ctx, usernsID+"-remap") + return "", err + } + if err := snapshotter.Commit(ctx, usernsID, usernsID+"-remap"); err != nil { + return "", err + } + + return usernsID, nil } diff --git a/snapshotter_opts_windows.go b/snapshotter_opts_windows.go new file mode 100644 index 000000000..540bcb313 --- /dev/null +++ b/snapshotter_opts_windows.go @@ -0,0 +1,27 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package containerd + +import ( + "context" + + "github.com/containerd/containerd/snapshots" +) + +func resolveSnapshotOptions(ctx context.Context, client *Client, snapshotterName string, snapshotter snapshots.Snapshotter, parent string, opts ...snapshots.Opt) (string, error) { + return parent, nil +}