Merge pull request #5890 from artqzn/idmapped_mounts
RFC: Initial support of idmapped mount points
This commit is contained in:
commit
0ee2433c94
@ -186,8 +186,9 @@ func NewContainer(ctx gocontext.Context, client *containerd.Client, context *cli
|
||||
opts = append(opts,
|
||||
oci.WithUserNamespace([]specs.LinuxIDMapping{uidMap}, []specs.LinuxIDMapping{gidMap}))
|
||||
// use snapshotter opts or the remapped snapshot support to shift the filesystem
|
||||
// currently the only snapshotter known to support the labels is fuse-overlayfs:
|
||||
// https://github.com/AkihiroSuda/containerd-fuse-overlayfs
|
||||
// currently the snapshotters known to support the labels are:
|
||||
// fuse-overlayfs - https://github.com/containerd/fuse-overlayfs-snapshotter
|
||||
// overlay - in case of idmapped mount points are supported by host kernel (Linux kernel 5.19)
|
||||
if context.Bool("remap-labels") {
|
||||
cOpts = append(cOpts, containerd.WithNewSnapshot(id, image,
|
||||
containerd.WithRemapperLabels(0, uidMap.HostID, 0, gidMap.HostID, uidMap.Size)))
|
||||
|
123
integration/client/container_idmapped_linux_test.go
Normal file
123
integration/client/container_idmapped_linux_test.go
Normal file
@ -0,0 +1,123 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package client
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"github.com/containerd/containerd"
|
||||
"github.com/containerd/containerd/oci"
|
||||
"github.com/containerd/containerd/snapshots/overlay/overlayutils"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
func TestIDMappedOverlay(t *testing.T) {
|
||||
var (
|
||||
upperPath string
|
||||
lowerPaths []string
|
||||
snapshotter = "overlayfs"
|
||||
ctx, cancel = testContext(t)
|
||||
id = t.Name()
|
||||
)
|
||||
defer cancel()
|
||||
|
||||
if ok, err := overlayutils.SupportsIDMappedMounts(); err != nil || !ok {
|
||||
t.Skip("overlayfs doesn't support idmapped mounts")
|
||||
}
|
||||
|
||||
client, err := newClient(t, address)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
image, err := client.Pull(ctx, testMultiLayeredImage, containerd.WithPullUnpack)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
t.Logf("image %s pulled!", testMultiLayeredImage)
|
||||
|
||||
hostID := uint32(33)
|
||||
contID := uint32(0)
|
||||
length := uint32(65536)
|
||||
|
||||
uidMap := specs.LinuxIDMapping{
|
||||
ContainerID: contID,
|
||||
HostID: hostID,
|
||||
Size: length,
|
||||
}
|
||||
gidMap := specs.LinuxIDMapping{
|
||||
ContainerID: contID,
|
||||
HostID: hostID,
|
||||
Size: length,
|
||||
}
|
||||
|
||||
container, err := client.NewContainer(ctx, id,
|
||||
containerd.WithImage(image),
|
||||
containerd.WithImageConfigLabels(image),
|
||||
containerd.WithSnapshotter(snapshotter),
|
||||
containerd.WithNewSnapshot(id, image, containerd.WithRemapperLabels(uidMap.ContainerID, uidMap.HostID, gidMap.ContainerID, gidMap.HostID, length)),
|
||||
containerd.WithNewSpec(oci.WithImageConfig(image),
|
||||
oci.WithUserNamespace([]specs.LinuxIDMapping{uidMap}, []specs.LinuxIDMapping{gidMap}),
|
||||
longCommand))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer container.Delete(ctx, containerd.WithSnapshotCleanup)
|
||||
|
||||
t.Logf("container %s created!", id)
|
||||
o := client.SnapshotService(snapshotter)
|
||||
mounts, err := o.Mounts(ctx, id)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
m := mounts[0]
|
||||
if m.Type != "overlay" {
|
||||
t.Fatalf("invalid mount -- %s; expected %s", m.Type, snapshotter)
|
||||
}
|
||||
|
||||
for _, o := range m.Options {
|
||||
if strings.HasPrefix(o, "upperdir=") {
|
||||
upperPath = strings.TrimPrefix(o, "upperdir=")
|
||||
} else if strings.HasPrefix(o, "lowerdir=") {
|
||||
lowerPaths = strings.Split(strings.TrimPrefix(o, "lowerdir="), ",")
|
||||
}
|
||||
}
|
||||
|
||||
t.Log("check lowerdirs")
|
||||
for _, l := range lowerPaths {
|
||||
if _, err := os.Stat(l); err == nil {
|
||||
t.Fatalf("lowerdir=%s should not exist", l)
|
||||
}
|
||||
}
|
||||
|
||||
t.Logf("check stats of uppedir=%s", upperPath)
|
||||
st, err := os.Stat(upperPath)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to stat %s", upperPath)
|
||||
}
|
||||
|
||||
if stat, ok := st.Sys().(*syscall.Stat_t); !ok {
|
||||
t.Fatalf("incompatible types after stat call: *syscall.Stat_t expected")
|
||||
} else if stat.Uid != uidMap.HostID || stat.Gid != gidMap.HostID {
|
||||
t.Fatalf("bad mapping: expected {uid: %d, gid: %d}; real {uid: %d, gid: %d}", uidMap.HostID, gidMap.HostID, int(stat.Uid), int(stat.Gid))
|
||||
}
|
||||
}
|
166
mount/mount_idmapped_linux.go
Normal file
166
mount/mount_idmapped_linux.go
Normal file
@ -0,0 +1,166 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/containerd/containerd/sys"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// parseIDMapping parses a single "container-id:host-id:size" mapping string
// into a one-element slice of syscall.SysProcIDMap.
//
// Only a container ID of 0 is accepted, the host ID must be non-negative and
// the size must be greater than zero. A zero-length range is rejected here
// because the kernel refuses it when the map file is written, which would
// otherwise only surface after a child process has already been forked.
//
// TODO: Support multiple mappings in future
func parseIDMapping(mapping string) ([]syscall.SysProcIDMap, error) {
	parts := strings.Split(mapping, ":")
	if len(parts) != 3 {
		return nil, fmt.Errorf("user namespace mappings require the format `container-id:host-id:size`")
	}

	cID, err := strconv.Atoi(parts[0])
	if err != nil {
		return nil, fmt.Errorf("invalid container id for user namespace remapping, %w", err)
	}
	hID, err := strconv.Atoi(parts[1])
	if err != nil {
		return nil, fmt.Errorf("invalid host id for user namespace remapping, %w", err)
	}
	size, err := strconv.Atoi(parts[2])
	if err != nil {
		return nil, fmt.Errorf("invalid size for user namespace remapping, %w", err)
	}

	if cID != 0 || hID < 0 || size <= 0 {
		return nil, fmt.Errorf("invalid mapping %s, all IDs and size must be positive integers (container ID of 0 is only supported)", mapping)
	}

	return []syscall.SysProcIDMap{
		{
			ContainerID: cID,
			HostID:      hID,
			Size:        size,
		},
	}, nil
}
|
||||
|
||||
// IDMapMount applies GID/UID shift according to gidmap/uidmap for target path
|
||||
func IDMapMount(source, target string, usernsFd int) (err error) {
|
||||
var (
|
||||
attr unix.MountAttr
|
||||
)
|
||||
|
||||
attr.Attr_set = unix.MOUNT_ATTR_IDMAP
|
||||
attr.Attr_clr = 0
|
||||
attr.Propagation = 0
|
||||
attr.Userns_fd = uint64(usernsFd)
|
||||
|
||||
dFd, err := unix.OpenTree(-int(unix.EBADF), source, uint(unix.OPEN_TREE_CLONE|unix.OPEN_TREE_CLOEXEC|unix.AT_EMPTY_PATH))
|
||||
if err != nil {
|
||||
return fmt.Errorf("Unable to open tree for %s: %w", target, err)
|
||||
}
|
||||
|
||||
defer unix.Close(dFd)
|
||||
if err = unix.MountSetattr(dFd, "", unix.AT_EMPTY_PATH, &attr); err != nil {
|
||||
return fmt.Errorf("Unable to shift GID/UID for %s: %w", target, err)
|
||||
}
|
||||
|
||||
if err = unix.MoveMount(dFd, "", -int(unix.EBADF), target, unix.MOVE_MOUNT_F_EMPTY_PATH); err != nil {
|
||||
return fmt.Errorf("Unable to attach mount tree to %s: %w", target, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetUsernsFD forks the current process and creates a user namespace using the specified
|
||||
// mappings.
|
||||
//
|
||||
// It returns:
|
||||
// 1. The file descriptor of the /proc/[pid]/ns/user of the newly
|
||||
// created mapping.
|
||||
// 2. "Clean up" function that should be called once user namespace
|
||||
// file descriptor is no longer needed.
|
||||
// 3. Usual error.
|
||||
func GetUsernsFD(uidmap, gidmap string) (_ int, _ func(), err error) {
|
||||
var (
|
||||
usernsFile *os.File
|
||||
pipeMap [2]int
|
||||
pid uintptr
|
||||
errno syscall.Errno
|
||||
uidMaps, gidMaps []syscall.SysProcIDMap
|
||||
)
|
||||
|
||||
if uidMaps, err = parseIDMapping(uidmap); err != nil {
|
||||
return -1, nil, err
|
||||
}
|
||||
if gidMaps, err = parseIDMapping(gidmap); err != nil {
|
||||
return -1, nil, err
|
||||
}
|
||||
|
||||
syscall.ForkLock.Lock()
|
||||
if err = syscall.Pipe2(pipeMap[:], syscall.O_CLOEXEC); err != nil {
|
||||
syscall.ForkLock.Unlock()
|
||||
return -1, nil, err
|
||||
}
|
||||
|
||||
pid, errno = sys.ForkUserns(pipeMap)
|
||||
syscall.ForkLock.Unlock()
|
||||
if errno != 0 {
|
||||
syscall.Close(pipeMap[0])
|
||||
syscall.Close(pipeMap[1])
|
||||
return -1, nil, errno
|
||||
}
|
||||
|
||||
syscall.Close(pipeMap[0])
|
||||
|
||||
writeMappings := func(fname string, idmap []syscall.SysProcIDMap) error {
|
||||
mappings := ""
|
||||
for _, m := range idmap {
|
||||
mappings = fmt.Sprintf("%d %d %d\n", m.ContainerID, m.HostID, m.Size)
|
||||
}
|
||||
return os.WriteFile(fmt.Sprintf("/proc/%d/%s", pid, fname), []byte(mappings), 0600)
|
||||
}
|
||||
|
||||
cleanUpChild := func() {
|
||||
sync := sys.ProcSyncExit
|
||||
if _, _, errno := syscall.Syscall6(syscall.SYS_WRITE, uintptr(pipeMap[1]), uintptr(unsafe.Pointer(&sync)), unsafe.Sizeof(sync), 0, 0, 0); errno != 0 {
|
||||
logrus.WithError(errno).Warnf("failed to sync with child (ProcSyncExit)")
|
||||
}
|
||||
syscall.Close(pipeMap[1])
|
||||
|
||||
if _, err := unix.Wait4(int(pid), nil, 0, nil); err != nil {
|
||||
logrus.WithError(err).Warnf("failed to wait for child process; the SIGHLD might be received by shim reaper")
|
||||
}
|
||||
}
|
||||
defer cleanUpChild()
|
||||
|
||||
if err := writeMappings("uid_map", uidMaps); err != nil {
|
||||
return -1, nil, err
|
||||
}
|
||||
if err := writeMappings("gid_map", gidMaps); err != nil {
|
||||
return -1, nil, err
|
||||
}
|
||||
|
||||
if usernsFile, err = os.Open(fmt.Sprintf("/proc/%d/ns/user", pid)); err != nil {
|
||||
return -1, nil, fmt.Errorf("failed to get user ns file descriptor for - /proc/%d/user/ns: %w", pid, err)
|
||||
}
|
||||
|
||||
return int(usernsFile.Fd()), func() {
|
||||
usernsFile.Close()
|
||||
}, nil
|
||||
}
|
@ -21,14 +21,26 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
exec "golang.org/x/sys/execabs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// mountOpt is the parsed form of a mount's option list, as produced by
// parseMountOptions.
type mountOpt struct {
|
||||
// flags accumulates the mount flag bits of the recognized options.
flags int
|
||||
// data collects the options that were not recognized as flags, "loop",
// "uidmap=" or "gidmap=".
data []string
|
||||
// losetup is set when the "loop" option is present.
losetup bool
|
||||
// uidmap is the value of the "uidmap=" option, empty when absent.
uidmap string
|
||||
// gidmap is the value of the "gidmap=" option, empty when absent.
gidmap string
|
||||
}
|
||||
|
||||
var (
|
||||
pagesize = 4096
|
||||
allowedHelperBinaries = []string{"mount.fuse", "mount.fuse3"}
|
||||
@ -38,6 +50,34 @@ func init() {
|
||||
pagesize = os.Getpagesize()
|
||||
}
|
||||
|
||||
// prepareIDMappedOverlay is a helper function to obtain
|
||||
// actual "lowerdir=..." mount options. It creates and
|
||||
// applies id mapping for each lowerdir.
|
||||
//
|
||||
// It returns:
|
||||
// 1. New options that include new "lowedir=..." mount option.
|
||||
// 2. "Clean up" function -- it should be called as a defer one before
|
||||
// checking for error, because if do the second and avoid calling "clean up",
|
||||
// you're going to have "dirty" setup -- there's no guarantee that those
|
||||
// temporary mount points for lowedirs will be cleaned properly.
|
||||
// 3. Error -- nil if everything's fine, otherwise an error.
|
||||
func prepareIDMappedOverlay(usernsFd int, options []string) ([]string, func(), error) {
|
||||
lowerIdx, lowerDirs := findOverlayLowerdirs(options)
|
||||
if lowerIdx == -1 {
|
||||
return options, nil, fmt.Errorf("failed to parse overlay lowerdir's from given options")
|
||||
}
|
||||
|
||||
tmpLowerdirs, idMapCleanUp, err := doPrepareIDMappedOverlay(lowerDirs, usernsFd)
|
||||
if err != nil {
|
||||
return options, idMapCleanUp, fmt.Errorf("failed to create idmapped mount: %w", err)
|
||||
}
|
||||
|
||||
options = append(options[:lowerIdx], options[lowerIdx+1:]...)
|
||||
options = append(options, fmt.Sprintf("lowerdir=%s", strings.Join(tmpLowerdirs, ":")))
|
||||
|
||||
return options, idMapCleanUp, nil
|
||||
}
|
||||
|
||||
// Mount to the provided target path.
|
||||
//
|
||||
// If m.Type starts with "fuse." or "fuse3.", "mount.fuse" or "mount.fuse3"
|
||||
@ -51,45 +91,81 @@ func (m *Mount) mount(target string) (err error) {
|
||||
}
|
||||
}
|
||||
var (
|
||||
chdir string
|
||||
options = m.Options
|
||||
chdir string
|
||||
recalcOpt bool
|
||||
usernsFd int
|
||||
options = m.Options
|
||||
)
|
||||
opt := parseMountOptions(options)
|
||||
// The only remapping of both GID and UID is supported
|
||||
if opt.uidmap != "" && opt.gidmap != "" {
|
||||
var (
|
||||
childProcCleanUp func()
|
||||
)
|
||||
if usernsFd, childProcCleanUp, err = GetUsernsFD(opt.uidmap, opt.gidmap); err != nil {
|
||||
return err
|
||||
}
|
||||
defer childProcCleanUp()
|
||||
|
||||
// overlay expects lowerdir's to be remapped instead
|
||||
if m.Type == "overlay" {
|
||||
var (
|
||||
userNsCleanUp func()
|
||||
)
|
||||
options, userNsCleanUp, err = prepareIDMappedOverlay(usernsFd, options)
|
||||
defer userNsCleanUp()
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to prepare idmapped overlay: %w", err)
|
||||
}
|
||||
// To not meet concurrency issues while using the same lowedirs
|
||||
// for different containers, replace them by temporary directories,
|
||||
if optionsSize(options) >= pagesize-512 {
|
||||
recalcOpt = true
|
||||
} else {
|
||||
opt = parseMountOptions(options)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// avoid hitting one page limit of mount argument buffer
|
||||
//
|
||||
// NOTE: 512 is a buffer during pagesize check.
|
||||
if m.Type == "overlay" && optionsSize(options) >= pagesize-512 {
|
||||
chdir, options = compactLowerdirOption(options)
|
||||
// recalculate opt in case of lowerdirs have been replaced
|
||||
// by idmapped ones OR idmapped mounts' not used/supported.
|
||||
if recalcOpt || (opt.uidmap == "" || opt.gidmap == "") {
|
||||
opt = parseMountOptions(options)
|
||||
}
|
||||
}
|
||||
|
||||
flags, data, losetup := parseMountOptions(options)
|
||||
|
||||
// propagation types.
|
||||
const ptypes = unix.MS_SHARED | unix.MS_PRIVATE | unix.MS_SLAVE | unix.MS_UNBINDABLE
|
||||
|
||||
// Ensure propagation type change flags aren't included in other calls.
|
||||
oflags := flags &^ ptypes
|
||||
oflags := opt.flags &^ ptypes
|
||||
|
||||
var loopParams LoopParams
|
||||
if losetup {
|
||||
if opt.losetup {
|
||||
loopParams = LoopParams{
|
||||
Readonly: oflags&unix.MS_RDONLY == unix.MS_RDONLY,
|
||||
Autoclear: true,
|
||||
}
|
||||
loopParams.Direct, data = hasDirectIO(data)
|
||||
loopParams.Direct, opt.data = hasDirectIO(opt.data)
|
||||
}
|
||||
|
||||
dataInStr := strings.Join(data, ",")
|
||||
dataInStr := strings.Join(opt.data, ",")
|
||||
if len(dataInStr) > pagesize {
|
||||
return errors.New("mount options is too long")
|
||||
}
|
||||
|
||||
// In the case of remounting with changed data (data != ""), need to call mount (moby/moby#34077).
|
||||
if flags&unix.MS_REMOUNT == 0 || dataInStr != "" {
|
||||
// In the case of remounting with changed data (dataInStr != ""), need to call mount (moby/moby#34077).
|
||||
if opt.flags&unix.MS_REMOUNT == 0 || dataInStr != "" {
|
||||
// Initial call applying all non-propagation flags for mount
|
||||
// or remount with changed data
|
||||
source := m.Source
|
||||
if losetup {
|
||||
if opt.losetup {
|
||||
loFile, err := setupLoop(m.Source, loopParams)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -104,10 +180,10 @@ func (m *Mount) mount(target string) (err error) {
|
||||
}
|
||||
}
|
||||
|
||||
if flags&ptypes != 0 {
|
||||
if opt.flags&ptypes != 0 {
|
||||
// Change the propagation type.
|
||||
const pflags = ptypes | unix.MS_REC | unix.MS_SILENT
|
||||
if err := unix.Mount("", target, "", uintptr(flags&pflags), ""); err != nil {
|
||||
if err := unix.Mount("", target, "", uintptr(opt.flags&pflags), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -117,9 +193,45 @@ func (m *Mount) mount(target string) (err error) {
|
||||
// Remount the bind to apply read only.
|
||||
return unix.Mount("", target, "", uintptr(oflags|unix.MS_REMOUNT), "")
|
||||
}
|
||||
|
||||
// remap non-overlay mount point
|
||||
if opt.uidmap != "" && opt.gidmap != "" && m.Type != "overlay" {
|
||||
if err := IDMapMount(target, target, usernsFd); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func doPrepareIDMappedOverlay(lowerDirs []string, usernsFd int) (tmpLowerDirs []string, _ func(), _ error) {
|
||||
td, err := os.MkdirTemp(tempMountLocation, "ovl-idmapped")
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
cleanUp := func() {
|
||||
for _, lowerDir := range tmpLowerDirs {
|
||||
if err := unix.Unmount(lowerDir, 0); err != nil {
|
||||
logrus.WithError(err).Warnf("failed to unmount temp lowerdir %s", lowerDir)
|
||||
}
|
||||
}
|
||||
if terr := os.RemoveAll(filepath.Clean(filepath.Join(tmpLowerDirs[0], ".."))); terr != nil {
|
||||
logrus.WithError(terr).Warnf("failed to remove temporary overlay lowerdir's")
|
||||
}
|
||||
}
|
||||
for i, lowerDir := range lowerDirs {
|
||||
tmpLowerDir := filepath.Join(td, strconv.Itoa(i))
|
||||
tmpLowerDirs = append(tmpLowerDirs, tmpLowerDir)
|
||||
|
||||
if err = os.MkdirAll(tmpLowerDir, 0700); err != nil {
|
||||
return nil, cleanUp, fmt.Errorf("failed to create temporary dir: %w", err)
|
||||
}
|
||||
if err = IDMapMount(lowerDir, tmpLowerDir, usernsFd); err != nil {
|
||||
return nil, cleanUp, err
|
||||
}
|
||||
}
|
||||
return tmpLowerDirs, cleanUp, nil
|
||||
}
|
||||
|
||||
// Unmount the provided mount path with the flags
|
||||
func Unmount(target string, flags int) error {
|
||||
if err := unmount(target, flags); err != nil && err != unix.EINVAL {
|
||||
@ -208,14 +320,9 @@ func UnmountAll(mount string, flags int) error {
|
||||
|
||||
// parseMountOptions takes fstab style mount options and parses them for
|
||||
// use with a standard mount() syscall
|
||||
func parseMountOptions(options []string) (int, []string, bool) {
|
||||
var (
|
||||
flag int
|
||||
losetup bool
|
||||
data []string
|
||||
)
|
||||
func parseMountOptions(options []string) (opt mountOpt) {
|
||||
loopOpt := "loop"
|
||||
flags := map[string]struct {
|
||||
flagsMap := map[string]struct {
|
||||
clear bool
|
||||
flag int
|
||||
}{
|
||||
@ -249,19 +356,23 @@ func parseMountOptions(options []string) (int, []string, bool) {
|
||||
// If the option does not exist in the flags table or the flag
|
||||
// is not supported on the platform,
|
||||
// then it is a data value for a specific fs type
|
||||
if f, exists := flags[o]; exists && f.flag != 0 {
|
||||
if f, exists := flagsMap[o]; exists && f.flag != 0 {
|
||||
if f.clear {
|
||||
flag &^= f.flag
|
||||
opt.flags &^= f.flag
|
||||
} else {
|
||||
flag |= f.flag
|
||||
opt.flags |= f.flag
|
||||
}
|
||||
} else if o == loopOpt {
|
||||
losetup = true
|
||||
opt.losetup = true
|
||||
} else if strings.HasPrefix(o, "uidmap=") {
|
||||
opt.uidmap = strings.TrimPrefix(o, "uidmap=")
|
||||
} else if strings.HasPrefix(o, "gidmap=") {
|
||||
opt.gidmap = strings.TrimPrefix(o, "gidmap=")
|
||||
} else {
|
||||
data = append(data, o)
|
||||
opt.data = append(opt.data, o)
|
||||
}
|
||||
}
|
||||
return flag, data, losetup
|
||||
return
|
||||
}
|
||||
|
||||
func hasDirectIO(opts []string) (bool, []string) {
|
||||
|
@ -45,6 +45,7 @@ type SnapshotterConfig struct {
|
||||
upperdirLabel bool
|
||||
ms MetaStore
|
||||
mountOptions []string
|
||||
remapIds bool
|
||||
}
|
||||
|
||||
// Opt is an option to configure the overlay snapshotter
|
||||
@ -92,12 +93,18 @@ func WithMetaStore(ms MetaStore) Opt {
|
||||
}
|
||||
}
|
||||
|
||||
// WithRemapIds sets the remapIds flag on the snapshotter configuration.
// It always returns nil.
func WithRemapIds(config *SnapshotterConfig) error {
|
||||
config.remapIds = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// snapshotter is the overlayfs snapshotter state; NewSnapshotter fills the
// option fields from the SnapshotterConfig built by the supplied Opts.
type snapshotter struct {
|
||||
root string
|
||||
ms MetaStore
|
||||
// asyncRemove is copied from SnapshotterConfig.asyncRemove.
asyncRemove bool
|
||||
// upperdirLabel is copied from SnapshotterConfig.upperdirLabel.
upperdirLabel bool
|
||||
// options is copied from SnapshotterConfig.mountOptions.
options []string
|
||||
// remapIds is copied from SnapshotterConfig.remapIds (see WithRemapIds).
remapIds bool
|
||||
}
|
||||
|
||||
// NewSnapshotter returns a Snapshotter which uses overlayfs. The overlayfs
|
||||
@ -153,6 +160,7 @@ func NewSnapshotter(root string, opts ...Opt) (snapshots.Snapshotter, error) {
|
||||
asyncRemove: config.asyncRemove,
|
||||
upperdirLabel: config.upperdirLabel,
|
||||
options: config.mountOptions,
|
||||
remapIds: config.remapIds,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -259,16 +267,22 @@ func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snap
|
||||
// This can be used to recover mounts after calling View or Prepare.
|
||||
func (o *snapshotter) Mounts(ctx context.Context, key string) (_ []mount.Mount, err error) {
|
||||
var s storage.Snapshot
|
||||
var info snapshots.Info
|
||||
if err := o.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
|
||||
s, err = storage.GetSnapshot(ctx, key)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get active mount: %w", err)
|
||||
}
|
||||
|
||||
_, info, _, err = storage.GetInfo(ctx, key)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get snapshot info: %w", err)
|
||||
}
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return o.mounts(s), nil
|
||||
return o.mounts(s, info), nil
|
||||
}
|
||||
|
||||
func (o *snapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
|
||||
@ -402,10 +416,46 @@ func (o *snapshotter) getCleanupDirectories(ctx context.Context) ([]string, erro
|
||||
return cleanup, nil
|
||||
}
|
||||
|
||||
// scanIDMapping reads the three integers of a "container:host:length"
// mapping string.
func scanIDMapping(mapping string) (ctr, host, size int, err error) {
	_, err = fmt.Sscanf(mapping, "%d:%d:%d", &ctr, &host, &size)
	return ctr, host, size, err
}

// validateIDMapping reports whether mapping is a usable ID mapping: it must
// scan as three integers, none of them negative, and the container ID must
// be 0.
func validateIDMapping(mapping string) error {
	ctr, host, size, err := scanIDMapping(mapping)
	switch {
	case err != nil:
		return err
	case ctr < 0 || host < 0 || size < 0:
		// Almost impossible, but snapshots.WithLabels doesn't check it
		return fmt.Errorf("invalid mapping \"%d:%d:%d\"", ctr, host, size)
	case ctr != 0:
		return fmt.Errorf("container mapping of 0 is only supported")
	}
	return nil
}

// hostID returns the host ID (second field) of a valid mapping string, or -1
// and an error when the mapping does not pass validateIDMapping.
func hostID(mapping string) (int, error) {
	if err := validateIDMapping(mapping); err != nil {
		return -1, fmt.Errorf("invalid mapping: %w", err)
	}

	_, host, _, err := scanIDMapping(mapping)
	if err != nil {
		return -1, err
	}
	return host, nil
}
|
||||
|
||||
func (o *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) (_ []mount.Mount, err error) {
|
||||
var (
|
||||
s storage.Snapshot
|
||||
td, path string
|
||||
info snapshots.Info
|
||||
)
|
||||
|
||||
defer func() {
|
||||
@ -436,14 +486,46 @@ func (o *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, k
|
||||
return fmt.Errorf("failed to create snapshot: %w", err)
|
||||
}
|
||||
|
||||
if len(s.ParentIDs) > 0 {
|
||||
st, err := os.Stat(o.upperPath(s.ParentIDs[0]))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to stat parent: %w", err)
|
||||
}
|
||||
_, info, _, err = storage.GetInfo(ctx, key)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get snapshot info: %w", err)
|
||||
}
|
||||
|
||||
stat := st.Sys().(*syscall.Stat_t)
|
||||
if err := os.Lchown(filepath.Join(td, "fs"), int(stat.Uid), int(stat.Gid)); err != nil {
|
||||
mappedUID := -1
|
||||
mappedGID := -1
|
||||
// NOTE: if idmapped mounts' supported by hosted kernel there may be
|
||||
// no parents at all, so overlayfs will not work and snapshotter
|
||||
// will use bind mount. To be able to create file objects inside the
|
||||
// rootfs -- just chown this only bound directory according to provided
|
||||
// {uid,gid}map. In case of one/multiple parents -- chown upperdir.
|
||||
if v, ok := info.Labels[snapshots.LabelSnapshotUIDMapping]; ok {
|
||||
if mappedUID, err = hostID(v); err != nil {
|
||||
return fmt.Errorf("failed to parse UID mapping: %w", err)
|
||||
}
|
||||
}
|
||||
if v, ok := info.Labels[snapshots.LabelSnapshotGIDMapping]; ok {
|
||||
if mappedGID, err = hostID(v); err != nil {
|
||||
return fmt.Errorf("failed to parse GID mapping: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if mappedUID == -1 || mappedGID == -1 {
|
||||
if len(s.ParentIDs) > 0 {
|
||||
st, err := os.Stat(o.upperPath(s.ParentIDs[0]))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to stat parent: %w", err)
|
||||
}
|
||||
stat, ok := st.Sys().(*syscall.Stat_t)
|
||||
if !ok {
|
||||
return fmt.Errorf("incompatible types after stat call: *syscall.Stat_t expected")
|
||||
}
|
||||
mappedUID = int(stat.Uid)
|
||||
mappedGID = int(stat.Gid)
|
||||
}
|
||||
}
|
||||
|
||||
if mappedUID != -1 && mappedGID != -1 {
|
||||
if err := os.Lchown(filepath.Join(td, "fs"), mappedUID, mappedGID); err != nil {
|
||||
return fmt.Errorf("failed to chown: %w", err)
|
||||
}
|
||||
}
|
||||
@ -458,8 +540,7 @@ func (o *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, k
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return o.mounts(s), nil
|
||||
return o.mounts(s, info), nil
|
||||
}
|
||||
|
||||
func (o *snapshotter) prepareDirectory(ctx context.Context, snapshotDir string, kind snapshots.Kind) (string, error) {
|
||||
@ -481,7 +562,18 @@ func (o *snapshotter) prepareDirectory(ctx context.Context, snapshotDir string,
|
||||
return td, nil
|
||||
}
|
||||
|
||||
func (o *snapshotter) mounts(s storage.Snapshot) []mount.Mount {
|
||||
func (o *snapshotter) mounts(s storage.Snapshot, info snapshots.Info) []mount.Mount {
|
||||
var options []string
|
||||
|
||||
if o.remapIds {
|
||||
if v, ok := info.Labels[snapshots.LabelSnapshotUIDMapping]; ok {
|
||||
options = append(options, fmt.Sprintf("uidmap=%s", v))
|
||||
}
|
||||
if v, ok := info.Labels[snapshots.LabelSnapshotGIDMapping]; ok {
|
||||
options = append(options, fmt.Sprintf("gidmap=%s", v))
|
||||
}
|
||||
}
|
||||
|
||||
if len(s.ParentIDs) == 0 {
|
||||
// if we only have one layer/no parents then just return a bind mount as overlay
|
||||
// will not work
|
||||
@ -489,20 +581,18 @@ func (o *snapshotter) mounts(s storage.Snapshot) []mount.Mount {
|
||||
if s.Kind == snapshots.KindView {
|
||||
roFlag = "ro"
|
||||
}
|
||||
|
||||
return []mount.Mount{
|
||||
{
|
||||
Source: o.upperPath(s.ID),
|
||||
Type: "bind",
|
||||
Options: []string{
|
||||
Options: append(options,
|
||||
roFlag,
|
||||
"rbind",
|
||||
},
|
||||
),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
options := o.options
|
||||
if s.Kind == snapshots.KindActive {
|
||||
options = append(options,
|
||||
fmt.Sprintf("workdir=%s", o.workPath(s.ID)),
|
||||
@ -513,10 +603,10 @@ func (o *snapshotter) mounts(s storage.Snapshot) []mount.Mount {
|
||||
{
|
||||
Source: o.upperPath(s.ParentIDs[0]),
|
||||
Type: "bind",
|
||||
Options: []string{
|
||||
Options: append(options,
|
||||
"ro",
|
||||
"rbind",
|
||||
},
|
||||
),
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -525,8 +615,9 @@ func (o *snapshotter) mounts(s storage.Snapshot) []mount.Mount {
|
||||
for i := range s.ParentIDs {
|
||||
parentPaths[i] = o.upperPath(s.ParentIDs[i])
|
||||
}
|
||||
|
||||
options = append(options, fmt.Sprintf("lowerdir=%s", strings.Join(parentPaths, ":")))
|
||||
options = append(options, o.options...)
|
||||
|
||||
return []mount.Mount{
|
||||
{
|
||||
Type: "overlay",
|
||||
@ -534,7 +625,6 @@ func (o *snapshotter) mounts(s storage.Snapshot) []mount.Mount {
|
||||
Options: options,
|
||||
},
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (o *snapshotter) upperPath(id string) string {
|
||||
|
@ -26,12 +26,14 @@ import (
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"github.com/containerd/containerd"
|
||||
"github.com/containerd/containerd/mount"
|
||||
"github.com/containerd/containerd/pkg/testutil"
|
||||
"github.com/containerd/containerd/snapshots"
|
||||
"github.com/containerd/containerd/snapshots/overlay/overlayutils"
|
||||
"github.com/containerd/containerd/snapshots/storage"
|
||||
"github.com/containerd/containerd/snapshots/testsuite"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
func newSnapshotterWithOpts(opts ...Opt) testsuite.SnapshotterFunc {
|
||||
@ -51,12 +53,23 @@ func TestOverlay(t *testing.T) {
|
||||
"no opt": nil,
|
||||
// default in init()
|
||||
"AsynchronousRemove": {AsynchronousRemove},
|
||||
// idmapped mounts enabled
|
||||
"WithRemapIds": {WithRemapIds},
|
||||
}
|
||||
|
||||
for optsName, opts := range optTestCases {
|
||||
t.Run(optsName, func(t *testing.T) {
|
||||
newSnapshotter := newSnapshotterWithOpts(opts...)
|
||||
testsuite.SnapshotterSuite(t, "overlayfs", newSnapshotter)
|
||||
t.Run("TestOverlayRemappedBind", func(t *testing.T) {
|
||||
testOverlayRemappedBind(t, newSnapshotter)
|
||||
})
|
||||
t.Run("TestOverlayRemappedActive", func(t *testing.T) {
|
||||
testOverlayRemappedActive(t, newSnapshotter)
|
||||
})
|
||||
t.Run("TestOverlayRemappedInvalidMappings", func(t *testing.T) {
|
||||
testOverlayRemappedInvalidMapping(t, newSnapshotter)
|
||||
})
|
||||
t.Run("TestOverlayMounts", func(t *testing.T) {
|
||||
testOverlayMounts(t, newSnapshotter)
|
||||
})
|
||||
@ -156,28 +169,28 @@ func testOverlayOverlayMount(t *testing.T, newSnapshotter testsuite.SnapshotterF
|
||||
t.Errorf("expected source %q but received %q", "overlay", m.Source)
|
||||
}
|
||||
var (
|
||||
bp = getBasePath(ctx, o, root, "/tmp/layer2")
|
||||
work = "workdir=" + filepath.Join(bp, "work")
|
||||
upper = "upperdir=" + filepath.Join(bp, "fs")
|
||||
lower = "lowerdir=" + getParents(ctx, o, root, "/tmp/layer2")[0]
|
||||
expected []string
|
||||
bp = getBasePath(ctx, o, root, "/tmp/layer2")
|
||||
work = "workdir=" + filepath.Join(bp, "work")
|
||||
upper = "upperdir=" + filepath.Join(bp, "fs")
|
||||
lower = "lowerdir=" + getParents(ctx, o, root, "/tmp/layer2")[0]
|
||||
)
|
||||
|
||||
expected := []string{
|
||||
"index=off",
|
||||
}
|
||||
if !supportsIndex() {
|
||||
expected = expected[1:]
|
||||
expected = append(expected, []string{
|
||||
work,
|
||||
upper,
|
||||
lower,
|
||||
}...)
|
||||
|
||||
if supportsIndex() {
|
||||
expected = append(expected, "index=off")
|
||||
}
|
||||
if userxattr, err := overlayutils.NeedsUserXAttr(root); err != nil {
|
||||
t.Fatal(err)
|
||||
} else if userxattr {
|
||||
expected = append(expected, "userxattr")
|
||||
}
|
||||
expected = append(expected, []string{
|
||||
work,
|
||||
upper,
|
||||
lower,
|
||||
}...)
|
||||
|
||||
for i, v := range expected {
|
||||
if m.Options[i] != v {
|
||||
t.Errorf("expected %q but received %q", v, m.Options[i])
|
||||
@ -185,6 +198,241 @@ func testOverlayOverlayMount(t *testing.T, newSnapshotter testsuite.SnapshotterF
|
||||
}
|
||||
}
|
||||
|
||||
func testOverlayRemappedBind(t *testing.T, newSnapshotter testsuite.SnapshotterFunc) {
|
||||
var (
|
||||
opts []snapshots.Opt
|
||||
mounts []mount.Mount
|
||||
)
|
||||
|
||||
ctx := context.TODO()
|
||||
root := t.TempDir()
|
||||
o, _, err := newSnapshotter(ctx, root)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if sn, ok := o.(*snapshotter); !ok || !sn.remapIds {
|
||||
t.Skip("overlayfs doesn't support idmapped mounts")
|
||||
}
|
||||
|
||||
hostID := uint32(666)
|
||||
contID := uint32(0)
|
||||
length := uint32(65536)
|
||||
|
||||
uidMap := specs.LinuxIDMapping{
|
||||
ContainerID: contID,
|
||||
HostID: hostID,
|
||||
Size: length,
|
||||
}
|
||||
gidMap := specs.LinuxIDMapping{
|
||||
ContainerID: contID,
|
||||
HostID: hostID,
|
||||
Size: length,
|
||||
}
|
||||
opts = append(opts, containerd.WithRemapperLabels(
|
||||
uidMap.ContainerID, uidMap.HostID,
|
||||
gidMap.ContainerID, gidMap.HostID,
|
||||
length),
|
||||
)
|
||||
|
||||
key := "/tmp/test"
|
||||
if mounts, err = o.Prepare(ctx, key, "", opts...); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
bp := getBasePath(ctx, o, root, key)
|
||||
expected := []string{
|
||||
fmt.Sprintf("uidmap=%d:%d:%d", uidMap.ContainerID, uidMap.HostID, uidMap.Size),
|
||||
fmt.Sprintf("gidmap=%d:%d:%d", gidMap.ContainerID, gidMap.HostID, gidMap.Size),
|
||||
"rw",
|
||||
"rbind",
|
||||
}
|
||||
|
||||
checkMountOpts := func() {
|
||||
if len(mounts) != 1 {
|
||||
t.Errorf("should only have 1 mount but received %d", len(mounts))
|
||||
}
|
||||
|
||||
if len(mounts[0].Options) != len(expected) {
|
||||
t.Errorf("expected %d options, but received %d", len(expected), len(mounts[0].Options))
|
||||
}
|
||||
|
||||
m := mounts[0]
|
||||
for i, v := range expected {
|
||||
if m.Options[i] != v {
|
||||
t.Errorf("mount option %q is not valid, expected %q", m.Options[i], v)
|
||||
}
|
||||
}
|
||||
|
||||
st, err := os.Stat(filepath.Join(bp, "fs"))
|
||||
if err != nil {
|
||||
t.Errorf("failed to stat %s", filepath.Join(bp, "fs"))
|
||||
}
|
||||
|
||||
if stat, ok := st.Sys().(*syscall.Stat_t); !ok {
|
||||
t.Errorf("incompatible types after stat call: *syscall.Stat_t expected")
|
||||
} else if stat.Uid != uidMap.HostID || stat.Gid != gidMap.HostID {
|
||||
t.Errorf("bad mapping: expected {uid: %d, gid: %d}; real {uid: %d, gid: %d}", uidMap.HostID, gidMap.HostID, int(stat.Uid), int(stat.Gid))
|
||||
}
|
||||
}
|
||||
checkMountOpts()
|
||||
|
||||
expected[2] = "ro"
|
||||
if err = o.Commit(ctx, "base", key, opts...); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if mounts, err = o.View(ctx, key, "base", opts...); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
bp = getBasePath(ctx, o, root, key)
|
||||
checkMountOpts()
|
||||
|
||||
key = "/tmp/test1"
|
||||
if mounts, err = o.Prepare(ctx, key, ""); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
bp = getBasePath(ctx, o, root, key)
|
||||
|
||||
expected = expected[2:]
|
||||
expected[0] = "rw"
|
||||
|
||||
uidMap.HostID = 0
|
||||
gidMap.HostID = 0
|
||||
|
||||
checkMountOpts()
|
||||
}
|
||||
|
||||
func testOverlayRemappedActive(t *testing.T, newSnapshotter testsuite.SnapshotterFunc) {
|
||||
var (
|
||||
opts []snapshots.Opt
|
||||
mounts []mount.Mount
|
||||
)
|
||||
|
||||
ctx := context.TODO()
|
||||
root := t.TempDir()
|
||||
o, _, err := newSnapshotter(ctx, root)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if sn, ok := o.(*snapshotter); !ok || !sn.remapIds {
|
||||
t.Skip("overlayfs doesn't support idmapped mounts")
|
||||
}
|
||||
|
||||
hostID := uint32(666)
|
||||
contID := uint32(0)
|
||||
length := uint32(65536)
|
||||
|
||||
uidMap := specs.LinuxIDMapping{
|
||||
ContainerID: contID,
|
||||
HostID: hostID,
|
||||
Size: length,
|
||||
}
|
||||
gidMap := specs.LinuxIDMapping{
|
||||
ContainerID: contID,
|
||||
HostID: hostID,
|
||||
Size: length,
|
||||
}
|
||||
opts = append(opts, containerd.WithRemapperLabels(
|
||||
uidMap.ContainerID, uidMap.HostID,
|
||||
gidMap.ContainerID, gidMap.HostID,
|
||||
length),
|
||||
)
|
||||
|
||||
key := "/tmp/test"
|
||||
if _, err = o.Prepare(ctx, key, "", opts...); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = o.Commit(ctx, "base", key, opts...); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if mounts, err = o.Prepare(ctx, key, "base", opts...); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(mounts) != 1 {
|
||||
t.Errorf("should only have 1 mount but received %d", len(mounts))
|
||||
}
|
||||
|
||||
bp := getBasePath(ctx, o, root, key)
|
||||
expected := []string{
|
||||
fmt.Sprintf("uidmap=%d:%d:%d", uidMap.ContainerID, uidMap.HostID, uidMap.Size),
|
||||
fmt.Sprintf("gidmap=%d:%d:%d", gidMap.ContainerID, gidMap.HostID, gidMap.Size),
|
||||
fmt.Sprintf("workdir=%s", filepath.Join(bp, "work")),
|
||||
fmt.Sprintf("upperdir=%s", filepath.Join(bp, "fs")),
|
||||
fmt.Sprintf("lowerdir=%s", getParents(ctx, o, root, key)[0]),
|
||||
}
|
||||
|
||||
m := mounts[0]
|
||||
for i, v := range expected {
|
||||
if m.Options[i] != v {
|
||||
t.Errorf("mount option %q is invalid, expected %q", m.Options[i], v)
|
||||
}
|
||||
}
|
||||
|
||||
st, err := os.Stat(filepath.Join(bp, "fs"))
|
||||
if err != nil {
|
||||
t.Errorf("failed to stat %s", filepath.Join(bp, "fs"))
|
||||
}
|
||||
if stat, ok := st.Sys().(*syscall.Stat_t); !ok {
|
||||
t.Errorf("incompatible types after stat call: *syscall.Stat_t expected")
|
||||
} else if stat.Uid != uidMap.HostID || stat.Gid != gidMap.HostID {
|
||||
t.Errorf("bad mapping: expected {uid: %d, gid: %d}; received {uid: %d, gid: %d}", uidMap.HostID, gidMap.HostID, int(stat.Uid), int(stat.Gid))
|
||||
}
|
||||
}
|
||||
|
||||
func testOverlayRemappedInvalidMapping(t *testing.T, newSnapshotter testsuite.SnapshotterFunc) {
|
||||
ctx := context.TODO()
|
||||
root := t.TempDir()
|
||||
o, _, err := newSnapshotter(ctx, root)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if sn, ok := o.(*snapshotter); !ok || !sn.remapIds {
|
||||
t.Skip("overlayfs doesn't support idmapped mounts")
|
||||
}
|
||||
|
||||
key := "/tmp/test"
|
||||
for desc, opts := range map[string][]snapshots.Opt{
|
||||
"WithLabels: negative UID mapping must fail": {
|
||||
snapshots.WithLabels(map[string]string{
|
||||
snapshots.LabelSnapshotUIDMapping: "-1:-1:-2",
|
||||
snapshots.LabelSnapshotGIDMapping: "0:0:66666",
|
||||
}),
|
||||
},
|
||||
"WithLabels: negative GID mapping must fail": {
|
||||
snapshots.WithLabels(map[string]string{
|
||||
snapshots.LabelSnapshotUIDMapping: "0:0:66666",
|
||||
snapshots.LabelSnapshotGIDMapping: "-1:-1:-2",
|
||||
}),
|
||||
},
|
||||
"WithLabels: negative GID/UID mappings must fail": {
|
||||
snapshots.WithLabels(map[string]string{
|
||||
snapshots.LabelSnapshotUIDMapping: "-666:-666:-666",
|
||||
snapshots.LabelSnapshotGIDMapping: "-666:-666:-666",
|
||||
}),
|
||||
},
|
||||
"WithRemapperLabels: container ID (GID/UID) other than 0 must fail": {
|
||||
containerd.WithRemapperLabels(666, 666, 666, 666, 666),
|
||||
},
|
||||
"WithRemapperLabels: container ID (UID) other than 0 must fail": {
|
||||
containerd.WithRemapperLabels(666, 0, 0, 0, 65536),
|
||||
},
|
||||
"WithRemapperLabels: container ID (GID) other than 0 must fail": {
|
||||
containerd.WithRemapperLabels(0, 0, 666, 0, 4294967295),
|
||||
},
|
||||
} {
|
||||
t.Log(desc)
|
||||
if _, err = o.Prepare(ctx, key, "", opts...); err == nil {
|
||||
t.Fatalf("snapshots with invalid mappings must fail")
|
||||
}
|
||||
// remove may fail, but it doesn't matter
|
||||
_ = o.Remove(ctx, key)
|
||||
}
|
||||
}
|
||||
|
||||
func getBasePath(ctx context.Context, sn snapshots.Snapshotter, root, key string) string {
|
||||
o := sn.(*snapshotter)
|
||||
ctx, t, err := o.ms.TransactionContext(ctx, false)
|
||||
@ -306,6 +554,7 @@ func testOverlayView(t *testing.T, newSnapshotter testsuite.SnapshotterFunc) {
|
||||
if m.Source != expected {
|
||||
t.Errorf("expected source %q but received %q", expected, m.Source)
|
||||
}
|
||||
|
||||
if m.Options[0] != "ro" {
|
||||
t.Errorf("expected mount option ro but received %q", m.Options[0])
|
||||
}
|
||||
@ -345,18 +594,13 @@ func testOverlayView(t *testing.T, newSnapshotter testsuite.SnapshotterFunc) {
|
||||
t.Errorf("expected %d additional mount option but got %d", expectedOptions, len(m.Options))
|
||||
}
|
||||
lowers := getParents(ctx, o, root, "/tmp/view2")
|
||||
|
||||
expected = fmt.Sprintf("lowerdir=%s:%s", lowers[0], lowers[1])
|
||||
optIdx := 2
|
||||
if !supportsIndex {
|
||||
optIdx--
|
||||
if m.Options[0] != expected {
|
||||
t.Errorf("expected option %q but received %q", expected, m.Options[0])
|
||||
}
|
||||
if userxattr {
|
||||
optIdx++
|
||||
}
|
||||
if m.Options[0] != "volatile" {
|
||||
|
||||
if m.Options[1] != "volatile" {
|
||||
t.Error("expected option first option to be provided option \"volatile\"")
|
||||
}
|
||||
if m.Options[optIdx] != expected {
|
||||
t.Errorf("expected option %q but received %q", expected, m.Options[optIdx])
|
||||
}
|
||||
}
|
||||
|
@ -24,6 +24,8 @@ import (
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
kernel "github.com/containerd/containerd/contrib/seccomp/kernelversion"
|
||||
"github.com/containerd/containerd/log"
|
||||
"github.com/containerd/containerd/mount"
|
||||
@ -198,3 +200,98 @@ func NeedsUserXAttr(d string) (bool, error) {
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// SupportsIDMappedMounts tells if this kernel supports idmapped mounts for overlayfs
|
||||
// or not.
|
||||
//
|
||||
// This function returns error whether the kernel supports idmapped mounts
|
||||
// for overlayfs or not, i.e. if e.g. -ENOSYS may be returned as well as -EPERM.
|
||||
// So, caller should check for (true, err == nil), otherwise treat it as there's
|
||||
// no support from the kernel side.
|
||||
func SupportsIDMappedMounts() (bool, error) {
|
||||
// Fast path
|
||||
fiveDotNineteen := kernel.KernelVersion{Kernel: 5, Major: 19}
|
||||
if ok, err := kernel.GreaterEqualThan(fiveDotNineteen); err == nil && ok {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// Do slow path, because idmapped mounts may be backported to older kernels.
|
||||
uidMap := syscall.SysProcIDMap{
|
||||
ContainerID: 0,
|
||||
HostID: 666,
|
||||
Size: 1,
|
||||
}
|
||||
gidMap := syscall.SysProcIDMap{
|
||||
ContainerID: 0,
|
||||
HostID: 666,
|
||||
Size: 1,
|
||||
}
|
||||
td, err := os.MkdirTemp("", "ovl-idmapped-check")
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to create check directory: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := os.RemoveAll(td); err != nil {
|
||||
log.L.WithError(err).Warnf("failed to remove check directory %s", td)
|
||||
}
|
||||
}()
|
||||
|
||||
for _, dir := range []string{"lower", "upper", "work", "merged"} {
|
||||
if err = os.Mkdir(filepath.Join(td, dir), 0755); err != nil {
|
||||
return false, fmt.Errorf("failed to create %s directory: %w", dir, err)
|
||||
}
|
||||
}
|
||||
defer func() {
|
||||
if err = os.RemoveAll(td); err != nil {
|
||||
log.L.WithError(err).Warnf("failed remove overlay check directory %s", td)
|
||||
}
|
||||
}()
|
||||
|
||||
if err = os.Lchown(filepath.Join(td, "upper"), uidMap.HostID, gidMap.HostID); err != nil {
|
||||
return false, fmt.Errorf("failed to chown upper directory %s: %w", filepath.Join(td, "upper"), err)
|
||||
}
|
||||
|
||||
lowerDir := filepath.Join(td, "lower")
|
||||
uidmap := fmt.Sprintf("%d:%d:%d", uidMap.ContainerID, uidMap.HostID, uidMap.Size)
|
||||
gidmap := fmt.Sprintf("%d:%d:%d", gidMap.ContainerID, gidMap.HostID, gidMap.Size)
|
||||
|
||||
usernsFd, childProcCleanUp, err := mount.GetUsernsFD(uidmap, gidmap)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
defer childProcCleanUp()
|
||||
|
||||
if err = mount.IDMapMount(lowerDir, lowerDir, usernsFd); err != nil {
|
||||
return false, fmt.Errorf("failed to remap lowerdir %s: %w", lowerDir, err)
|
||||
}
|
||||
defer func() {
|
||||
if err = unix.Unmount(lowerDir, 0); err != nil {
|
||||
log.L.WithError(err).Warnf("failed to unmount lowerdir %s", lowerDir)
|
||||
}
|
||||
}()
|
||||
|
||||
opts := fmt.Sprintf("index=off,lowerdir=%s,upperdir=%s,workdir=%s", lowerDir, filepath.Join(td, "upper"), filepath.Join(td, "work"))
|
||||
if err = unix.Mount("", filepath.Join(td, "merged"), "overlay", uintptr(unix.MS_RDONLY), opts); err != nil {
|
||||
return false, fmt.Errorf("failed to mount idmapped overlay to %s: %w", filepath.Join(td, "merged"), err)
|
||||
}
|
||||
defer func() {
|
||||
if err = unix.Unmount(filepath.Join(td, "merged"), 0); err != nil {
|
||||
log.L.WithError(err).Warnf("failed to unmount overlay check directory %s", filepath.Join(td, "merged"))
|
||||
}
|
||||
}()
|
||||
|
||||
// NOTE: we can't just return true if mount didn't fail since overlay supports
|
||||
// idmappings for {lower,upper}dir. That means we need to check merged directory
|
||||
// to make sure it completely supports idmapped mounts.
|
||||
st, err := os.Stat(filepath.Join(td, "merged"))
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to stat %s: %w", filepath.Join(td, "merged"), err)
|
||||
}
|
||||
if stat, ok := st.Sys().(*syscall.Stat_t); !ok {
|
||||
return false, fmt.Errorf("incompatible types after stat call: *syscall.Stat_t expected")
|
||||
} else if int(stat.Uid) != uidMap.HostID || int(stat.Gid) != gidMap.HostID {
|
||||
return false, fmt.Errorf("bad mapping: expected {uid: %d, gid: %d}; real {uid: %d, gid: %d}", uidMap.HostID, gidMap.HostID, int(stat.Uid), int(stat.Gid))
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
@ -24,6 +24,11 @@ import (
|
||||
"github.com/containerd/containerd/platforms"
|
||||
"github.com/containerd/containerd/plugin"
|
||||
"github.com/containerd/containerd/snapshots/overlay"
|
||||
"github.com/containerd/containerd/snapshots/overlay/overlayutils"
|
||||
)
|
||||
|
||||
const (
|
||||
capaRemapIds = "remap-ids"
|
||||
)
|
||||
|
||||
// Config represents configuration for the overlay plugin.
|
||||
@ -66,6 +71,10 @@ func init() {
|
||||
if len(config.MountOptions) > 0 {
|
||||
oOpts = append(oOpts, overlay.WithMountOptions(config.MountOptions))
|
||||
}
|
||||
if ok, err := overlayutils.SupportsIDMappedMounts(); err == nil && ok {
|
||||
oOpts = append(oOpts, overlay.WithRemapIds)
|
||||
ic.Meta.Capabilities = append(ic.Meta.Capabilities, capaRemapIds)
|
||||
}
|
||||
|
||||
ic.Meta.Exports["root"] = root
|
||||
return overlay.NewSnapshotter(root, oOpts...)
|
||||
|
30
sys/subprocess_unsafe_linux.go
Normal file
30
sys/subprocess_unsafe_linux.go
Normal file
@ -0,0 +1,30 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import (
|
||||
_ "unsafe" // required for go:linkname.
|
||||
)
|
||||
|
||||
// beforeFork is bound via go:linkname to syscall.runtime_BeforeFork.
// ForkUserns calls it immediately before issuing the raw clone syscall.
// The declaration has no body; the implementation is supplied by the linked symbol.
//go:linkname beforeFork syscall.runtime_BeforeFork
|
||||
func beforeFork()
|
||||
|
||||
// afterFork is bound via go:linkname to syscall.runtime_AfterFork.
// ForkUserns calls it on the path that returns to the caller (clone failed,
// or a non-zero pid was returned).
//go:linkname afterFork syscall.runtime_AfterFork
|
||||
func afterFork()
|
||||
|
||||
// afterForkInChild is bound via go:linkname to syscall.runtime_AfterForkInChild.
// ForkUserns calls it first thing on the path where clone returned pid 0.
//go:linkname afterForkInChild syscall.runtime_AfterForkInChild
|
||||
func afterForkInChild()
|
65
sys/userns_unsafe_linux.go
Normal file
65
sys/userns_unsafe_linux.go
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// ProcSyncType is used for synchronization
|
||||
// between parent and child processes.
// The child created by ForkUserns reads exactly one value of this type
// (a single byte) from its pipe.
|
||||
type ProcSyncType uint8
|
||||
|
||||
const (
|
||||
// ProcSyncExit tells child "it's time to exit".
// ForkUserns blocks on a pipe read and exits once a value has been read;
// presumably the parent writes this value to the pipe — confirm against the caller.
|
||||
ProcSyncExit ProcSyncType = 0x1
|
||||
)
|
||||
|
||||
// ForkUserns clones the current process with CLONE_NEWUSER (and SIGCHLD as the
// termination signal) using raw syscalls.
//
// On the returning path it yields the pid reported by clone and the errno of
// the clone call. On the path where clone returned pid 0 the function never
// returns: it closes pipeMap[1], asks to be sent SIGKILL via PR_SET_PDEATHSIG,
// blocks reading one ProcSyncType value from pipeMap[0], and then exits with
// the last errno as its exit status.
//
// NOTE(review): pipeMap looks like a {read end, write end} pipe pair shared
// with the caller — confirm against the caller.
//go:norace
|
||||
//go:noinline
|
||||
func ForkUserns(pipeMap [2]int) (pid uintptr, errno syscall.Errno) {
|
||||
var sync ProcSyncType
|
||||
|
||||
beforeFork()
|
||||
// s390x receives the clone flags as the second raw argument instead of the
// first (presumably because the kernel swaps the stack/flags arguments on
// that architecture — confirm against clone(2)).
if runtime.GOARCH == "s390x" {
|
||||
pid, _, errno = syscall.RawSyscall6(uintptr(syscall.SYS_CLONE), 0, syscall.CLONE_NEWUSER|uintptr(syscall.SIGCHLD), 0, 0, 0, 0)
|
||||
} else {
|
||||
pid, _, errno = syscall.RawSyscall6(uintptr(syscall.SYS_CLONE), syscall.CLONE_NEWUSER|uintptr(syscall.SIGCHLD), 0, 0, 0, 0, 0)
|
||||
}
|
||||
// Clone failed, or a non-zero pid came back: undo beforeFork and return to the caller.
if errno != 0 || pid != 0 {
|
||||
afterFork()
|
||||
return pid, errno
|
||||
}
|
||||
|
||||
// From here on clone returned pid 0; only raw syscalls are issued until exit.
afterForkInChild()
|
||||
// Close pipeMap[1]; only pipeMap[0] is used below.
if _, _, errno = syscall.RawSyscall(syscall.SYS_CLOSE, uintptr(pipeMap[1]), 0, 0); errno != 0 {
|
||||
goto err
|
||||
}
|
||||
// Request SIGKILL as the parent-death signal.
if _, _, errno = syscall.RawSyscall6(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(syscall.SIGKILL), 0, 0, 0, 0); errno != 0 {
|
||||
goto err
|
||||
}
|
||||
// wait for parent's signal
|
||||
if _, _, errno = syscall.RawSyscall6(syscall.SYS_READ, uintptr(pipeMap[0]), uintptr(unsafe.Pointer(&sync)), unsafe.Sizeof(sync), 0, 0, 0); errno != 0 || sync != ProcSyncExit {
|
||||
goto err
|
||||
}
|
||||
|
||||
// Both the success path and every failure path end up here: the exit status
// is whatever errno currently holds (0 when the last syscall succeeded, even
// if the value read was not ProcSyncExit).
err:
|
||||
syscall.RawSyscall6(syscall.SYS_EXIT, uintptr(errno), 0, 0, 0, 0, 0)
|
||||
// SYS_EXIT does not return; the panic only satisfies the compiler's
// requirement for a terminating statement.
panic("unreachable")
|
||||
}
|
Loading…
Reference in New Issue
Block a user