mount: support idmapped mount points
This patch introduces idmapped mounts support for container rootfs. Idmapped mounts support was merged in Linux kernel 5.12 (torvalds/linux@7d6beb7). This functionality makes it possible to address the chown overhead for containers that use a user namespace. The changes are based on the experimental patchset published by Mauricio Vásquez (#4734). The current version reimplements support for idmapped mounts in Go. Performance measurement results (image: idmapped mount vs. recursive chown): BusyBox: 00.135 vs. 04.964; Ubuntu: 00.171 vs. 15.713; Fedora: 00.143 vs. 38.799. Signed-off-by: Mauricio Vásquez <mauricio@kinvolk.io> Signed-off-by: Artem Kuzin <artem.kuzin@huawei.com> Signed-off-by: Alexey Perevalov <alexey.perevalov@huawei.com> Signed-off-by: Ilya Hanov <ilya.hanov@huawei-partners.com>
This commit is contained in:
parent
723c88ce30
commit
1555a31bf6
@ -186,8 +186,9 @@ func NewContainer(ctx gocontext.Context, client *containerd.Client, context *cli
|
|||||||
opts = append(opts,
|
opts = append(opts,
|
||||||
oci.WithUserNamespace([]specs.LinuxIDMapping{uidMap}, []specs.LinuxIDMapping{gidMap}))
|
oci.WithUserNamespace([]specs.LinuxIDMapping{uidMap}, []specs.LinuxIDMapping{gidMap}))
|
||||||
// use snapshotter opts or the remapped snapshot support to shift the filesystem
|
// use snapshotter opts or the remapped snapshot support to shift the filesystem
|
||||||
// currently the only snapshotter known to support the labels is fuse-overlayfs:
|
// currently the snapshotters known to support the labels are:
|
||||||
// https://github.com/AkihiroSuda/containerd-fuse-overlayfs
|
// fuse-overlayfs - https://github.com/containerd/fuse-overlayfs-snapshotter
|
||||||
|
// overlay - when idmapped mount points are supported by the host kernel (Linux kernel 5.19)
|
||||||
if context.Bool("remap-labels") {
|
if context.Bool("remap-labels") {
|
||||||
cOpts = append(cOpts, containerd.WithNewSnapshot(id, image,
|
cOpts = append(cOpts, containerd.WithNewSnapshot(id, image,
|
||||||
containerd.WithRemapperLabels(0, uidMap.HostID, 0, gidMap.HostID, uidMap.Size)))
|
containerd.WithRemapperLabels(0, uidMap.HostID, 0, gidMap.HostID, uidMap.Size)))
|
||||||
|
166
mount/mount_idmapped_linux.go
Normal file
166
mount/mount_idmapped_linux.go
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package mount
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"syscall"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
"github.com/containerd/containerd/sys"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// parseIDMapping parses a single ID mapping written as
// "container-id:host-id:size" and returns it as a one-element slice.
//
// Only a container ID of 0 is accepted, the host ID must be non-negative and
// the size must be strictly positive: a zero-length extent cannot be written
// to /proc/[pid]/{uid,gid}_map, so it is rejected here with a clear error
// instead of failing later during the write.
//
// TODO: Support multiple mappings in future
func parseIDMapping(mapping string) ([]syscall.SysProcIDMap, error) {
	parts := strings.Split(mapping, ":")
	if len(parts) != 3 {
		return nil, fmt.Errorf("user namespace mappings require the format `container-id:host-id:size`")
	}
	cID, err := strconv.Atoi(parts[0])
	if err != nil {
		return nil, fmt.Errorf("invalid container id for user namespace remapping, %w", err)
	}
	hID, err := strconv.Atoi(parts[1])
	if err != nil {
		return nil, fmt.Errorf("invalid host id for user namespace remapping, %w", err)
	}
	size, err := strconv.Atoi(parts[2])
	if err != nil {
		return nil, fmt.Errorf("invalid size for user namespace remapping, %w", err)
	}
	// size == 0 used to slip through (the check was `size < 0`) even though
	// the error text already demanded a positive value.
	if cID != 0 || hID < 0 || size <= 0 {
		return nil, fmt.Errorf("invalid mapping %s, host ID must be a non-negative integer and size must be a positive integer (container ID of 0 is only supported)", mapping)
	}
	return []syscall.SysProcIDMap{
		{
			ContainerID: cID,
			HostID:      hID,
			Size:        size,
		},
	}, nil
}
|
||||||
|
|
||||||
|
// IDMapMount applies GID/UID shift according to gidmap/uidmap for target path
|
||||||
|
func IDMapMount(source, target string, usernsFd int) (err error) {
|
||||||
|
var (
|
||||||
|
attr unix.MountAttr
|
||||||
|
)
|
||||||
|
|
||||||
|
attr.Attr_set = unix.MOUNT_ATTR_IDMAP
|
||||||
|
attr.Attr_clr = 0
|
||||||
|
attr.Propagation = 0
|
||||||
|
attr.Userns_fd = uint64(usernsFd)
|
||||||
|
|
||||||
|
dFd, err := unix.OpenTree(-int(unix.EBADF), source, uint(unix.OPEN_TREE_CLONE|unix.OPEN_TREE_CLOEXEC|unix.AT_EMPTY_PATH))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Unable to open tree for %s: %w", target, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
defer unix.Close(dFd)
|
||||||
|
if err = unix.MountSetattr(dFd, "", unix.AT_EMPTY_PATH, &attr); err != nil {
|
||||||
|
return fmt.Errorf("Unable to shift GID/UID for %s: %w", target, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = unix.MoveMount(dFd, "", -int(unix.EBADF), target, unix.MOVE_MOUNT_F_EMPTY_PATH); err != nil {
|
||||||
|
return fmt.Errorf("Unable to attach mount tree to %s: %w", target, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetUsernsFD forks the current process and creates a user namespace using the specified
|
||||||
|
// mappings.
|
||||||
|
//
|
||||||
|
// It returns:
|
||||||
|
// 1. The file descriptor of the /proc/[pid]/ns/user of the newly
|
||||||
|
// created mapping.
|
||||||
|
// 2. "Clean up" function that should be called once user namespace
|
||||||
|
// file descriptor is no longer needed.
|
||||||
|
// 3. Usual error.
|
||||||
|
func GetUsernsFD(uidmap, gidmap string) (_ int, _ func(), err error) {
|
||||||
|
var (
|
||||||
|
usernsFile *os.File
|
||||||
|
pipeMap [2]int
|
||||||
|
pid uintptr
|
||||||
|
errno syscall.Errno
|
||||||
|
uidMaps, gidMaps []syscall.SysProcIDMap
|
||||||
|
)
|
||||||
|
|
||||||
|
if uidMaps, err = parseIDMapping(uidmap); err != nil {
|
||||||
|
return -1, nil, err
|
||||||
|
}
|
||||||
|
if gidMaps, err = parseIDMapping(gidmap); err != nil {
|
||||||
|
return -1, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
syscall.ForkLock.Lock()
|
||||||
|
if err = syscall.Pipe2(pipeMap[:], syscall.O_CLOEXEC); err != nil {
|
||||||
|
syscall.ForkLock.Unlock()
|
||||||
|
return -1, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
pid, errno = sys.ForkUserns(pipeMap)
|
||||||
|
syscall.ForkLock.Unlock()
|
||||||
|
if errno != 0 {
|
||||||
|
syscall.Close(pipeMap[0])
|
||||||
|
syscall.Close(pipeMap[1])
|
||||||
|
return -1, nil, errno
|
||||||
|
}
|
||||||
|
|
||||||
|
syscall.Close(pipeMap[0])
|
||||||
|
|
||||||
|
writeMappings := func(fname string, idmap []syscall.SysProcIDMap) error {
|
||||||
|
mappings := ""
|
||||||
|
for _, m := range idmap {
|
||||||
|
mappings = fmt.Sprintf("%d %d %d\n", m.ContainerID, m.HostID, m.Size)
|
||||||
|
}
|
||||||
|
return os.WriteFile(fmt.Sprintf("/proc/%d/%s", pid, fname), []byte(mappings), 0600)
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanUpChild := func() {
|
||||||
|
sync := sys.ProcSyncExit
|
||||||
|
if _, _, errno := syscall.Syscall6(syscall.SYS_WRITE, uintptr(pipeMap[1]), uintptr(unsafe.Pointer(&sync)), unsafe.Sizeof(sync), 0, 0, 0); errno != 0 {
|
||||||
|
logrus.WithError(errno).Warnf("failed to sync with child (ProcSyncExit)")
|
||||||
|
}
|
||||||
|
syscall.Close(pipeMap[1])
|
||||||
|
|
||||||
|
if _, err := unix.Wait4(int(pid), nil, 0, nil); err != nil {
|
||||||
|
logrus.WithError(err).Warnf("failed to wait for child process; the SIGHLD might be received by shim reaper")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
defer cleanUpChild()
|
||||||
|
|
||||||
|
if err := writeMappings("uid_map", uidMaps); err != nil {
|
||||||
|
return -1, nil, err
|
||||||
|
}
|
||||||
|
if err := writeMappings("gid_map", gidMaps); err != nil {
|
||||||
|
return -1, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if usernsFile, err = os.Open(fmt.Sprintf("/proc/%d/ns/user", pid)); err != nil {
|
||||||
|
return -1, nil, fmt.Errorf("failed to get user ns file descriptor for - /proc/%d/user/ns: %w", pid, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return int(usernsFile.Fd()), func() {
|
||||||
|
usernsFile.Close()
|
||||||
|
}, nil
|
||||||
|
}
|
@ -21,14 +21,26 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
|
||||||
exec "golang.org/x/sys/execabs"
|
exec "golang.org/x/sys/execabs"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// mountOpt is the parsed form of a mount's option list, as produced by
// parseMountOptions.
type mountOpt struct {
|
||||||
|
// flags accumulates the mount flags of the options found in the flags table.
flags int
|
||||||
|
// data holds the options that matched nothing else; they are later joined
// with "," to form the mount data string.
data []string
|
||||||
|
// losetup is true when the "loop" option is present.
losetup bool
|
||||||
|
// uidmap is the value of the "uidmap=" option ("" when absent).
uidmap string
|
||||||
|
// gidmap is the value of the "gidmap=" option ("" when absent).
gidmap string
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
pagesize = 4096
|
pagesize = 4096
|
||||||
allowedHelperBinaries = []string{"mount.fuse", "mount.fuse3"}
|
allowedHelperBinaries = []string{"mount.fuse", "mount.fuse3"}
|
||||||
@ -38,6 +50,34 @@ func init() {
|
|||||||
pagesize = os.Getpagesize()
|
pagesize = os.Getpagesize()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// prepareIDMappedOverlay is a helper function to obtain
|
||||||
|
// actual "lowerdir=..." mount options. It creates and
|
||||||
|
// applies id mapping for each lowerdir.
|
||||||
|
//
|
||||||
|
// It returns:
|
||||||
|
// 1. New options that include new "lowedir=..." mount option.
|
||||||
|
// 2. "Clean up" function -- it should be called as a defer one before
|
||||||
|
// checking for error, because if do the second and avoid calling "clean up",
|
||||||
|
// you're going to have "dirty" setup -- there's no guarantee that those
|
||||||
|
// temporary mount points for lowedirs will be cleaned properly.
|
||||||
|
// 3. Error -- nil if everything's fine, otherwise an error.
|
||||||
|
func prepareIDMappedOverlay(usernsFd int, options []string) ([]string, func(), error) {
|
||||||
|
lowerIdx, lowerDirs := findOverlayLowerdirs(options)
|
||||||
|
if lowerIdx == -1 {
|
||||||
|
return options, nil, fmt.Errorf("failed to parse overlay lowerdir's from given options")
|
||||||
|
}
|
||||||
|
|
||||||
|
tmpLowerdirs, idMapCleanUp, err := doPrepareIDMappedOverlay(lowerDirs, usernsFd)
|
||||||
|
if err != nil {
|
||||||
|
return options, idMapCleanUp, fmt.Errorf("failed to create idmapped mount: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
options = append(options[:lowerIdx], options[lowerIdx+1:]...)
|
||||||
|
options = append(options, fmt.Sprintf("lowerdir=%s", strings.Join(tmpLowerdirs, ":")))
|
||||||
|
|
||||||
|
return options, idMapCleanUp, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Mount to the provided target path.
|
// Mount to the provided target path.
|
||||||
//
|
//
|
||||||
// If m.Type starts with "fuse." or "fuse3.", "mount.fuse" or "mount.fuse3"
|
// If m.Type starts with "fuse." or "fuse3.", "mount.fuse" or "mount.fuse3"
|
||||||
@ -51,45 +91,81 @@ func (m *Mount) mount(target string) (err error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
var (
|
var (
|
||||||
chdir string
|
chdir string
|
||||||
options = m.Options
|
recalcOpt bool
|
||||||
|
usernsFd int
|
||||||
|
options = m.Options
|
||||||
)
|
)
|
||||||
|
opt := parseMountOptions(options)
|
||||||
|
// Only remapping of both GID and UID together is supported
|
||||||
|
if opt.uidmap != "" && opt.gidmap != "" {
|
||||||
|
var (
|
||||||
|
childProcCleanUp func()
|
||||||
|
)
|
||||||
|
if usernsFd, childProcCleanUp, err = GetUsernsFD(opt.uidmap, opt.gidmap); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer childProcCleanUp()
|
||||||
|
|
||||||
|
// overlay expects lowerdir's to be remapped instead
|
||||||
|
if m.Type == "overlay" {
|
||||||
|
var (
|
||||||
|
userNsCleanUp func()
|
||||||
|
)
|
||||||
|
options, userNsCleanUp, err = prepareIDMappedOverlay(usernsFd, options)
|
||||||
|
defer userNsCleanUp()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to prepare idmapped overlay: %w", err)
|
||||||
|
}
|
||||||
|
// To avoid concurrency issues when the same lowerdirs are used
|
||||||
|
// for different containers, replace them by temporary directories,
|
||||||
|
if optionsSize(options) >= pagesize-512 {
|
||||||
|
recalcOpt = true
|
||||||
|
} else {
|
||||||
|
opt = parseMountOptions(options)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// avoid hitting one page limit of mount argument buffer
|
// avoid hitting one page limit of mount argument buffer
|
||||||
//
|
//
|
||||||
// NOTE: 512 is a buffer during pagesize check.
|
// NOTE: 512 is a buffer during pagesize check.
|
||||||
if m.Type == "overlay" && optionsSize(options) >= pagesize-512 {
|
if m.Type == "overlay" && optionsSize(options) >= pagesize-512 {
|
||||||
chdir, options = compactLowerdirOption(options)
|
chdir, options = compactLowerdirOption(options)
|
||||||
|
// recalculate opt in case of lowerdirs have been replaced
|
||||||
|
// by idmapped ones OR idmapped mounts' not used/supported.
|
||||||
|
if recalcOpt || (opt.uidmap == "" || opt.gidmap == "") {
|
||||||
|
opt = parseMountOptions(options)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
flags, data, losetup := parseMountOptions(options)
|
|
||||||
|
|
||||||
// propagation types.
|
// propagation types.
|
||||||
const ptypes = unix.MS_SHARED | unix.MS_PRIVATE | unix.MS_SLAVE | unix.MS_UNBINDABLE
|
const ptypes = unix.MS_SHARED | unix.MS_PRIVATE | unix.MS_SLAVE | unix.MS_UNBINDABLE
|
||||||
|
|
||||||
// Ensure propagation type change flags aren't included in other calls.
|
// Ensure propagation type change flags aren't included in other calls.
|
||||||
oflags := flags &^ ptypes
|
oflags := opt.flags &^ ptypes
|
||||||
|
|
||||||
var loopParams LoopParams
|
var loopParams LoopParams
|
||||||
if losetup {
|
if opt.losetup {
|
||||||
loopParams = LoopParams{
|
loopParams = LoopParams{
|
||||||
Readonly: oflags&unix.MS_RDONLY == unix.MS_RDONLY,
|
Readonly: oflags&unix.MS_RDONLY == unix.MS_RDONLY,
|
||||||
Autoclear: true,
|
Autoclear: true,
|
||||||
}
|
}
|
||||||
loopParams.Direct, data = hasDirectIO(data)
|
loopParams.Direct, opt.data = hasDirectIO(opt.data)
|
||||||
}
|
}
|
||||||
|
|
||||||
dataInStr := strings.Join(data, ",")
|
dataInStr := strings.Join(opt.data, ",")
|
||||||
if len(dataInStr) > pagesize {
|
if len(dataInStr) > pagesize {
|
||||||
return errors.New("mount options is too long")
|
return errors.New("mount options is too long")
|
||||||
}
|
}
|
||||||
|
|
||||||
// In the case of remounting with changed data (data != ""), need to call mount (moby/moby#34077).
|
// In the case of remounting with changed data (dataInStr != ""), need to call mount (moby/moby#34077).
|
||||||
if flags&unix.MS_REMOUNT == 0 || dataInStr != "" {
|
if opt.flags&unix.MS_REMOUNT == 0 || dataInStr != "" {
|
||||||
// Initial call applying all non-propagation flags for mount
|
// Initial call applying all non-propagation flags for mount
|
||||||
// or remount with changed data
|
// or remount with changed data
|
||||||
source := m.Source
|
source := m.Source
|
||||||
if losetup {
|
if opt.losetup {
|
||||||
loFile, err := setupLoop(m.Source, loopParams)
|
loFile, err := setupLoop(m.Source, loopParams)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -104,10 +180,10 @@ func (m *Mount) mount(target string) (err error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if flags&ptypes != 0 {
|
if opt.flags&ptypes != 0 {
|
||||||
// Change the propagation type.
|
// Change the propagation type.
|
||||||
const pflags = ptypes | unix.MS_REC | unix.MS_SILENT
|
const pflags = ptypes | unix.MS_REC | unix.MS_SILENT
|
||||||
if err := unix.Mount("", target, "", uintptr(flags&pflags), ""); err != nil {
|
if err := unix.Mount("", target, "", uintptr(opt.flags&pflags), ""); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -117,9 +193,45 @@ func (m *Mount) mount(target string) (err error) {
|
|||||||
// Remount the bind to apply read only.
|
// Remount the bind to apply read only.
|
||||||
return unix.Mount("", target, "", uintptr(oflags|unix.MS_REMOUNT), "")
|
return unix.Mount("", target, "", uintptr(oflags|unix.MS_REMOUNT), "")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// remap non-overlay mount point
|
||||||
|
if opt.uidmap != "" && opt.gidmap != "" && m.Type != "overlay" {
|
||||||
|
if err := IDMapMount(target, target, usernsFd); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func doPrepareIDMappedOverlay(lowerDirs []string, usernsFd int) (tmpLowerDirs []string, _ func(), _ error) {
|
||||||
|
td, err := os.MkdirTemp(tempMountLocation, "ovl-idmapped")
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
cleanUp := func() {
|
||||||
|
for _, lowerDir := range tmpLowerDirs {
|
||||||
|
if err := unix.Unmount(lowerDir, 0); err != nil {
|
||||||
|
logrus.WithError(err).Warnf("failed to unmount temp lowerdir %s", lowerDir)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if terr := os.RemoveAll(filepath.Clean(filepath.Join(tmpLowerDirs[0], ".."))); terr != nil {
|
||||||
|
logrus.WithError(terr).Warnf("failed to remove temporary overlay lowerdir's")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i, lowerDir := range lowerDirs {
|
||||||
|
tmpLowerDir := filepath.Join(td, strconv.Itoa(i))
|
||||||
|
tmpLowerDirs = append(tmpLowerDirs, tmpLowerDir)
|
||||||
|
|
||||||
|
if err = os.MkdirAll(tmpLowerDir, 0700); err != nil {
|
||||||
|
return nil, cleanUp, fmt.Errorf("failed to create temporary dir: %w", err)
|
||||||
|
}
|
||||||
|
if err = IDMapMount(lowerDir, tmpLowerDir, usernsFd); err != nil {
|
||||||
|
return nil, cleanUp, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tmpLowerDirs, cleanUp, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Unmount the provided mount path with the flags
|
// Unmount the provided mount path with the flags
|
||||||
func Unmount(target string, flags int) error {
|
func Unmount(target string, flags int) error {
|
||||||
if err := unmount(target, flags); err != nil && err != unix.EINVAL {
|
if err := unmount(target, flags); err != nil && err != unix.EINVAL {
|
||||||
@ -208,14 +320,9 @@ func UnmountAll(mount string, flags int) error {
|
|||||||
|
|
||||||
// parseMountOptions takes fstab style mount options and parses them for
|
// parseMountOptions takes fstab style mount options and parses them for
|
||||||
// use with a standard mount() syscall
|
// use with a standard mount() syscall
|
||||||
func parseMountOptions(options []string) (int, []string, bool) {
|
func parseMountOptions(options []string) (opt mountOpt) {
|
||||||
var (
|
|
||||||
flag int
|
|
||||||
losetup bool
|
|
||||||
data []string
|
|
||||||
)
|
|
||||||
loopOpt := "loop"
|
loopOpt := "loop"
|
||||||
flags := map[string]struct {
|
flagsMap := map[string]struct {
|
||||||
clear bool
|
clear bool
|
||||||
flag int
|
flag int
|
||||||
}{
|
}{
|
||||||
@ -249,19 +356,23 @@ func parseMountOptions(options []string) (int, []string, bool) {
|
|||||||
// If the option does not exist in the flags table or the flag
|
// If the option does not exist in the flags table or the flag
|
||||||
// is not supported on the platform,
|
// is not supported on the platform,
|
||||||
// then it is a data value for a specific fs type
|
// then it is a data value for a specific fs type
|
||||||
if f, exists := flags[o]; exists && f.flag != 0 {
|
if f, exists := flagsMap[o]; exists && f.flag != 0 {
|
||||||
if f.clear {
|
if f.clear {
|
||||||
flag &^= f.flag
|
opt.flags &^= f.flag
|
||||||
} else {
|
} else {
|
||||||
flag |= f.flag
|
opt.flags |= f.flag
|
||||||
}
|
}
|
||||||
} else if o == loopOpt {
|
} else if o == loopOpt {
|
||||||
losetup = true
|
opt.losetup = true
|
||||||
|
} else if strings.HasPrefix(o, "uidmap=") {
|
||||||
|
opt.uidmap = strings.TrimPrefix(o, "uidmap=")
|
||||||
|
} else if strings.HasPrefix(o, "gidmap=") {
|
||||||
|
opt.gidmap = strings.TrimPrefix(o, "gidmap=")
|
||||||
} else {
|
} else {
|
||||||
data = append(data, o)
|
opt.data = append(opt.data, o)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return flag, data, losetup
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func hasDirectIO(opts []string) (bool, []string) {
|
func hasDirectIO(opts []string) (bool, []string) {
|
||||||
|
30
sys/subprocess_unsafe_linux.go
Normal file
30
sys/subprocess_unsafe_linux.go
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package sys
|
||||||
|
|
||||||
|
import (
|
||||||
|
_ "unsafe" // required for go:linkname.
|
||||||
|
)
|
||||||
|
|
||||||
|
// beforeFork is bound to syscall.runtime_BeforeFork through go:linkname.
// ForkUserns calls it right before issuing the clone syscall.
//
//go:linkname beforeFork syscall.runtime_BeforeFork
|
||||||
|
func beforeFork()
|
||||||
|
|
||||||
|
// afterFork is bound to syscall.runtime_AfterFork through go:linkname.
// ForkUserns calls it before returning, i.e. in the parent or when the clone
// failed.
//
//go:linkname afterFork syscall.runtime_AfterFork
|
||||||
|
func afterFork()
|
||||||
|
|
||||||
|
// afterForkInChild is bound to syscall.runtime_AfterForkInChild through
// go:linkname. ForkUserns calls it first thing in the child.
//
//go:linkname afterForkInChild syscall.runtime_AfterForkInChild
|
||||||
|
func afterForkInChild()
|
65
sys/userns_unsafe_linux.go
Normal file
65
sys/userns_unsafe_linux.go
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package sys
|
||||||
|
|
||||||
|
import (
|
||||||
|
"runtime"
|
||||||
|
"syscall"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ProcSyncType is used for synchronization
|
||||||
|
// between parent and child processes.
|
||||||
|
type ProcSyncType uint8
|
||||||
|
|
||||||
|
const (
|
||||||
|
// ProcSyncExit tells child "it's time to exit".
|
||||||
|
ProcSyncExit ProcSyncType = 0x1
|
||||||
|
)
|
||||||
|
|
||||||
|
// ForkUserns clones the current process with the flags
// CLONE_NEWUSER|SIGCHLD using a raw clone syscall.
//
// pipeMap holds the two descriptors of a pipe created by the caller:
// pipeMap[0] is read by the child, pipeMap[1] is closed by the child.
//
// In the parent, or when the clone fails, it returns the clone result (pid)
// and errno. The child never returns: it closes pipeMap[1], requests SIGKILL
// on parent death, blocks reading one ProcSyncType value from pipeMap[0] and
// then exits using the last errno as exit status.
//
// Everything executed in the child uses raw syscalls only.
//
//go:norace
|
||||||
|
//go:noinline
|
||||||
|
func ForkUserns(pipeMap [2]int) (pid uintptr, errno syscall.Errno) {
|
||||||
|
// sync receives the value the parent writes to the pipe.
var sync ProcSyncType
|
||||||
|
|
||||||
|
beforeFork()
|
||||||
|
// On s390x the clone flags are passed as the second syscall argument,
// on the other architectures as the first one.
if runtime.GOARCH == "s390x" {
|
||||||
|
pid, _, errno = syscall.RawSyscall6(uintptr(syscall.SYS_CLONE), 0, syscall.CLONE_NEWUSER|uintptr(syscall.SIGCHLD), 0, 0, 0, 0)
|
||||||
|
} else {
|
||||||
|
pid, _, errno = syscall.RawSyscall6(uintptr(syscall.SYS_CLONE), syscall.CLONE_NEWUSER|uintptr(syscall.SIGCHLD), 0, 0, 0, 0, 0)
|
||||||
|
}
|
||||||
|
// Clone failed (errno != 0) or this is the parent (pid != 0): return.
if errno != 0 || pid != 0 {
|
||||||
|
afterFork()
|
||||||
|
return pid, errno
|
||||||
|
}
|
||||||
|
|
||||||
|
// From here on the code runs in the child.
afterForkInChild()
|
||||||
|
// The child does not write to the pipe, so it drops pipeMap[1].
if _, _, errno = syscall.RawSyscall(syscall.SYS_CLOSE, uintptr(pipeMap[1]), 0, 0); errno != 0 {
|
||||||
|
goto err
|
||||||
|
}
|
||||||
|
// Ask for SIGKILL to be delivered to the child when its parent dies.
if _, _, errno = syscall.RawSyscall6(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(syscall.SIGKILL), 0, 0, 0, 0); errno != 0 {
|
||||||
|
goto err
|
||||||
|
}
|
||||||
|
// wait for parent's signal
|
||||||
|
if _, _, errno = syscall.RawSyscall6(syscall.SYS_READ, uintptr(pipeMap[0]), uintptr(unsafe.Pointer(&sync)), unsafe.Sizeof(sync), 0, 0, 0); errno != 0 || sync != ProcSyncExit {
|
||||||
|
goto err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reached on failure and on success alike: the child exits either way,
// with errno (0 when nothing failed) as its exit status.
err:
|
||||||
|
syscall.RawSyscall6(syscall.SYS_EXIT, uintptr(errno), 0, 0, 0, 0, 0)
|
||||||
|
panic("unreachable")
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user