 34d5878185
			
		
	
	34d5878185
	
	
	
		
			
			- Add Target to mount.Mount. - Add UnmountMounts to unmount a list of mounts in reverse order. - Add UnmountRecursive to unmount deepest mount first for a given target, using moby/sys/mountinfo. Signed-off-by: Edgar Lee <edgarhinshunlee@gmail.com>
		
			
				
	
	
		
			433 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			433 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| /*
 | |
|    Copyright The containerd Authors.
 | |
| 
 | |
|    Licensed under the Apache License, Version 2.0 (the "License");
 | |
|    you may not use this file except in compliance with the License.
 | |
|    You may obtain a copy of the License at
 | |
| 
 | |
|        http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
|    Unless required by applicable law or agreed to in writing, software
 | |
|    distributed under the License is distributed on an "AS IS" BASIS,
 | |
|    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|    See the License for the specific language governing permissions and
 | |
|    limitations under the License.
 | |
| */
 | |
| 
 | |
| package mount
 | |
| 
 | |
| import (
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"os"
 | |
| 	"path"
 | |
| 	"runtime"
 | |
| 	"strings"
 | |
| 	"time"
 | |
| 
 | |
| 	exec "golang.org/x/sys/execabs"
 | |
| 	"golang.org/x/sys/unix"
 | |
| )
 | |
| 
 | |
| var (
 | |
| 	pagesize              = 4096
 | |
| 	allowedHelperBinaries = []string{"mount.fuse", "mount.fuse3"}
 | |
| )
 | |
| 
 | |
| func init() {
 | |
| 	pagesize = os.Getpagesize()
 | |
| }
 | |
| 
 | |
| // Mount to the provided target path.
 | |
| //
 | |
| // If m.Type starts with "fuse." or "fuse3.", "mount.fuse" or "mount.fuse3"
 | |
| // helper binary is called.
 | |
| func (m *Mount) mount(target string) (err error) {
 | |
| 	for _, helperBinary := range allowedHelperBinaries {
 | |
| 		// helperBinary = "mount.fuse", typePrefix = "fuse."
 | |
| 		typePrefix := strings.TrimPrefix(helperBinary, "mount.") + "."
 | |
| 		if strings.HasPrefix(m.Type, typePrefix) {
 | |
| 			return m.mountWithHelper(helperBinary, typePrefix, target)
 | |
| 		}
 | |
| 	}
 | |
| 	var (
 | |
| 		chdir   string
 | |
| 		options = m.Options
 | |
| 	)
 | |
| 
 | |
| 	// avoid hitting one page limit of mount argument buffer
 | |
| 	//
 | |
| 	// NOTE: 512 is a buffer during pagesize check.
 | |
| 	if m.Type == "overlay" && optionsSize(options) >= pagesize-512 {
 | |
| 		chdir, options = compactLowerdirOption(options)
 | |
| 	}
 | |
| 
 | |
| 	flags, data, losetup := parseMountOptions(options)
 | |
| 	if len(data) > pagesize {
 | |
| 		return errors.New("mount options is too long")
 | |
| 	}
 | |
| 
 | |
| 	// propagation types.
 | |
| 	const ptypes = unix.MS_SHARED | unix.MS_PRIVATE | unix.MS_SLAVE | unix.MS_UNBINDABLE
 | |
| 
 | |
| 	// Ensure propagation type change flags aren't included in other calls.
 | |
| 	oflags := flags &^ ptypes
 | |
| 
 | |
| 	// In the case of remounting with changed data (data != ""), need to call mount (moby/moby#34077).
 | |
| 	if flags&unix.MS_REMOUNT == 0 || data != "" {
 | |
| 		// Initial call applying all non-propagation flags for mount
 | |
| 		// or remount with changed data
 | |
| 		source := m.Source
 | |
| 		if losetup {
 | |
| 			loFile, err := setupLoop(m.Source, LoopParams{
 | |
| 				Readonly:  oflags&unix.MS_RDONLY == unix.MS_RDONLY,
 | |
| 				Autoclear: true})
 | |
| 			if err != nil {
 | |
| 				return err
 | |
| 			}
 | |
| 			defer loFile.Close()
 | |
| 
 | |
| 			// Mount the loop device instead
 | |
| 			source = loFile.Name()
 | |
| 		}
 | |
| 		if err := mountAt(chdir, source, target, m.Type, uintptr(oflags), data); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if flags&ptypes != 0 {
 | |
| 		// Change the propagation type.
 | |
| 		const pflags = ptypes | unix.MS_REC | unix.MS_SILENT
 | |
| 		if err := unix.Mount("", target, "", uintptr(flags&pflags), ""); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	const broflags = unix.MS_BIND | unix.MS_RDONLY
 | |
| 	if oflags&broflags == broflags {
 | |
| 		// Remount the bind to apply read only.
 | |
| 		return unix.Mount("", target, "", uintptr(oflags|unix.MS_REMOUNT), "")
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Unmount the provided mount path with the flags
 | |
| func Unmount(target string, flags int) error {
 | |
| 	if err := unmount(target, flags); err != nil && err != unix.EINVAL {
 | |
| 		return err
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // fuseSuperMagic is defined in statfs(2)
 | |
| const fuseSuperMagic = 0x65735546
 | |
| 
 | |
| func isFUSE(dir string) bool {
 | |
| 	var st unix.Statfs_t
 | |
| 	if err := unix.Statfs(dir, &st); err != nil {
 | |
| 		return false
 | |
| 	}
 | |
| 	return st.Type == fuseSuperMagic
 | |
| }
 | |
| 
 | |
| // unmountFUSE attempts to unmount using fusermount/fusermount3 helper binary.
 | |
| //
 | |
| // For FUSE mounts, using these helper binaries is preferred, see:
 | |
| // https://github.com/containerd/containerd/pull/3765#discussion_r342083514
 | |
| func unmountFUSE(target string) error {
 | |
| 	var err error
 | |
| 	for _, helperBinary := range []string{"fusermount3", "fusermount"} {
 | |
| 		cmd := exec.Command(helperBinary, "-u", target)
 | |
| 		err = cmd.Run()
 | |
| 		if err == nil {
 | |
| 			return nil
 | |
| 		}
 | |
| 	}
 | |
| 	return err
 | |
| }
 | |
| 
 | |
| func unmount(target string, flags int) error {
 | |
| 	if isFUSE(target) {
 | |
| 		if err := unmountFUSE(target); err == nil {
 | |
| 			return nil
 | |
| 		}
 | |
| 	}
 | |
| 	for i := 0; i < 50; i++ {
 | |
| 		if err := unix.Unmount(target, flags); err != nil {
 | |
| 			switch err {
 | |
| 			case unix.EBUSY:
 | |
| 				time.Sleep(50 * time.Millisecond)
 | |
| 				continue
 | |
| 			default:
 | |
| 				return err
 | |
| 			}
 | |
| 		}
 | |
| 		return nil
 | |
| 	}
 | |
| 	return fmt.Errorf("failed to unmount target %s: %w", target, unix.EBUSY)
 | |
| }
 | |
| 
 | |
| // UnmountAll repeatedly unmounts the given mount point until there
 | |
| // are no mounts remaining (EINVAL is returned by mount), which is
 | |
| // useful for undoing a stack of mounts on the same mount point.
 | |
| // UnmountAll all is noop when the first argument is an empty string.
 | |
| // This is done when the containerd client did not specify any rootfs
 | |
| // mounts (e.g. because the rootfs is managed outside containerd)
 | |
| // UnmountAll is noop when the mount path does not exist.
 | |
| func UnmountAll(mount string, flags int) error {
 | |
| 	if mount == "" {
 | |
| 		return nil
 | |
| 	}
 | |
| 	if _, err := os.Stat(mount); os.IsNotExist(err) {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	for {
 | |
| 		if err := unmount(mount, flags); err != nil {
 | |
| 			// EINVAL is returned if the target is not a
 | |
| 			// mount point, indicating that we are
 | |
| 			// done. It can also indicate a few other
 | |
| 			// things (such as invalid flags) which we
 | |
| 			// unfortunately end up squelching here too.
 | |
| 			if err == unix.EINVAL {
 | |
| 				return nil
 | |
| 			}
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // parseMountOptions takes fstab style mount options and parses them for
 | |
| // use with a standard mount() syscall
 | |
| func parseMountOptions(options []string) (int, string, bool) {
 | |
| 	var (
 | |
| 		flag    int
 | |
| 		losetup bool
 | |
| 		data    []string
 | |
| 	)
 | |
| 	loopOpt := "loop"
 | |
| 	flags := map[string]struct {
 | |
| 		clear bool
 | |
| 		flag  int
 | |
| 	}{
 | |
| 		"async":         {true, unix.MS_SYNCHRONOUS},
 | |
| 		"atime":         {true, unix.MS_NOATIME},
 | |
| 		"bind":          {false, unix.MS_BIND},
 | |
| 		"defaults":      {false, 0},
 | |
| 		"dev":           {true, unix.MS_NODEV},
 | |
| 		"diratime":      {true, unix.MS_NODIRATIME},
 | |
| 		"dirsync":       {false, unix.MS_DIRSYNC},
 | |
| 		"exec":          {true, unix.MS_NOEXEC},
 | |
| 		"mand":          {false, unix.MS_MANDLOCK},
 | |
| 		"noatime":       {false, unix.MS_NOATIME},
 | |
| 		"nodev":         {false, unix.MS_NODEV},
 | |
| 		"nodiratime":    {false, unix.MS_NODIRATIME},
 | |
| 		"noexec":        {false, unix.MS_NOEXEC},
 | |
| 		"nomand":        {true, unix.MS_MANDLOCK},
 | |
| 		"norelatime":    {true, unix.MS_RELATIME},
 | |
| 		"nostrictatime": {true, unix.MS_STRICTATIME},
 | |
| 		"nosuid":        {false, unix.MS_NOSUID},
 | |
| 		"rbind":         {false, unix.MS_BIND | unix.MS_REC},
 | |
| 		"relatime":      {false, unix.MS_RELATIME},
 | |
| 		"remount":       {false, unix.MS_REMOUNT},
 | |
| 		"ro":            {false, unix.MS_RDONLY},
 | |
| 		"rw":            {true, unix.MS_RDONLY},
 | |
| 		"strictatime":   {false, unix.MS_STRICTATIME},
 | |
| 		"suid":          {true, unix.MS_NOSUID},
 | |
| 		"sync":          {false, unix.MS_SYNCHRONOUS},
 | |
| 	}
 | |
| 	for _, o := range options {
 | |
| 		// If the option does not exist in the flags table or the flag
 | |
| 		// is not supported on the platform,
 | |
| 		// then it is a data value for a specific fs type
 | |
| 		if f, exists := flags[o]; exists && f.flag != 0 {
 | |
| 			if f.clear {
 | |
| 				flag &^= f.flag
 | |
| 			} else {
 | |
| 				flag |= f.flag
 | |
| 			}
 | |
| 		} else if o == loopOpt {
 | |
| 			losetup = true
 | |
| 		} else {
 | |
| 			data = append(data, o)
 | |
| 		}
 | |
| 	}
 | |
| 	return flag, strings.Join(data, ","), losetup
 | |
| }
 | |
| 
 | |
| // compactLowerdirOption updates overlay lowdir option and returns the common
 | |
| // dir among all the lowdirs.
 | |
| func compactLowerdirOption(opts []string) (string, []string) {
 | |
| 	idx, dirs := findOverlayLowerdirs(opts)
 | |
| 	if idx == -1 || len(dirs) == 1 {
 | |
| 		// no need to compact if there is only one lowerdir
 | |
| 		return "", opts
 | |
| 	}
 | |
| 
 | |
| 	// find out common dir
 | |
| 	commondir := longestCommonPrefix(dirs)
 | |
| 	if commondir == "" {
 | |
| 		return "", opts
 | |
| 	}
 | |
| 
 | |
| 	// NOTE: the snapshot id is based on digits.
 | |
| 	// in order to avoid to get snapshots/x, should be back to parent dir.
 | |
| 	// however, there is assumption that the common dir is ${root}/io.containerd.v1.overlayfs/snapshots.
 | |
| 	commondir = path.Dir(commondir)
 | |
| 	if commondir == "/" {
 | |
| 		return "", opts
 | |
| 	}
 | |
| 	commondir = commondir + "/"
 | |
| 
 | |
| 	newdirs := make([]string, 0, len(dirs))
 | |
| 	for _, dir := range dirs {
 | |
| 		newdirs = append(newdirs, dir[len(commondir):])
 | |
| 	}
 | |
| 
 | |
| 	newopts := copyOptions(opts)
 | |
| 	newopts = append(newopts[:idx], newopts[idx+1:]...)
 | |
| 	newopts = append(newopts, fmt.Sprintf("lowerdir=%s", strings.Join(newdirs, ":")))
 | |
| 	return commondir, newopts
 | |
| }
 | |
| 
 | |
| // findOverlayLowerdirs returns the index of lowerdir in mount's options and
 | |
| // all the lowerdir target.
 | |
| func findOverlayLowerdirs(opts []string) (int, []string) {
 | |
| 	var (
 | |
| 		idx    = -1
 | |
| 		prefix = "lowerdir="
 | |
| 	)
 | |
| 
 | |
| 	for i, opt := range opts {
 | |
| 		if strings.HasPrefix(opt, prefix) {
 | |
| 			idx = i
 | |
| 			break
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if idx == -1 {
 | |
| 		return -1, nil
 | |
| 	}
 | |
| 	return idx, strings.Split(opts[idx][len(prefix):], ":")
 | |
| }
 | |
| 
 | |
| // longestCommonPrefix finds the longest common prefix in the string slice.
 | |
| func longestCommonPrefix(strs []string) string {
 | |
| 	if len(strs) == 0 {
 | |
| 		return ""
 | |
| 	} else if len(strs) == 1 {
 | |
| 		return strs[0]
 | |
| 	}
 | |
| 
 | |
| 	// find out the min/max value by alphabetical order
 | |
| 	min, max := strs[0], strs[0]
 | |
| 	for _, str := range strs[1:] {
 | |
| 		if min > str {
 | |
| 			min = str
 | |
| 		}
 | |
| 		if max < str {
 | |
| 			max = str
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// find out the common part between min and max
 | |
| 	for i := 0; i < len(min) && i < len(max); i++ {
 | |
| 		if min[i] != max[i] {
 | |
| 			return min[:i]
 | |
| 		}
 | |
| 	}
 | |
| 	return min
 | |
| }
 | |
| 
 | |
| // copyOptions copies the options.
 | |
| func copyOptions(opts []string) []string {
 | |
| 	if len(opts) == 0 {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	acopy := make([]string, len(opts))
 | |
| 	copy(acopy, opts)
 | |
| 	return acopy
 | |
| }
 | |
| 
 | |
| // optionsSize returns the byte size of options of mount.
 | |
| func optionsSize(opts []string) int {
 | |
| 	size := 0
 | |
| 	for _, opt := range opts {
 | |
| 		size += len(opt)
 | |
| 	}
 | |
| 	return size
 | |
| }
 | |
| 
 | |
| func mountAt(chdir string, source, target, fstype string, flags uintptr, data string) error {
 | |
| 	if chdir == "" {
 | |
| 		return unix.Mount(source, target, fstype, flags, data)
 | |
| 	}
 | |
| 
 | |
| 	ch := make(chan error, 1)
 | |
| 	go func() {
 | |
| 		runtime.LockOSThread()
 | |
| 
 | |
| 		// Do not unlock this thread.
 | |
| 		// If the thread is unlocked go will try to use it for other goroutines.
 | |
| 		// However it is not possible to restore the thread state after CLONE_FS.
 | |
| 		//
 | |
| 		// Once the goroutine exits the thread should eventually be terminated by go.
 | |
| 
 | |
| 		if err := unix.Unshare(unix.CLONE_FS); err != nil {
 | |
| 			ch <- err
 | |
| 			return
 | |
| 		}
 | |
| 
 | |
| 		if err := unix.Chdir(chdir); err != nil {
 | |
| 			ch <- err
 | |
| 			return
 | |
| 		}
 | |
| 
 | |
| 		ch <- unix.Mount(source, target, fstype, flags, data)
 | |
| 	}()
 | |
| 	return <-ch
 | |
| }
 | |
| 
 | |
| func (m *Mount) mountWithHelper(helperBinary, typePrefix, target string) error {
 | |
| 	// helperBinary: "mount.fuse3"
 | |
| 	// target: "/foo/merged"
 | |
| 	// m.Type: "fuse3.fuse-overlayfs"
 | |
| 	// command: "mount.fuse3 overlay /foo/merged -o lowerdir=/foo/lower2:/foo/lower1,upperdir=/foo/upper,workdir=/foo/work -t fuse-overlayfs"
 | |
| 	args := []string{m.Source, target}
 | |
| 	for _, o := range m.Options {
 | |
| 		args = append(args, "-o", o)
 | |
| 	}
 | |
| 	args = append(args, "-t", strings.TrimPrefix(m.Type, typePrefix))
 | |
| 
 | |
| 	infoBeforeMount, err := Lookup(target)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	// cmd.CombinedOutput() may intermittently return ECHILD because of our signal handling in shim.
 | |
| 	// See #4387 and wait(2).
 | |
| 	const retriesOnECHILD = 10
 | |
| 	for i := 0; i < retriesOnECHILD; i++ {
 | |
| 		cmd := exec.Command(helperBinary, args...)
 | |
| 		out, err := cmd.CombinedOutput()
 | |
| 		if err == nil {
 | |
| 			return nil
 | |
| 		}
 | |
| 		if !errors.Is(err, unix.ECHILD) {
 | |
| 			return fmt.Errorf("mount helper [%s %v] failed: %q: %w", helperBinary, args, string(out), err)
 | |
| 		}
 | |
| 		// We got ECHILD, we are not sure whether the mount was successful.
 | |
| 		// If the mount ID has changed, we are sure we got some new mount, but still not sure it is fully completed.
 | |
| 		// So we attempt to unmount the new mount before retrying.
 | |
| 		infoAfterMount, err := Lookup(target)
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 		if infoAfterMount.ID != infoBeforeMount.ID {
 | |
| 			_ = unmount(target, 0)
 | |
| 		}
 | |
| 	}
 | |
| 	return fmt.Errorf("mount helper [%s %v] failed with ECHILD (retired %d times)", helperBinary, args, retriesOnECHILD)
 | |
| }
 |