Support >= 128 layers in overlayfs snapshots
Auto-detect the longest common directory in the lowerdir option and compact it if the option size is about to hit one page. If it is, use chdir + CLONE to perform the mount, so that the one-page argument buffer of the Linux kernel's mount is not exceeded. Signed-off-by: Wei Fu <fhfuwei@163.com>
This commit is contained in:
parent
26e2dd6754
commit
67b54c6670
@ -17,16 +17,41 @@
|
|||||||
package mount
|
package mount
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/containerd/containerd/sys"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// pagesize is the memory page size reported by the OS, in bytes. Mount uses
// it as the upper bound for the mount option data, because the mount argument
// buffer is limited to one page.
var pagesize = os.Getpagesize()
|
||||||
|
|
||||||
// Mount to the provided target path
|
// Mount to the provided target path
|
||||||
func (m *Mount) Mount(target string) error {
|
func (m *Mount) Mount(target string) error {
|
||||||
flags, data := parseMountOptions(m.Options)
|
var (
|
||||||
|
chdir string
|
||||||
|
options = m.Options
|
||||||
|
)
|
||||||
|
|
||||||
|
// avoid hitting one page limit of mount argument buffer
|
||||||
|
//
|
||||||
|
// NOTE: 512 is a buffer during pagesize check.
|
||||||
|
if m.Type == "overlay" && optionsSize(options) >= pagesize-512 {
|
||||||
|
chdir, options = compactLowerdirOption(options)
|
||||||
|
}
|
||||||
|
|
||||||
|
flags, data := parseMountOptions(options)
|
||||||
|
if len(data) > pagesize {
|
||||||
|
return errors.Errorf("mount options is too long")
|
||||||
|
}
|
||||||
|
|
||||||
// propagation types.
|
// propagation types.
|
||||||
const ptypes = unix.MS_SHARED | unix.MS_PRIVATE | unix.MS_SLAVE | unix.MS_UNBINDABLE
|
const ptypes = unix.MS_SHARED | unix.MS_PRIVATE | unix.MS_SLAVE | unix.MS_UNBINDABLE
|
||||||
@ -38,7 +63,7 @@ func (m *Mount) Mount(target string) error {
|
|||||||
if flags&unix.MS_REMOUNT == 0 || data != "" {
|
if flags&unix.MS_REMOUNT == 0 || data != "" {
|
||||||
// Initial call applying all non-propagation flags for mount
|
// Initial call applying all non-propagation flags for mount
|
||||||
// or remount with changed data
|
// or remount with changed data
|
||||||
if err := unix.Mount(m.Source, target, m.Type, uintptr(oflags), data); err != nil {
|
if err := mountAt(chdir, m.Source, target, m.Type, uintptr(oflags), data); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -155,3 +180,129 @@ func parseMountOptions(options []string) (int, string) {
|
|||||||
}
|
}
|
||||||
return flag, strings.Join(data, ",")
|
return flag, strings.Join(data, ",")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// compactLowerdirOption updates overlay lowdir option and returns the common
|
||||||
|
// dir among all the lowdirs.
|
||||||
|
func compactLowerdirOption(opts []string) (string, []string) {
|
||||||
|
idx, dirs := findOverlayLowerdirs(opts)
|
||||||
|
if idx == -1 || len(dirs) == 1 {
|
||||||
|
// no need to compact if there is only one lowerdir
|
||||||
|
return "", opts
|
||||||
|
}
|
||||||
|
|
||||||
|
// find out common dir
|
||||||
|
commondir := longestCommonPrefix(dirs)
|
||||||
|
if commondir == "" {
|
||||||
|
return "", opts
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: the snapshot id is based on digits.
|
||||||
|
// in order to avoid to get snapshots/x, should be back to parent dir.
|
||||||
|
// however, there is assumption that the common dir is ${root}/io.containerd.v1.overlayfs/snapshots.
|
||||||
|
commondir = path.Dir(commondir)
|
||||||
|
if commondir == "/" {
|
||||||
|
return "", opts
|
||||||
|
}
|
||||||
|
commondir = commondir + "/"
|
||||||
|
|
||||||
|
newdirs := make([]string, 0, len(dirs))
|
||||||
|
for _, dir := range dirs {
|
||||||
|
newdirs = append(newdirs, dir[len(commondir):])
|
||||||
|
}
|
||||||
|
|
||||||
|
newopts := copyOptions(opts)
|
||||||
|
newopts = append(newopts[:idx], newopts[idx+1:]...)
|
||||||
|
newopts = append(newopts, fmt.Sprintf("lowerdir=%s", strings.Join(newdirs, ":")))
|
||||||
|
return commondir, newopts
|
||||||
|
}
|
||||||
|
|
||||||
|
// findOverlayLowerdirs looks for the first "lowerdir=" entry in opts. It
// returns the position of that entry and its value split on ":". If no such
// entry exists it returns -1 and a nil slice.
func findOverlayLowerdirs(opts []string) (int, []string) {
	const key = "lowerdir="

	for pos, opt := range opts {
		if !strings.HasPrefix(opt, key) {
			continue
		}
		// only the first lowerdir option is considered
		return pos, strings.Split(opt[len(key):], ":")
	}
	return -1, nil
}
|
||||||
|
|
||||||
|
// longestCommonPrefix returns the longest string that every element of strs
// starts with. An empty slice yields "" and a single element is returned
// as is.
func longestCommonPrefix(strs []string) string {
	switch len(strs) {
	case 0:
		return ""
	case 1:
		return strs[0]
	}

	// The prefix shared by the lexicographically smallest and largest element
	// is shared by everything in between, so only those two are compared.
	lo, hi := strs[0], strs[0]
	for _, s := range strs[1:] {
		if s < lo {
			lo = s
		}
		if s > hi {
			hi = s
		}
	}

	limit := len(lo)
	if len(hi) < limit {
		limit = len(hi)
	}

	n := 0
	for n < limit && lo[n] == hi[n] {
		n++
	}
	return lo[:n]
}
|
||||||
|
|
||||||
|
// copyOptions returns a copy of opts backed by its own array, so the caller
// can modify the result without touching opts. Empty input yields nil.
func copyOptions(opts []string) []string {
	if len(opts) == 0 {
		return nil
	}
	return append(make([]string, 0, len(opts)), opts...)
}
|
||||||
|
|
||||||
|
// optionsSize adds up the byte lengths of all mount options. Separators
// between options are not counted.
func optionsSize(opts []string) int {
	total := 0
	for i := range opts {
		total += len(opts[i])
	}
	return total
}
|
||||||
|
|
||||||
|
func mountAt(chdir string, source, target, fstype string, flags uintptr, data string) error {
|
||||||
|
if chdir == "" {
|
||||||
|
return unix.Mount(source, target, fstype, flags, data)
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := os.Open(chdir)
|
||||||
|
if err != nil {
|
||||||
|
return errors.Wrap(err, "failed to mountat")
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
fs, err := f.Stat()
|
||||||
|
if err != nil {
|
||||||
|
return errors.Wrap(err, "failed to mountat")
|
||||||
|
}
|
||||||
|
|
||||||
|
if !fs.IsDir() {
|
||||||
|
return errors.Wrap(errors.Errorf("%s is not dir", chdir), "failed to mountat")
|
||||||
|
}
|
||||||
|
return errors.Wrap(sys.FMountat(f.Fd(), source, target, fstype, flags, data), "failed to mountat")
|
||||||
|
}
|
||||||
|
94
mount/mount_linux_test.go
Normal file
94
mount/mount_linux_test.go
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
// +build linux
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package mount
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLongestCommonPrefix(t *testing.T) {
|
||||||
|
tcases := []struct {
|
||||||
|
in []string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{[]string{}, ""},
|
||||||
|
{[]string{"foo"}, "foo"},
|
||||||
|
{[]string{"foo", "bar"}, ""},
|
||||||
|
{[]string{"foo", "foo"}, "foo"},
|
||||||
|
{[]string{"foo", "foobar"}, "foo"},
|
||||||
|
{[]string{"foo", "", "foobar"}, ""},
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, tc := range tcases {
|
||||||
|
if got := longestCommonPrefix(tc.in); got != tc.expected {
|
||||||
|
t.Fatalf("[%d case] expected (%s), but got (%s)", i+1, tc.expected, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCompactLowerdirOption(t *testing.T) {
|
||||||
|
tcases := []struct {
|
||||||
|
opts []string
|
||||||
|
commondir string
|
||||||
|
newopts []string
|
||||||
|
}{
|
||||||
|
// no lowerdir or only one
|
||||||
|
{
|
||||||
|
[]string{"workdir=a"},
|
||||||
|
"",
|
||||||
|
[]string{"workdir=a"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
[]string{"workdir=a", "lowerdir=b"},
|
||||||
|
"",
|
||||||
|
[]string{"workdir=a", "lowerdir=b"},
|
||||||
|
},
|
||||||
|
|
||||||
|
// >= 2 lowerdir
|
||||||
|
{
|
||||||
|
[]string{"lowerdir=/snapshots/1/fs:/snapshots/10/fs"},
|
||||||
|
"/snapshots/",
|
||||||
|
[]string{"lowerdir=1/fs:10/fs"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
[]string{"lowerdir=/snapshots/1/fs:/snapshots/10/fs:/snapshots/2/fs"},
|
||||||
|
"/snapshots/",
|
||||||
|
[]string{"lowerdir=1/fs:10/fs:2/fs"},
|
||||||
|
},
|
||||||
|
|
||||||
|
// if common dir is /
|
||||||
|
{
|
||||||
|
[]string{"lowerdir=/snapshots/1/fs:/other_snapshots/1/fs"},
|
||||||
|
"",
|
||||||
|
[]string{"lowerdir=/snapshots/1/fs:/other_snapshots/1/fs"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, tc := range tcases {
|
||||||
|
dir, opts := compactLowerdirOption(tc.opts)
|
||||||
|
if dir != tc.commondir {
|
||||||
|
t.Fatalf("[%d case] expected common dir (%s), but got (%s)", i+1, tc.commondir, dir)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(opts, tc.newopts) {
|
||||||
|
t.Fatalf("[%d case] expected options (%v), but got (%v)", i+1, tc.newopts, opts)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -63,6 +63,8 @@ func SnapshotterSuite(t *testing.T, name string, snapshotterFn func(ctx context.
|
|||||||
t.Run("StatInWalk", makeTest(name, snapshotterFn, checkStatInWalk))
|
t.Run("StatInWalk", makeTest(name, snapshotterFn, checkStatInWalk))
|
||||||
t.Run("CloseTwice", makeTest(name, snapshotterFn, closeTwice))
|
t.Run("CloseTwice", makeTest(name, snapshotterFn, closeTwice))
|
||||||
t.Run("RootPermission", makeTest(name, snapshotterFn, checkRootPermission))
|
t.Run("RootPermission", makeTest(name, snapshotterFn, checkRootPermission))
|
||||||
|
|
||||||
|
t.Run("128LayersMount", makeTest(name, snapshotterFn, check128LayersMount))
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeTest(name string, snapshotterFn func(ctx context.Context, root string) (snapshots.Snapshotter, func() error, error), fn func(ctx context.Context, t *testing.T, snapshotter snapshots.Snapshotter, work string)) func(t *testing.T) {
|
func makeTest(name string, snapshotterFn func(ctx context.Context, root string) (snapshots.Snapshotter, func() error, error), fn func(ctx context.Context, t *testing.T, snapshotter snapshots.Snapshotter, work string)) func(t *testing.T) {
|
||||||
@ -860,3 +862,94 @@ func checkRootPermission(ctx context.Context, t *testing.T, snapshotter snapshot
|
|||||||
t.Fatalf("expected 0755, got 0%o", mode)
|
t.Fatalf("expected 0755, got 0%o", mode)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func check128LayersMount(ctx context.Context, t *testing.T, snapshotter snapshots.Snapshotter, work string) {
|
||||||
|
lowestApply := fstest.Apply(
|
||||||
|
fstest.CreateFile("/bottom", []byte("way at the bottom\n"), 0777),
|
||||||
|
fstest.CreateFile("/overwriteme", []byte("FIRST!\n"), 0777),
|
||||||
|
fstest.CreateDir("/ADDHERE", 0755),
|
||||||
|
fstest.CreateDir("/ONLYME", 0755),
|
||||||
|
fstest.CreateFile("/ONLYME/bottom", []byte("bye!\n"), 0777),
|
||||||
|
)
|
||||||
|
|
||||||
|
appliers := []fstest.Applier{lowestApply}
|
||||||
|
for i := 1; i <= 127; i++ {
|
||||||
|
appliers = append(appliers, fstest.Apply(
|
||||||
|
fstest.CreateFile("/overwriteme", []byte(fmt.Sprintf("%d WAS HERE!\n", i)), 0777),
|
||||||
|
fstest.CreateFile(fmt.Sprintf("/ADDHERE/file-%d", i), []byte("same\n"), 0755),
|
||||||
|
fstest.RemoveAll("/ONLYME"),
|
||||||
|
fstest.CreateDir("/ONLYME", 0755),
|
||||||
|
fstest.CreateFile(fmt.Sprintf("/ONLYME/file-%d", i), []byte("only me!\n"), 0777),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
flat := filepath.Join(work, "flat")
|
||||||
|
if err := os.MkdirAll(flat, 0777); err != nil {
|
||||||
|
t.Fatalf("failed to create flat dir(%s): %+v", flat, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: add gc labels to avoid snapshots get removed by gc...
|
||||||
|
parent := ""
|
||||||
|
for i, applier := range appliers {
|
||||||
|
preparing := filepath.Join(work, fmt.Sprintf("prepare-layer-%d", i))
|
||||||
|
if err := os.MkdirAll(preparing, 0777); err != nil {
|
||||||
|
t.Fatalf("[layer %d] failed to create preparing dir(%s): %+v", i, preparing, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
mounts, err := snapshotter.Prepare(ctx, preparing, parent, opt)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("[layer %d] failed to get mount info: %+v", i, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := mount.All(mounts, preparing); err != nil {
|
||||||
|
t.Fatalf("[layer %d] failed to mount on the target(%s): %+v", i, preparing, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := fstest.CheckDirectoryEqual(preparing, flat); err != nil {
|
||||||
|
testutil.Unmount(t, preparing)
|
||||||
|
t.Fatalf("[layer %d] preparing doesn't equal to flat before apply: %+v", i, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := applier.Apply(flat); err != nil {
|
||||||
|
testutil.Unmount(t, preparing)
|
||||||
|
t.Fatalf("[layer %d] failed to apply on flat dir: %+v", i, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = applier.Apply(preparing); err != nil {
|
||||||
|
testutil.Unmount(t, preparing)
|
||||||
|
t.Fatalf("[layer %d] failed to apply on preparing dir: %+v", i, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := fstest.CheckDirectoryEqual(preparing, flat); err != nil {
|
||||||
|
testutil.Unmount(t, preparing)
|
||||||
|
t.Fatalf("[layer %d] preparing doesn't equal to flat after apply: %+v", i, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
testutil.Unmount(t, preparing)
|
||||||
|
|
||||||
|
parent = filepath.Join(work, fmt.Sprintf("committed-%d", i))
|
||||||
|
if err := snapshotter.Commit(ctx, parent, preparing, opt); err != nil {
|
||||||
|
t.Fatalf("[layer %d] failed to commit the preparing: %+v", i, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
view := filepath.Join(work, "fullview")
|
||||||
|
if err := os.MkdirAll(view, 0777); err != nil {
|
||||||
|
t.Fatalf("failed to create fullview dir(%s): %+v", view, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
mounts, err := snapshotter.View(ctx, view, parent, opt)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to get view's mount info: %+v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := mount.All(mounts, view); err != nil {
|
||||||
|
t.Fatalf("failed to mount on the target(%s): %+v", view, err)
|
||||||
|
}
|
||||||
|
defer testutil.Unmount(t, view)
|
||||||
|
|
||||||
|
if err := fstest.CheckDirectoryEqual(view, flat); err != nil {
|
||||||
|
t.Fatalf("fullview should equal to flat: %+v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
119
sys/mount_linux.go
Normal file
119
sys/mount_linux.go
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package sys
|
||||||
|
|
||||||
|
import (
|
||||||
|
"runtime"
|
||||||
|
"syscall"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/pkg/errors"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FMountat performs mount from the provided directory.
|
||||||
|
func FMountat(dirfd uintptr, source, target, fstype string, flags uintptr, data string) error {
|
||||||
|
var (
|
||||||
|
sourceP, targetP, fstypeP, dataP *byte
|
||||||
|
pid uintptr
|
||||||
|
ws unix.WaitStatus
|
||||||
|
err error
|
||||||
|
errno syscall.Errno
|
||||||
|
)
|
||||||
|
|
||||||
|
sourceP, err = syscall.BytePtrFromString(source)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
targetP, err = syscall.BytePtrFromString(target)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
fstypeP, err = syscall.BytePtrFromString(fstype)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if data != "" {
|
||||||
|
dataP, err = syscall.BytePtrFromString(data)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runtime.LockOSThread()
|
||||||
|
defer runtime.UnlockOSThread()
|
||||||
|
|
||||||
|
pid, errno = forkAndMountat(dirfd,
|
||||||
|
uintptr(unsafe.Pointer(sourceP)),
|
||||||
|
uintptr(unsafe.Pointer(targetP)),
|
||||||
|
uintptr(unsafe.Pointer(fstypeP)),
|
||||||
|
flags,
|
||||||
|
uintptr(unsafe.Pointer(dataP)))
|
||||||
|
|
||||||
|
if errno != 0 {
|
||||||
|
return errors.Wrap(errno, "failed to fork thread")
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = unix.Wait4(int(pid), &ws, 0, nil)
|
||||||
|
for err == syscall.EINTR {
|
||||||
|
_, err = unix.Wait4(int(pid), &ws, 0, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return errors.Wrapf(err, "failed to find pid=%d process", pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
errno = syscall.Errno(ws.ExitStatus())
|
||||||
|
if errno != 0 {
|
||||||
|
return errors.Wrap(errno, "failed to mount")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// forkAndMountat will fork thread, change working dir and mount.
|
||||||
|
//
|
||||||
|
// precondition: the runtime OS thread must be locked.
|
||||||
|
func forkAndMountat(dirfd uintptr, source, target, fstype, flags, data uintptr) (pid uintptr, errno syscall.Errno) {
|
||||||
|
// block signal during clone
|
||||||
|
beforeFork()
|
||||||
|
|
||||||
|
// the cloned thread shares the open file descriptor, but the thread
|
||||||
|
// never be reused by runtime.
|
||||||
|
pid, _, errno = syscall.RawSyscall6(syscall.SYS_CLONE, uintptr(syscall.SIGCHLD)|syscall.CLONE_FILES, 0, 0, 0, 0, 0)
|
||||||
|
if errno != 0 || pid != 0 {
|
||||||
|
// restore all signals
|
||||||
|
afterFork()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// restore all signals
|
||||||
|
afterForkInChild()
|
||||||
|
|
||||||
|
// change working dir
|
||||||
|
_, _, errno = syscall.RawSyscall(syscall.SYS_FCHDIR, dirfd, 0, 0)
|
||||||
|
if errno != 0 {
|
||||||
|
goto childerr
|
||||||
|
}
|
||||||
|
_, _, errno = syscall.RawSyscall6(syscall.SYS_MOUNT, source, target, fstype, flags, data, 0)
|
||||||
|
|
||||||
|
childerr:
|
||||||
|
syscall.RawSyscall(syscall.SYS_EXIT, uintptr(errno), 0, 0)
|
||||||
|
panic("unreachable")
|
||||||
|
}
|
169
sys/mount_linux_test.go
Normal file
169
sys/mount_linux_test.go
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package sys
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"syscall"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/containerd/continuity/fs/fstest"
|
||||||
|
"github.com/pkg/errors"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
type fMountatCaseFunc func(t *testing.T, root string)
|
||||||
|
|
||||||
|
func TestFMountat(t *testing.T) {
|
||||||
|
if RunningUnprivileged() {
|
||||||
|
t.Skip("Needs to be run as root")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run("Normal", makeTestForFMountat(testFMountatNormal))
|
||||||
|
t.Run("ChdirWithFileFd", makeTestForFMountat(testFMountatWithFileFd))
|
||||||
|
t.Run("MountWithInvalidSource", makeTestForFMountat(testFMountatWithInvalidSource))
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeTestForFMountat(fn fMountatCaseFunc) func(t *testing.T) {
|
||||||
|
return func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
suiteDir, err := ioutil.TempDir("", "fmountat-test-")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(suiteDir)
|
||||||
|
|
||||||
|
fn(t, suiteDir)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testFMountatNormal(t *testing.T, root string) {
|
||||||
|
expectedContent := "bye re-exec!\n"
|
||||||
|
apply := fstest.Apply(
|
||||||
|
fstest.CreateFile("/hi", []byte(expectedContent), 0777),
|
||||||
|
)
|
||||||
|
|
||||||
|
workdir := filepath.Join(root, "work")
|
||||||
|
if err := os.MkdirAll(workdir, 0777); err != nil {
|
||||||
|
t.Fatalf("failed to create dir(%s): %+v", workdir, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := apply.Apply(workdir); err != nil {
|
||||||
|
t.Fatalf("failed to prepare source dir: %+v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
atdir := filepath.Join(root, "at")
|
||||||
|
if err := os.MkdirAll(atdir, 0777); err != nil {
|
||||||
|
t.Fatalf("failed to create working dir(%s): %+v", atdir, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fsdir := filepath.Join(atdir, "fs")
|
||||||
|
if err := os.MkdirAll(fsdir, 0777); err != nil {
|
||||||
|
t.Fatalf("failed to create mount point dir(%s): %+v", fsdir, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := os.Open(atdir)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to open dir(%s): %+v", atdir, err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
// mount work to fs
|
||||||
|
if err = FMountat(f.Fd(), workdir, "fs", "bind", unix.MS_BIND|unix.MS_RDONLY, ""); err != nil {
|
||||||
|
t.Fatalf("expected no error here, but got error: %+v", err)
|
||||||
|
}
|
||||||
|
defer umount(t, fsdir)
|
||||||
|
|
||||||
|
// check hi file
|
||||||
|
content, err := ioutil.ReadFile(filepath.Join(fsdir, "hi"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to read file: %+v", err)
|
||||||
|
}
|
||||||
|
if got := string(content); got != expectedContent {
|
||||||
|
t.Fatalf("expected to get(%v), but got(%v)", expectedContent, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// check the working directory
|
||||||
|
cwd, err := os.Getwd()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to get current working dir: %+v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cwd == atdir {
|
||||||
|
t.Fatal("should not change the current working directory")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testFMountatWithFileFd(t *testing.T, root string) {
|
||||||
|
// not a directory
|
||||||
|
expectedErr := syscall.Errno(20)
|
||||||
|
|
||||||
|
emptyFile := filepath.Join(root, "emptyFile")
|
||||||
|
f, err := os.Create(emptyFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to create file(%s): %+v", emptyFile, err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
err = FMountat(f.Fd(), filepath.Join(root, "empty"), filepath.Join(root, "work"), "", 0, "")
|
||||||
|
if got := errors.Cause(err); got != expectedErr {
|
||||||
|
t.Fatalf("expected error %v, but got %v", expectedErr, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testFMountatWithInvalidSource(t *testing.T, root string) {
|
||||||
|
// no such file or directory
|
||||||
|
expectedErr := syscall.Errno(2)
|
||||||
|
|
||||||
|
atdir := filepath.Join(root, "at")
|
||||||
|
if err := os.MkdirAll(atdir, 0777); err != nil {
|
||||||
|
t.Fatalf("failed to create dir(%s): %+v", atdir, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := os.Open(root)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to open dir(%s): %+v", atdir, err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
err = FMountat(f.Fd(), filepath.Join(root, "oops"), "at", "bind", unix.MS_BIND, "")
|
||||||
|
if got := errors.Cause(err); got != expectedErr {
|
||||||
|
t.Fatalf("expected error %v, but got %v", expectedErr, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func umount(t *testing.T, target string) {
|
||||||
|
for i := 0; i < 50; i++ {
|
||||||
|
if err := unix.Unmount(target, unix.MNT_DETACH); err != nil {
|
||||||
|
switch err {
|
||||||
|
case unix.EBUSY:
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
continue
|
||||||
|
case unix.EINVAL:
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.Fatalf("failed to unmount target %s", target)
|
||||||
|
}
|
30
sys/subprocess_unsafe_linux.go
Normal file
30
sys/subprocess_unsafe_linux.go
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package sys
|
||||||
|
|
||||||
|
import (
|
||||||
|
_ "unsafe" // required for go:linkname.
|
||||||
|
)
|
||||||
|
|
||||||
|
// beforeFork is bound to syscall.runtime_BeforeFork through go:linkname.
// forkAndMountat calls it right before the clone syscall to block signals.
// NOTE(review): the declaration has no body; presumably the accompanying
// empty .s file exists so that the compiler accepts it. Confirm.
//
//go:linkname beforeFork syscall.runtime_BeforeFork
|
||||||
|
func beforeFork()
|
||||||
|
|
||||||
|
// afterFork is bound to syscall.runtime_AfterFork through go:linkname.
// forkAndMountat calls it in the parent, after the clone syscall returned or
// failed, to restore all signals.
//
//go:linkname afterFork syscall.runtime_AfterFork
|
||||||
|
func afterFork()
|
||||||
|
|
||||||
|
// afterForkInChild is bound to syscall.runtime_AfterForkInChild through
// go:linkname. forkAndMountat calls it first thing in the cloned child to
// restore all signals there.
//
//go:linkname afterForkInChild syscall.runtime_AfterForkInChild
|
||||||
|
func afterForkInChild()
|
15
sys/subprocess_unsafe_linux.s
Normal file
15
sys/subprocess_unsafe_linux.s
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
Loading…
Reference in New Issue
Block a user