diff --git a/mount/fmountat_linux.go b/mount/fmountat_linux.go deleted file mode 100644 index 850a92acf..000000000 --- a/mount/fmountat_linux.go +++ /dev/null @@ -1,145 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package mount - -import ( - "fmt" - "runtime" - "syscall" - "unsafe" - - "github.com/containerd/containerd/log" - "golang.org/x/sys/unix" -) - -// fMountat performs mount from the provided directory. -func fMountat(dirfd uintptr, source, target, fstype string, flags uintptr, data string) error { - var ( - sourceP, targetP, fstypeP, dataP *byte - pid uintptr - err error - errno, status syscall.Errno - ) - - sourceP, err = syscall.BytePtrFromString(source) - if err != nil { - return err - } - - targetP, err = syscall.BytePtrFromString(target) - if err != nil { - return err - } - - fstypeP, err = syscall.BytePtrFromString(fstype) - if err != nil { - return err - } - - if data != "" { - dataP, err = syscall.BytePtrFromString(data) - if err != nil { - return err - } - } - - runtime.LockOSThread() - defer runtime.UnlockOSThread() - - var pipefds [2]int - if err := syscall.Pipe2(pipefds[:], syscall.O_CLOEXEC); err != nil { - return fmt.Errorf("failed to open pipe: %w", err) - } - - defer func() { - // close both ends of the pipe in a deferred function, since open file - // descriptor table is shared with child - syscall.Close(pipefds[0]) - syscall.Close(pipefds[1]) - }() - - pid, errno = forkAndMountat(dirfd, - uintptr(unsafe.Pointer(sourceP)), - uintptr(unsafe.Pointer(targetP)), - uintptr(unsafe.Pointer(fstypeP)), - flags, - uintptr(unsafe.Pointer(dataP)), - pipefds[1], - ) - - if errno != 0 { - return fmt.Errorf("failed to fork thread: %w", errno) - } - - defer func() { - _, err := unix.Wait4(int(pid), nil, 0, nil) - for err == syscall.EINTR { - _, err = unix.Wait4(int(pid), nil, 0, nil) - } - - if err != nil { - log.L.WithError(err).Debugf("failed to find pid=%d process", pid) - } - }() - - _, _, errno = syscall.RawSyscall(syscall.SYS_READ, - uintptr(pipefds[0]), - uintptr(unsafe.Pointer(&status)), - unsafe.Sizeof(status)) - if errno != 0 { - return fmt.Errorf("failed to read pipe: %w", errno) - } - - if status != 0 { - return fmt.Errorf("failed to mount: %w", status) - } - - return nil -} - -// forkAndMountat will fork thread, change working dir and mount. -// -// precondition: the runtime OS thread must be locked. -func forkAndMountat(dirfd uintptr, source, target, fstype, flags, data uintptr, pipefd int) (pid uintptr, errno syscall.Errno) { - - // block signal during clone - beforeFork() - - // the cloned thread shares the open file descriptor, but the thread - // never be reused by runtime. - pid, _, errno = syscall.RawSyscall6(syscall.SYS_CLONE, uintptr(syscall.SIGCHLD)|syscall.CLONE_FILES, 0, 0, 0, 0, 0) - if errno != 0 || pid != 0 { - // restore all signals - afterFork() - return - } - - // restore all signals - afterForkInChild() - - // change working dir - _, _, errno = syscall.RawSyscall(syscall.SYS_FCHDIR, dirfd, 0, 0) - if errno != 0 { - goto childerr - } - _, _, errno = syscall.RawSyscall6(syscall.SYS_MOUNT, source, target, fstype, flags, data, 0) - -childerr: - _, _, errno = syscall.RawSyscall(syscall.SYS_WRITE, uintptr(pipefd), uintptr(unsafe.Pointer(&errno)), unsafe.Sizeof(errno)) - syscall.RawSyscall(syscall.SYS_EXIT, uintptr(errno), 0, 0) - panic("unreachable") -} diff --git a/mount/fmountat_linux_test.go b/mount/fmountat_linux_test.go deleted file mode 100644 index 154165c55..000000000 --- a/mount/fmountat_linux_test.go +++ /dev/null @@ -1,164 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package mount - -import ( - "errors" - "os" - "path/filepath" - "syscall" - "testing" - "time" - - "github.com/containerd/continuity/fs/fstest" - "golang.org/x/sys/unix" -) - -type fMountatCaseFunc func(t *testing.T, root string) - -func TestFMountat(t *testing.T) { - if unix.Geteuid() != 0 { - t.Skip("Needs to be run as root") - return - } - - t.Run("Normal", makeTestForFMountat(testFMountatNormal)) - t.Run("ChdirWithFileFd", makeTestForFMountat(testFMountatWithFileFd)) - t.Run("MountWithInvalidSource", makeTestForFMountat(testFMountatWithInvalidSource)) -} - -func makeTestForFMountat(fn fMountatCaseFunc) func(t *testing.T) { - return func(t *testing.T) { - t.Parallel() - - suiteDir := t.TempDir() - - fn(t, suiteDir) - } -} - -func testFMountatNormal(t *testing.T, root string) { - expectedContent := "bye re-exec!\n" - apply := fstest.Apply( - fstest.CreateFile("/hi", []byte(expectedContent), 0777), - ) - - workdir := filepath.Join(root, "work") - if err := os.MkdirAll(workdir, 0777); err != nil { - t.Fatalf("failed to create dir(%s): %+v", workdir, err) - } - - if err := apply.Apply(workdir); err != nil { - t.Fatalf("failed to prepare source dir: %+v", err) - } - - atdir := filepath.Join(root, "at") - if err := os.MkdirAll(atdir, 0777); err != nil { - t.Fatalf("failed to create working dir(%s): %+v", atdir, err) - } - - fsdir := filepath.Join(atdir, "fs") - if err := os.MkdirAll(fsdir, 0777); err != nil { - t.Fatalf("failed to create mount point dir(%s): %+v", fsdir, err) - } - - f, err := os.Open(atdir) - if err != nil { - t.Fatalf("failed to open dir(%s): %+v", atdir, err) - } - defer f.Close() - - // mount work to fs - if err = fMountat(f.Fd(), workdir, "fs", "bind", unix.MS_BIND|unix.MS_RDONLY, ""); err != nil { - t.Fatalf("expected no error here, but got error: %+v", err) - } - defer umount(t, fsdir) - - // check hi file - content, err := os.ReadFile(filepath.Join(fsdir, "hi")) - if err != nil { - t.Fatalf("failed to read file: %+v", err) - } - if got := string(content); got != expectedContent { - t.Fatalf("expected to get(%v), but got(%v)", expectedContent, got) - } - - // check the working directory - cwd, err := os.Getwd() - if err != nil { - t.Fatalf("failed to get current working dir: %+v", err) - } - - if cwd == atdir { - t.Fatal("should not change the current working directory") - } -} - -func testFMountatWithFileFd(t *testing.T, root string) { - // not a directory - expectedErr := syscall.Errno(20) - - emptyFile := filepath.Join(root, "emptyFile") - f, err := os.Create(emptyFile) - if err != nil { - t.Fatalf("failed to create file(%s): %+v", emptyFile, err) - } - defer f.Close() - - err = fMountat(f.Fd(), filepath.Join(root, "empty"), filepath.Join(root, "work"), "", 0, "") - if !errors.Is(err, expectedErr) { - t.Fatalf("expected error %v, but got %v", expectedErr, errors.Unwrap(err)) - } -} - -func testFMountatWithInvalidSource(t *testing.T, root string) { - // no such file or directory - expectedErr := syscall.Errno(2) - - atdir := filepath.Join(root, "at") - if err := os.MkdirAll(atdir, 0777); err != nil { - t.Fatalf("failed to create dir(%s): %+v", atdir, err) - } - - f, err := os.Open(root) - if err != nil { - t.Fatalf("failed to open dir(%s): %+v", atdir, err) - } - defer f.Close() - - err = fMountat(f.Fd(), filepath.Join(root, "oops"), "at", "bind", unix.MS_BIND, "") - if !errors.Is(err, expectedErr) { - t.Fatalf("expected error %v, but got %v", expectedErr, err) - } -} - -func umount(t *testing.T, target string) { - for i := 0; i < 50; i++ { - if err := unix.Unmount(target, unix.MNT_DETACH); err != nil { - switch err { - case unix.EBUSY: - time.Sleep(50 * time.Millisecond) - continue - case unix.EINVAL: - return - default: - continue - } - } - } - t.Fatalf("failed to unmount target %s", target) -} diff --git a/mount/mount_linux.go b/mount/mount_linux.go index a69f65c2d..63df81084 100644 --- a/mount/mount_linux.go +++ b/mount/mount_linux.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "path" + "runtime" "strings" "time" @@ -363,24 +364,29 @@ func mountAt(chdir string, source, target, fstype string, flags uintptr, data st return unix.Mount(source, target, fstype, flags, data) } - f, err := os.Open(chdir) - if err != nil { - return fmt.Errorf("failed to mountat: %w", err) - } - defer f.Close() + ch := make(chan error, 1) + go func() { + runtime.LockOSThread() - fs, err := f.Stat() - if err != nil { - return fmt.Errorf("failed to mountat: %w", err) - } + // Do not unlock this thread. + // If the thread is unlocked go will try to use it for other goroutines. + // However it is not possible to restore the thread state after CLONE_FS. + // + // Once the goroutine exits the thread should eventually be terminated by go. - if !fs.IsDir() { - return fmt.Errorf("failed to mountat: %s is not dir", chdir) - } - if err := fMountat(f.Fd(), source, target, fstype, flags, data); err != nil { - return fmt.Errorf("failed to mountat: %w", err) - } - return nil + if err := unix.Unshare(unix.CLONE_FS); err != nil { + ch <- err + return + } + + if err := unix.Chdir(chdir); err != nil { + ch <- err + return + } + + ch <- unix.Mount(source, target, fstype, flags, data) + }() + return <-ch } func (m *Mount) mountWithHelper(helperBinary, typePrefix, target string) error { diff --git a/mount/mount_linux_test.go b/mount/mount_linux_test.go index 84e708551..14771b027 100644 --- a/mount/mount_linux_test.go +++ b/mount/mount_linux_test.go @@ -25,6 +25,7 @@ import ( "github.com/containerd/continuity/testutil" exec "golang.org/x/sys/execabs" + "golang.org/x/sys/unix" ) func TestLongestCommonPrefix(t *testing.T) { @@ -126,3 +127,48 @@ func TestFUSEHelper(t *testing.T) { t.Fatal(err) } } + +func TestMountAt(t *testing.T) { + testutil.RequiresRoot(t) + + dir1 := t.TempDir() + dir2 := t.TempDir() + + defer unix.Unmount(filepath.Join(dir2, "bar"), unix.MNT_DETACH) + + if err := os.WriteFile(filepath.Join(dir1, "foo"), []byte("foo"), 0644); err != nil { + t.Fatal(err) + } + + if err := os.WriteFile(filepath.Join(dir2, "bar"), []byte{}, 0644); err != nil { + t.Fatal(err) + } + + wd, err := os.Getwd() + if err != nil { + t.Fatal(err) + } + + // mount ${dir1}/foo at ${dir2}/bar + // But since we are using `mountAt` we only need to specify the relative path to dir2 as the target mountAt will chdir to there. + if err := mountAt(dir2, filepath.Join(dir1, "foo"), "bar", "none", unix.MS_BIND, ""); err != nil { + t.Fatal(err) + } + + b, err := os.ReadFile(filepath.Join(dir2, "bar")) + if err != nil { + t.Fatal(err) + } + + if string(b) != "foo" { + t.Fatalf("unexpected file content: %s", b) + } + + newWD, err := os.Getwd() + if err != nil { + t.Fatal(err) + } + if wd != newWD { + t.Fatalf("unexpected working directory: %s", newWD) + } +} diff --git a/mount/subprocess_unsafe.s b/mount/subprocess_unsafe.s deleted file mode 100644 index c073fa4ad..000000000 --- a/mount/subprocess_unsafe.s +++ /dev/null @@ -1,15 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ diff --git a/mount/subprocess_unsafe_gc.go b/mount/subprocess_unsafe_gc.go deleted file mode 100644 index 695280a6b..000000000 --- a/mount/subprocess_unsafe_gc.go +++ /dev/null @@ -1,33 +0,0 @@ -//go:build linux && gc -// +build linux,gc - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package mount - -import ( - _ "unsafe" // required for go:linkname. -) - -//go:linkname beforeFork syscall.runtime_BeforeFork -func beforeFork() - -//go:linkname afterFork syscall.runtime_AfterFork -func afterFork() - -//go:linkname afterForkInChild syscall.runtime_AfterForkInChild -func afterForkInChild() diff --git a/mount/subprocess_unsafe_gccgo.go b/mount/subprocess_unsafe_gccgo.go deleted file mode 100644 index 72c38e70d..000000000 --- a/mount/subprocess_unsafe_gccgo.go +++ /dev/null @@ -1,33 +0,0 @@ -//go:build linux && gccgo -// +build linux,gccgo - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package mount - -import ( - _ "unsafe" // required for go:linkname. -) - -//go:linkname beforeFork syscall.runtime__BeforeFork -func beforeFork() - -//go:linkname afterFork syscall.runtime__AfterFork -func afterFork() - -//go:linkname afterForkInChild syscall.runtime__AfterForkInChild -func afterForkInChild()