Merge pull request #7513 from cpuguy83/nix_mount_fork
Replace mount fork hack with CLONE_FS
This commit is contained in:
commit
816f7086bd
@ -1,145 +0,0 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
"github.com/containerd/containerd/log"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// fMountat performs mount from the provided directory.
|
||||
func fMountat(dirfd uintptr, source, target, fstype string, flags uintptr, data string) error {
|
||||
var (
|
||||
sourceP, targetP, fstypeP, dataP *byte
|
||||
pid uintptr
|
||||
err error
|
||||
errno, status syscall.Errno
|
||||
)
|
||||
|
||||
sourceP, err = syscall.BytePtrFromString(source)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
targetP, err = syscall.BytePtrFromString(target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fstypeP, err = syscall.BytePtrFromString(fstype)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if data != "" {
|
||||
dataP, err = syscall.BytePtrFromString(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
|
||||
var pipefds [2]int
|
||||
if err := syscall.Pipe2(pipefds[:], syscall.O_CLOEXEC); err != nil {
|
||||
return fmt.Errorf("failed to open pipe: %w", err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
// close both ends of the pipe in a deferred function, since open file
|
||||
// descriptor table is shared with child
|
||||
syscall.Close(pipefds[0])
|
||||
syscall.Close(pipefds[1])
|
||||
}()
|
||||
|
||||
pid, errno = forkAndMountat(dirfd,
|
||||
uintptr(unsafe.Pointer(sourceP)),
|
||||
uintptr(unsafe.Pointer(targetP)),
|
||||
uintptr(unsafe.Pointer(fstypeP)),
|
||||
flags,
|
||||
uintptr(unsafe.Pointer(dataP)),
|
||||
pipefds[1],
|
||||
)
|
||||
|
||||
if errno != 0 {
|
||||
return fmt.Errorf("failed to fork thread: %w", errno)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
_, err := unix.Wait4(int(pid), nil, 0, nil)
|
||||
for err == syscall.EINTR {
|
||||
_, err = unix.Wait4(int(pid), nil, 0, nil)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.L.WithError(err).Debugf("failed to find pid=%d process", pid)
|
||||
}
|
||||
}()
|
||||
|
||||
_, _, errno = syscall.RawSyscall(syscall.SYS_READ,
|
||||
uintptr(pipefds[0]),
|
||||
uintptr(unsafe.Pointer(&status)),
|
||||
unsafe.Sizeof(status))
|
||||
if errno != 0 {
|
||||
return fmt.Errorf("failed to read pipe: %w", errno)
|
||||
}
|
||||
|
||||
if status != 0 {
|
||||
return fmt.Errorf("failed to mount: %w", status)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// forkAndMountat clones a child with SIGCHLD|CLONE_FILES; the child changes
// its working directory to dirfd, performs the mount, writes the resulting
// errno to pipefd and exits.
|
||||
//
|
||||
// In the caller it returns the child's pid, or a non-zero errno if the clone
// itself failed. source, target, fstype and data are raw pointer values that
// are passed straight to the mount syscall.
//
// precondition: the runtime OS thread must be locked.
|
||||
func forkAndMountat(dirfd uintptr, source, target, fstype, flags, data uintptr, pipefd int) (pid uintptr, errno syscall.Errno) {
|
||||
|
||||
// block signal during clone
|
||||
beforeFork()
|
||||
|
||||
// the clone shares the open file descriptor table with the caller
// (CLONE_FILES), but the cloned thread is never reused by the runtime.
|
||||
// Only raw syscalls are used from here on.
|
||||
pid, _, errno = syscall.RawSyscall6(syscall.SYS_CLONE, uintptr(syscall.SIGCHLD)|syscall.CLONE_FILES, 0, 0, 0, 0, 0)
|
||||
// Caller side: either the clone failed (errno != 0) or we are the parent
// (pid != 0).
if errno != 0 || pid != 0 {
|
||||
// restore all signals
|
||||
afterFork()
|
||||
return
|
||||
}
|
||||
|
||||
// Child side from here on (pid == 0).
// restore all signals
|
||||
afterForkInChild()
|
||||
|
||||
// change working dir
|
||||
_, _, errno = syscall.RawSyscall(syscall.SYS_FCHDIR, dirfd, 0, 0)
|
||||
if errno != 0 {
|
||||
// skip the mount and report the fchdir errno instead
goto childerr
|
||||
}
|
||||
_, _, errno = syscall.RawSyscall6(syscall.SYS_MOUNT, source, target, fstype, flags, data, 0)
|
||||
|
||||
childerr:
|
||||
// Report errno (zero on success) through the pipe. errno is then
// overwritten with the result of the write itself, so the exit code below
// reflects the write, not the mount.
_, _, errno = syscall.RawSyscall(syscall.SYS_WRITE, uintptr(pipefd), uintptr(unsafe.Pointer(&errno)), unsafe.Sizeof(errno))
|
||||
syscall.RawSyscall(syscall.SYS_EXIT, uintptr(errno), 0, 0)
|
||||
// SYS_EXIT does not return; the panic only satisfies the compiler.
panic("unreachable")
|
||||
}
|
@ -1,164 +0,0 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/continuity/fs/fstest"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type fMountatCaseFunc func(t *testing.T, root string)
|
||||
|
||||
func TestFMountat(t *testing.T) {
|
||||
if unix.Geteuid() != 0 {
|
||||
t.Skip("Needs to be run as root")
|
||||
return
|
||||
}
|
||||
|
||||
t.Run("Normal", makeTestForFMountat(testFMountatNormal))
|
||||
t.Run("ChdirWithFileFd", makeTestForFMountat(testFMountatWithFileFd))
|
||||
t.Run("MountWithInvalidSource", makeTestForFMountat(testFMountatWithInvalidSource))
|
||||
}
|
||||
|
||||
func makeTestForFMountat(fn fMountatCaseFunc) func(t *testing.T) {
|
||||
return func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
suiteDir := t.TempDir()
|
||||
|
||||
fn(t, suiteDir)
|
||||
}
|
||||
}
|
||||
|
||||
func testFMountatNormal(t *testing.T, root string) {
|
||||
expectedContent := "bye re-exec!\n"
|
||||
apply := fstest.Apply(
|
||||
fstest.CreateFile("/hi", []byte(expectedContent), 0777),
|
||||
)
|
||||
|
||||
workdir := filepath.Join(root, "work")
|
||||
if err := os.MkdirAll(workdir, 0777); err != nil {
|
||||
t.Fatalf("failed to create dir(%s): %+v", workdir, err)
|
||||
}
|
||||
|
||||
if err := apply.Apply(workdir); err != nil {
|
||||
t.Fatalf("failed to prepare source dir: %+v", err)
|
||||
}
|
||||
|
||||
atdir := filepath.Join(root, "at")
|
||||
if err := os.MkdirAll(atdir, 0777); err != nil {
|
||||
t.Fatalf("failed to create working dir(%s): %+v", atdir, err)
|
||||
}
|
||||
|
||||
fsdir := filepath.Join(atdir, "fs")
|
||||
if err := os.MkdirAll(fsdir, 0777); err != nil {
|
||||
t.Fatalf("failed to create mount point dir(%s): %+v", fsdir, err)
|
||||
}
|
||||
|
||||
f, err := os.Open(atdir)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to open dir(%s): %+v", atdir, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// mount work to fs
|
||||
if err = fMountat(f.Fd(), workdir, "fs", "bind", unix.MS_BIND|unix.MS_RDONLY, ""); err != nil {
|
||||
t.Fatalf("expected no error here, but got error: %+v", err)
|
||||
}
|
||||
defer umount(t, fsdir)
|
||||
|
||||
// check hi file
|
||||
content, err := os.ReadFile(filepath.Join(fsdir, "hi"))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to read file: %+v", err)
|
||||
}
|
||||
if got := string(content); got != expectedContent {
|
||||
t.Fatalf("expected to get(%v), but got(%v)", expectedContent, got)
|
||||
}
|
||||
|
||||
// check the working directory
|
||||
cwd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get current working dir: %+v", err)
|
||||
}
|
||||
|
||||
if cwd == atdir {
|
||||
t.Fatal("should not change the current working directory")
|
||||
}
|
||||
}
|
||||
|
||||
func testFMountatWithFileFd(t *testing.T, root string) {
|
||||
// not a directory
|
||||
expectedErr := syscall.Errno(20)
|
||||
|
||||
emptyFile := filepath.Join(root, "emptyFile")
|
||||
f, err := os.Create(emptyFile)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create file(%s): %+v", emptyFile, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
err = fMountat(f.Fd(), filepath.Join(root, "empty"), filepath.Join(root, "work"), "", 0, "")
|
||||
if !errors.Is(err, expectedErr) {
|
||||
t.Fatalf("expected error %v, but got %v", expectedErr, errors.Unwrap(err))
|
||||
}
|
||||
}
|
||||
|
||||
func testFMountatWithInvalidSource(t *testing.T, root string) {
|
||||
// no such file or directory
|
||||
expectedErr := syscall.Errno(2)
|
||||
|
||||
atdir := filepath.Join(root, "at")
|
||||
if err := os.MkdirAll(atdir, 0777); err != nil {
|
||||
t.Fatalf("failed to create dir(%s): %+v", atdir, err)
|
||||
}
|
||||
|
||||
f, err := os.Open(root)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to open dir(%s): %+v", atdir, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
err = fMountat(f.Fd(), filepath.Join(root, "oops"), "at", "bind", unix.MS_BIND, "")
|
||||
if !errors.Is(err, expectedErr) {
|
||||
t.Fatalf("expected error %v, but got %v", expectedErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
func umount(t *testing.T, target string) {
|
||||
for i := 0; i < 50; i++ {
|
||||
if err := unix.Unmount(target, unix.MNT_DETACH); err != nil {
|
||||
switch err {
|
||||
case unix.EBUSY:
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
continue
|
||||
case unix.EINVAL:
|
||||
return
|
||||
default:
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
t.Fatalf("failed to unmount target %s", target)
|
||||
}
|
@ -21,6 +21,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@ -363,24 +364,29 @@ func mountAt(chdir string, source, target, fstype string, flags uintptr, data st
|
||||
return unix.Mount(source, target, fstype, flags, data)
|
||||
}
|
||||
|
||||
f, err := os.Open(chdir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to mountat: %w", err)
|
||||
}
|
||||
defer f.Close()
|
||||
ch := make(chan error, 1)
|
||||
go func() {
|
||||
runtime.LockOSThread()
|
||||
|
||||
fs, err := f.Stat()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to mountat: %w", err)
|
||||
}
|
||||
// Do not unlock this thread.
|
||||
// If the thread is unlocked go will try to use it for other goroutines.
|
||||
// However it is not possible to restore the thread state after CLONE_FS.
|
||||
//
|
||||
// Once the goroutine exits the thread should eventually be terminated by go.
|
||||
|
||||
if !fs.IsDir() {
|
||||
return fmt.Errorf("failed to mountat: %s is not dir", chdir)
|
||||
}
|
||||
if err := fMountat(f.Fd(), source, target, fstype, flags, data); err != nil {
|
||||
return fmt.Errorf("failed to mountat: %w", err)
|
||||
}
|
||||
return nil
|
||||
if err := unix.Unshare(unix.CLONE_FS); err != nil {
|
||||
ch <- err
|
||||
return
|
||||
}
|
||||
|
||||
if err := unix.Chdir(chdir); err != nil {
|
||||
ch <- err
|
||||
return
|
||||
}
|
||||
|
||||
ch <- unix.Mount(source, target, fstype, flags, data)
|
||||
}()
|
||||
return <-ch
|
||||
}
|
||||
|
||||
func (m *Mount) mountWithHelper(helperBinary, typePrefix, target string) error {
|
||||
|
@ -25,6 +25,7 @@ import (
|
||||
|
||||
"github.com/containerd/continuity/testutil"
|
||||
exec "golang.org/x/sys/execabs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func TestLongestCommonPrefix(t *testing.T) {
|
||||
@ -126,3 +127,48 @@ func TestFUSEHelper(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMountAt(t *testing.T) {
|
||||
testutil.RequiresRoot(t)
|
||||
|
||||
dir1 := t.TempDir()
|
||||
dir2 := t.TempDir()
|
||||
|
||||
defer unix.Unmount(filepath.Join(dir2, "bar"), unix.MNT_DETACH)
|
||||
|
||||
if err := os.WriteFile(filepath.Join(dir1, "foo"), []byte("foo"), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(filepath.Join(dir2, "bar"), []byte{}, 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// mount ${dir1}/foo at ${dir2}/bar
|
||||
// But since we are using `mountAt` we only need to specify the relative path to dir2 as the target mountAt will chdir to there.
|
||||
if err := mountAt(dir2, filepath.Join(dir1, "foo"), "bar", "none", unix.MS_BIND, ""); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
b, err := os.ReadFile(filepath.Join(dir2, "bar"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if string(b) != "foo" {
|
||||
t.Fatalf("unexpected file content: %s", b)
|
||||
}
|
||||
|
||||
newWD, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if wd != newWD {
|
||||
t.Fatalf("unexpected working directory: %s", newWD)
|
||||
}
|
||||
}
|
||||
|
@ -1,15 +0,0 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
@ -1,33 +0,0 @@
|
||||
//go:build linux && gc
|
||||
// +build linux,gc
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
_ "unsafe" // required for go:linkname.
|
||||
)
|
||||
|
||||
// beforeFork has no body here; the go:linkname directive binds it to
// syscall.runtime_BeforeFork.
// NOTE(review): presumably the runtime hook that blocks signals ahead of a
// fork, judging by its use in forkAndMountat — confirm against the Go runtime.
//
//go:linkname beforeFork syscall.runtime_BeforeFork
|
||||
func beforeFork()
|
||||
|
||||
// afterFork has no body here; the go:linkname directive binds it to
// syscall.runtime_AfterFork. forkAndMountat calls it on the caller side once
// the clone has returned.
//
//go:linkname afterFork syscall.runtime_AfterFork
|
||||
func afterFork()
|
||||
|
||||
// afterForkInChild has no body here; the go:linkname directive binds it to
// syscall.runtime_AfterForkInChild. forkAndMountat calls it first thing in
// the cloned child.
//
//go:linkname afterForkInChild syscall.runtime_AfterForkInChild
|
||||
func afterForkInChild()
|
@ -1,33 +0,0 @@
|
||||
//go:build linux && gccgo
|
||||
// +build linux,gccgo
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
_ "unsafe" // required for go:linkname.
|
||||
)
|
||||
|
||||
// beforeFork has no body here; the go:linkname directive binds it to
// syscall.runtime__BeforeFork (the symbol name used for the gccgo build).
// NOTE(review): presumably the runtime hook that blocks signals ahead of a
// fork, judging by its use in forkAndMountat — confirm against the runtime.
//
//go:linkname beforeFork syscall.runtime__BeforeFork
|
||||
func beforeFork()
|
||||
|
||||
// afterFork has no body here; the go:linkname directive binds it to
// syscall.runtime__AfterFork. forkAndMountat calls it on the caller side once
// the clone has returned.
//
//go:linkname afterFork syscall.runtime__AfterFork
|
||||
func afterFork()
|
||||
|
||||
// afterForkInChild has no body here; the go:linkname directive binds it to
// syscall.runtime__AfterForkInChild. forkAndMountat calls it first thing in
// the cloned child.
//
//go:linkname afterForkInChild syscall.runtime__AfterForkInChild
|
||||
func afterForkInChild()
|
Loading…
Reference in New Issue
Block a user