Merge pull request #7513 from cpuguy83/nix_mount_fork
Replace mount fork hack with CLONE_FS
This commit is contained in:
commit
816f7086bd
@ -1,145 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright The containerd Authors.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package mount
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"runtime"
|
|
||||||
"syscall"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
"github.com/containerd/containerd/log"
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
// fMountat performs mount from the provided directory.
|
|
||||||
func fMountat(dirfd uintptr, source, target, fstype string, flags uintptr, data string) error {
|
|
||||||
var (
|
|
||||||
sourceP, targetP, fstypeP, dataP *byte
|
|
||||||
pid uintptr
|
|
||||||
err error
|
|
||||||
errno, status syscall.Errno
|
|
||||||
)
|
|
||||||
|
|
||||||
sourceP, err = syscall.BytePtrFromString(source)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
targetP, err = syscall.BytePtrFromString(target)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
fstypeP, err = syscall.BytePtrFromString(fstype)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if data != "" {
|
|
||||||
dataP, err = syscall.BytePtrFromString(data)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
runtime.LockOSThread()
|
|
||||||
defer runtime.UnlockOSThread()
|
|
||||||
|
|
||||||
var pipefds [2]int
|
|
||||||
if err := syscall.Pipe2(pipefds[:], syscall.O_CLOEXEC); err != nil {
|
|
||||||
return fmt.Errorf("failed to open pipe: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
defer func() {
|
|
||||||
// close both ends of the pipe in a deferred function, since open file
|
|
||||||
// descriptor table is shared with child
|
|
||||||
syscall.Close(pipefds[0])
|
|
||||||
syscall.Close(pipefds[1])
|
|
||||||
}()
|
|
||||||
|
|
||||||
pid, errno = forkAndMountat(dirfd,
|
|
||||||
uintptr(unsafe.Pointer(sourceP)),
|
|
||||||
uintptr(unsafe.Pointer(targetP)),
|
|
||||||
uintptr(unsafe.Pointer(fstypeP)),
|
|
||||||
flags,
|
|
||||||
uintptr(unsafe.Pointer(dataP)),
|
|
||||||
pipefds[1],
|
|
||||||
)
|
|
||||||
|
|
||||||
if errno != 0 {
|
|
||||||
return fmt.Errorf("failed to fork thread: %w", errno)
|
|
||||||
}
|
|
||||||
|
|
||||||
defer func() {
|
|
||||||
_, err := unix.Wait4(int(pid), nil, 0, nil)
|
|
||||||
for err == syscall.EINTR {
|
|
||||||
_, err = unix.Wait4(int(pid), nil, 0, nil)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.L.WithError(err).Debugf("failed to find pid=%d process", pid)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
_, _, errno = syscall.RawSyscall(syscall.SYS_READ,
|
|
||||||
uintptr(pipefds[0]),
|
|
||||||
uintptr(unsafe.Pointer(&status)),
|
|
||||||
unsafe.Sizeof(status))
|
|
||||||
if errno != 0 {
|
|
||||||
return fmt.Errorf("failed to read pipe: %w", errno)
|
|
||||||
}
|
|
||||||
|
|
||||||
if status != 0 {
|
|
||||||
return fmt.Errorf("failed to mount: %w", status)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// forkAndMountat will fork thread, change working dir and mount.
|
|
||||||
//
|
|
||||||
// precondition: the runtime OS thread must be locked.
|
|
||||||
func forkAndMountat(dirfd uintptr, source, target, fstype, flags, data uintptr, pipefd int) (pid uintptr, errno syscall.Errno) {
|
|
||||||
|
|
||||||
// block signal during clone
|
|
||||||
beforeFork()
|
|
||||||
|
|
||||||
// the cloned thread shares the open file descriptor, but the thread
|
|
||||||
// never be reused by runtime.
|
|
||||||
pid, _, errno = syscall.RawSyscall6(syscall.SYS_CLONE, uintptr(syscall.SIGCHLD)|syscall.CLONE_FILES, 0, 0, 0, 0, 0)
|
|
||||||
if errno != 0 || pid != 0 {
|
|
||||||
// restore all signals
|
|
||||||
afterFork()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// restore all signals
|
|
||||||
afterForkInChild()
|
|
||||||
|
|
||||||
// change working dir
|
|
||||||
_, _, errno = syscall.RawSyscall(syscall.SYS_FCHDIR, dirfd, 0, 0)
|
|
||||||
if errno != 0 {
|
|
||||||
goto childerr
|
|
||||||
}
|
|
||||||
_, _, errno = syscall.RawSyscall6(syscall.SYS_MOUNT, source, target, fstype, flags, data, 0)
|
|
||||||
|
|
||||||
childerr:
|
|
||||||
_, _, errno = syscall.RawSyscall(syscall.SYS_WRITE, uintptr(pipefd), uintptr(unsafe.Pointer(&errno)), unsafe.Sizeof(errno))
|
|
||||||
syscall.RawSyscall(syscall.SYS_EXIT, uintptr(errno), 0, 0)
|
|
||||||
panic("unreachable")
|
|
||||||
}
|
|
@ -1,164 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright The containerd Authors.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package mount
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"syscall"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/containerd/continuity/fs/fstest"
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
// fMountatCaseFunc is the signature of a single fMountat test case. root is
// the directory the case may work in; makeTestForFMountat passes a fresh
// t.TempDir() for it.
type fMountatCaseFunc func(t *testing.T, root string)
|
|
||||||
|
|
||||||
func TestFMountat(t *testing.T) {
|
|
||||||
if unix.Geteuid() != 0 {
|
|
||||||
t.Skip("Needs to be run as root")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Run("Normal", makeTestForFMountat(testFMountatNormal))
|
|
||||||
t.Run("ChdirWithFileFd", makeTestForFMountat(testFMountatWithFileFd))
|
|
||||||
t.Run("MountWithInvalidSource", makeTestForFMountat(testFMountatWithInvalidSource))
|
|
||||||
}
|
|
||||||
|
|
||||||
func makeTestForFMountat(fn fMountatCaseFunc) func(t *testing.T) {
|
|
||||||
return func(t *testing.T) {
|
|
||||||
t.Parallel()
|
|
||||||
|
|
||||||
suiteDir := t.TempDir()
|
|
||||||
|
|
||||||
fn(t, suiteDir)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testFMountatNormal(t *testing.T, root string) {
|
|
||||||
expectedContent := "bye re-exec!\n"
|
|
||||||
apply := fstest.Apply(
|
|
||||||
fstest.CreateFile("/hi", []byte(expectedContent), 0777),
|
|
||||||
)
|
|
||||||
|
|
||||||
workdir := filepath.Join(root, "work")
|
|
||||||
if err := os.MkdirAll(workdir, 0777); err != nil {
|
|
||||||
t.Fatalf("failed to create dir(%s): %+v", workdir, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := apply.Apply(workdir); err != nil {
|
|
||||||
t.Fatalf("failed to prepare source dir: %+v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
atdir := filepath.Join(root, "at")
|
|
||||||
if err := os.MkdirAll(atdir, 0777); err != nil {
|
|
||||||
t.Fatalf("failed to create working dir(%s): %+v", atdir, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
fsdir := filepath.Join(atdir, "fs")
|
|
||||||
if err := os.MkdirAll(fsdir, 0777); err != nil {
|
|
||||||
t.Fatalf("failed to create mount point dir(%s): %+v", fsdir, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
f, err := os.Open(atdir)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to open dir(%s): %+v", atdir, err)
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
// mount work to fs
|
|
||||||
if err = fMountat(f.Fd(), workdir, "fs", "bind", unix.MS_BIND|unix.MS_RDONLY, ""); err != nil {
|
|
||||||
t.Fatalf("expected no error here, but got error: %+v", err)
|
|
||||||
}
|
|
||||||
defer umount(t, fsdir)
|
|
||||||
|
|
||||||
// check hi file
|
|
||||||
content, err := os.ReadFile(filepath.Join(fsdir, "hi"))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to read file: %+v", err)
|
|
||||||
}
|
|
||||||
if got := string(content); got != expectedContent {
|
|
||||||
t.Fatalf("expected to get(%v), but got(%v)", expectedContent, got)
|
|
||||||
}
|
|
||||||
|
|
||||||
// check the working directory
|
|
||||||
cwd, err := os.Getwd()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to get current working dir: %+v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if cwd == atdir {
|
|
||||||
t.Fatal("should not change the current working directory")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testFMountatWithFileFd(t *testing.T, root string) {
|
|
||||||
// not a directory
|
|
||||||
expectedErr := syscall.Errno(20)
|
|
||||||
|
|
||||||
emptyFile := filepath.Join(root, "emptyFile")
|
|
||||||
f, err := os.Create(emptyFile)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create file(%s): %+v", emptyFile, err)
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
err = fMountat(f.Fd(), filepath.Join(root, "empty"), filepath.Join(root, "work"), "", 0, "")
|
|
||||||
if !errors.Is(err, expectedErr) {
|
|
||||||
t.Fatalf("expected error %v, but got %v", expectedErr, errors.Unwrap(err))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testFMountatWithInvalidSource(t *testing.T, root string) {
|
|
||||||
// no such file or directory
|
|
||||||
expectedErr := syscall.Errno(2)
|
|
||||||
|
|
||||||
atdir := filepath.Join(root, "at")
|
|
||||||
if err := os.MkdirAll(atdir, 0777); err != nil {
|
|
||||||
t.Fatalf("failed to create dir(%s): %+v", atdir, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
f, err := os.Open(root)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to open dir(%s): %+v", atdir, err)
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
err = fMountat(f.Fd(), filepath.Join(root, "oops"), "at", "bind", unix.MS_BIND, "")
|
|
||||||
if !errors.Is(err, expectedErr) {
|
|
||||||
t.Fatalf("expected error %v, but got %v", expectedErr, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func umount(t *testing.T, target string) {
|
|
||||||
for i := 0; i < 50; i++ {
|
|
||||||
if err := unix.Unmount(target, unix.MNT_DETACH); err != nil {
|
|
||||||
switch err {
|
|
||||||
case unix.EBUSY:
|
|
||||||
time.Sleep(50 * time.Millisecond)
|
|
||||||
continue
|
|
||||||
case unix.EINVAL:
|
|
||||||
return
|
|
||||||
default:
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
t.Fatalf("failed to unmount target %s", target)
|
|
||||||
}
|
|
@ -21,6 +21,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -363,24 +364,29 @@ func mountAt(chdir string, source, target, fstype string, flags uintptr, data st
|
|||||||
return unix.Mount(source, target, fstype, flags, data)
|
return unix.Mount(source, target, fstype, flags, data)
|
||||||
}
|
}
|
||||||
|
|
||||||
f, err := os.Open(chdir)
|
ch := make(chan error, 1)
|
||||||
if err != nil {
|
go func() {
|
||||||
return fmt.Errorf("failed to mountat: %w", err)
|
runtime.LockOSThread()
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
fs, err := f.Stat()
|
// Do not unlock this thread.
|
||||||
if err != nil {
|
// If the thread is unlocked go will try to use it for other goroutines.
|
||||||
return fmt.Errorf("failed to mountat: %w", err)
|
// However it is not possible to restore the thread state after CLONE_FS.
|
||||||
}
|
//
|
||||||
|
// Once the goroutine exits the thread should eventually be terminated by go.
|
||||||
|
|
||||||
if !fs.IsDir() {
|
if err := unix.Unshare(unix.CLONE_FS); err != nil {
|
||||||
return fmt.Errorf("failed to mountat: %s is not dir", chdir)
|
ch <- err
|
||||||
}
|
return
|
||||||
if err := fMountat(f.Fd(), source, target, fstype, flags, data); err != nil {
|
}
|
||||||
return fmt.Errorf("failed to mountat: %w", err)
|
|
||||||
}
|
if err := unix.Chdir(chdir); err != nil {
|
||||||
return nil
|
ch <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ch <- unix.Mount(source, target, fstype, flags, data)
|
||||||
|
}()
|
||||||
|
return <-ch
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Mount) mountWithHelper(helperBinary, typePrefix, target string) error {
|
func (m *Mount) mountWithHelper(helperBinary, typePrefix, target string) error {
|
||||||
|
@ -25,6 +25,7 @@ import (
|
|||||||
|
|
||||||
"github.com/containerd/continuity/testutil"
|
"github.com/containerd/continuity/testutil"
|
||||||
exec "golang.org/x/sys/execabs"
|
exec "golang.org/x/sys/execabs"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestLongestCommonPrefix(t *testing.T) {
|
func TestLongestCommonPrefix(t *testing.T) {
|
||||||
@ -126,3 +127,48 @@ func TestFUSEHelper(t *testing.T) {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMountAt(t *testing.T) {
|
||||||
|
testutil.RequiresRoot(t)
|
||||||
|
|
||||||
|
dir1 := t.TempDir()
|
||||||
|
dir2 := t.TempDir()
|
||||||
|
|
||||||
|
defer unix.Unmount(filepath.Join(dir2, "bar"), unix.MNT_DETACH)
|
||||||
|
|
||||||
|
if err := os.WriteFile(filepath.Join(dir1, "foo"), []byte("foo"), 0644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.WriteFile(filepath.Join(dir2, "bar"), []byte{}, 0644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
wd, err := os.Getwd()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// mount ${dir1}/foo at ${dir2}/bar
|
||||||
|
// But since we are using `mountAt` we only need to specify the relative path to dir2 as the target mountAt will chdir to there.
|
||||||
|
if err := mountAt(dir2, filepath.Join(dir1, "foo"), "bar", "none", unix.MS_BIND, ""); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
b, err := os.ReadFile(filepath.Join(dir2, "bar"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if string(b) != "foo" {
|
||||||
|
t.Fatalf("unexpected file content: %s", b)
|
||||||
|
}
|
||||||
|
|
||||||
|
newWD, err := os.Getwd()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if wd != newWD {
|
||||||
|
t.Fatalf("unexpected working directory: %s", newWD)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,15 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright The containerd Authors.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
*/
|
|
@ -1,33 +0,0 @@
|
|||||||
//go:build linux && gc
|
|
||||||
// +build linux,gc
|
|
||||||
|
|
||||||
/*
|
|
||||||
Copyright The containerd Authors.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package mount
|
|
||||||
|
|
||||||
import (
|
|
||||||
_ "unsafe" // required for go:linkname.
|
|
||||||
)
|
|
||||||
|
|
||||||
// beforeFork has no body in this package; the go:linkname directive below
// binds it to syscall.runtime_BeforeFork. forkAndMountat calls it right
// before the clone syscall, where it is described as blocking signals.
//
//go:linkname beforeFork syscall.runtime_BeforeFork
|
|
||||||
func beforeFork()
|
|
||||||
|
|
||||||
// afterFork has no body in this package; the go:linkname directive below
// binds it to syscall.runtime_AfterFork. forkAndMountat calls it on the
// non-child path after the clone syscall, where it is described as restoring
// all signals.
//
//go:linkname afterFork syscall.runtime_AfterFork
|
|
||||||
func afterFork()
|
|
||||||
|
|
||||||
// afterForkInChild has no body in this package; the go:linkname directive
// below binds it to syscall.runtime_AfterForkInChild. forkAndMountat calls it
// first thing on the child path, where it is described as restoring all
// signals.
//
//go:linkname afterForkInChild syscall.runtime_AfterForkInChild
|
|
||||||
func afterForkInChild()
|
|
@ -1,33 +0,0 @@
|
|||||||
//go:build linux && gccgo
|
|
||||||
// +build linux,gccgo
|
|
||||||
|
|
||||||
/*
|
|
||||||
Copyright The containerd Authors.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package mount
|
|
||||||
|
|
||||||
import (
|
|
||||||
_ "unsafe" // required for go:linkname.
|
|
||||||
)
|
|
||||||
|
|
||||||
// beforeFork is the gccgo counterpart of the gc declaration: it has no body
// here and is bound by the go:linkname directive below to
// syscall.runtime__BeforeFork (note the double underscore in the name).
//
//go:linkname beforeFork syscall.runtime__BeforeFork
|
|
||||||
func beforeFork()
|
|
||||||
|
|
||||||
// afterFork is the gccgo counterpart of the gc declaration: it has no body
// here and is bound by the go:linkname directive below to
// syscall.runtime__AfterFork (note the double underscore in the name).
//
//go:linkname afterFork syscall.runtime__AfterFork
|
|
||||||
func afterFork()
|
|
||||||
|
|
||||||
// afterForkInChild is the gccgo counterpart of the gc declaration: it has no
// body here and is bound by the go:linkname directive below to
// syscall.runtime__AfterForkInChild (note the double underscore in the name).
//
//go:linkname afterForkInChild syscall.runtime__AfterForkInChild
|
|
||||||
func afterForkInChild()
|
|
Loading…
Reference in New Issue
Block a user