Merge pull request #10611 from fuweid/getridof-hall-of-shame
core/mount: use ptrace instead of go:linkname
This commit is contained in:
85
pkg/sys/pidfd_linux.go
Normal file
85
pkg/sys/pidfd_linux.go
Normal file
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/containerd/log"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
	// pidfdSupported caches the result of the pidfd probe performed by
	// SupportsPidFD.
	pidfdSupported bool
	// pidfdSupportedOnce guards the probe so that it runs only once.
	pidfdSupportedOnce sync.Once
)
|
||||
|
||||
// SupportsPidFD returns true if current kernel supports pidfd.
|
||||
func SupportsPidFD() bool {
|
||||
pidfdSupportedOnce.Do(func() {
|
||||
logger := log.G(context.Background())
|
||||
|
||||
if err := checkPidFD(); err != nil {
|
||||
logger.WithError(err).Error("failed to ensure the kernel supports pidfd")
|
||||
|
||||
pidfdSupported = false
|
||||
return
|
||||
}
|
||||
pidfdSupported = true
|
||||
})
|
||||
return pidfdSupported
|
||||
}
|
||||
|
||||
func checkPidFD() error {
|
||||
// Linux kernel supports pidfd_open(2) since v5.3.
|
||||
//
|
||||
// https://man7.org/linux/man-pages/man2/pidfd_open.2.html
|
||||
pidfd, err := unix.PidfdOpen(os.Getpid(), 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to invoke pidfd_open: %w", err)
|
||||
}
|
||||
defer unix.Close(pidfd)
|
||||
|
||||
// Linux kernel supports pidfd_send_signal(2) since v5.1.
|
||||
//
|
||||
// https://man7.org/linux/man-pages/man2/pidfd_send_signal.2.html
|
||||
if err := unix.PidfdSendSignal(pidfd, 0, nil, 0); err != nil {
|
||||
return fmt.Errorf("failed to invoke pidfd_send_signal: %w", err)
|
||||
}
|
||||
|
||||
// The waitid(2) supports P_PIDFD since Linux kernel v5.4.
|
||||
//
|
||||
// https://man7.org/linux/man-pages/man2/waitid.2.html
|
||||
werr := IgnoringEINTR(func() error {
|
||||
return unix.Waitid(unix.P_PIDFD, pidfd, nil, unix.WEXITED, nil)
|
||||
})
|
||||
|
||||
// The waitid returns ECHILD since current process isn't the child of current process.
|
||||
if !errors.Is(werr, unix.ECHILD) {
|
||||
return fmt.Errorf("failed to invoke waitid with P_PIDFD: wanted error %v, but got %v",
|
||||
unix.ECHILD, werr)
|
||||
}
|
||||
|
||||
// NOTE: The CLONE_PIDFD flag has been supported since Linux kernel v5.2.
|
||||
// So assumption is that if waitid(2) supports P_PIDFD, current kernel
|
||||
// should support CLONE_PIDFD as well.
|
||||
return nil
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import (
|
||||
_ "unsafe" // required for go:linkname.
|
||||
)
|
||||
|
||||
// beforeFork is bound via go:linkname to syscall.runtime_BeforeFork.
// ForkUserns calls it immediately before issuing the clone system call.
//
//go:linkname beforeFork syscall.runtime_BeforeFork
func beforeFork()

// afterFork is bound via go:linkname to syscall.runtime_AfterFork.
// ForkUserns calls it on the path that returns to the caller, i.e. when
// clone failed or returned a non-zero pid.
//
//go:linkname afterFork syscall.runtime_AfterFork
func afterFork()

// afterForkInChild is bound via go:linkname to
// syscall.runtime_AfterForkInChild. ForkUserns calls it first on the path
// taken when clone returns pid 0 without error.
//
//go:linkname afterForkInChild syscall.runtime_AfterForkInChild
func afterForkInChild()
|
||||
@@ -1,94 +0,0 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// ForkUserns forks a child process into a new user namespace. It returns the
// child's pid and a pidfd that refers to the child process.
//
// Precondition: the calling goroutine must be locked to its OS thread, which
// is a Go runtime requirement.
//
// In addition, the child sets PR_SET_PDEATHSIG to SIGKILL, so the parent's OS
// thread must stay locked. Otherwise the exit of that OS thread would send
// the kill signal to the child even though the parent process is still
// running.
//
// The child never returns from this function: it either exits right away on
// error or waits in ppoll and then exits.
//
//go:norace
//go:noinline
func ForkUserns() (_pid uintptr, _pidfd uintptr, _ syscall.Errno) {
	var (
		pidfd     uintptr
		pid, ppid uintptr
		err       syscall.Errno
	)

	// Record our own pid before cloning so that the child can later compare
	// it with getppid() and detect re-parenting.
	ppid, _, err = syscall.RawSyscall(uintptr(syscall.SYS_GETPID), 0, 0, 0)
	if err != 0 {
		return 0, 0, err
	}

	beforeFork()
	if runtime.GOARCH == "s390x" {
		// NOTE:
		//
		// On the s390 architectures, the order of the first two
		// arguments is reversed.
		//
		// REF: https://man7.org/linux/man-pages/man2/clone.2.html
		pid, _, err = syscall.RawSyscall(syscall.SYS_CLONE,
			0,
			uintptr(syscall.CLONE_NEWUSER|syscall.SIGCHLD|syscall.CLONE_PIDFD),
			uintptr(unsafe.Pointer(&pidfd)),
		)
	} else {
		// The address of pidfd is passed as the third clone argument
		// together with CLONE_PIDFD.
		pid, _, err = syscall.RawSyscall(syscall.SYS_CLONE,
			uintptr(syscall.CLONE_NEWUSER|syscall.SIGCHLD|syscall.CLONE_PIDFD),
			0,
			uintptr(unsafe.Pointer(&pidfd)),
		)
	}
	// Clone failed or returned a non-zero pid: hand the result back to the
	// caller.
	if err != 0 || pid != 0 {
		afterFork()
		return pid, pidfd, err
	}
	// Everything below runs only when clone returned pid 0 without error,
	// i.e. in the child.
	afterForkInChild()

	// Ask the kernel to deliver SIGKILL to the child when its parent dies.
	if _, _, err = syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(syscall.SIGKILL), 0); err != 0 {
		goto err
	}

	pid, _, err = syscall.RawSyscall(syscall.SYS_GETPPID, 0, 0, 0)
	if err != 0 {
		goto err
	}

	// Exit if the child has already been re-parented, i.e. its current
	// parent is no longer the process that called clone.
	if pid != ppid {
		goto err
	}

	// NOTE(review): ppoll is invoked with all-zero arguments; presumably
	// this parks the child until a signal arrives -- confirm against
	// ppoll(2).
	_, _, err = syscall.RawSyscall(syscall.SYS_PPOLL, 0, 0, 0)
err:
	// Terminate the child, using the last errno value as the exit status.
	syscall.RawSyscall(syscall.SYS_EXIT, uintptr(err), 0, 0)
	panic("unreachable")
}
|
||||
Reference in New Issue
Block a user