The Go runtime has started to [lock down future uses of linkname][1] since
go1.23. In the Go source code, the containerd project is called out in the
[hall of shame][2] comment. In containerd, go:linkname is used to fork a no-op
subprocess efficiently. However, given that comment, I would like to use
ptrace instead and remove go:linkname from the whole repository.
With go1.22 `go:linkname`:
```bash
$ go test -bench=.  -benchmem ./ -exec sudo
goos: linux
goarch: amd64
pkg: github.com/containerd/containerd/v2/core/mount
cpu: AMD Ryzen 7 5800H with Radeon Graphics
BenchmarkBatchRunGetUsernsFD_Concurrent1-16                 2440            533320 ns/op            1145 B/op         43 allocs/op
BenchmarkBatchRunGetUsernsFD_Concurrent10-16                 342           3661616 ns/op           11562 B/op        421 allocs/op
PASS
ok      github.com/containerd/containerd/v2/core/mount  2.983s
```
With go1.22 `ptrace`:
```bash
$ go test -bench=.  -benchmem ./ -exec sudo
goos: linux
goarch: amd64
pkg: github.com/containerd/containerd/v2/core/mount
cpu: AMD Ryzen 7 5800H with Radeon Graphics
BenchmarkBatchRunGetUsernsFD_Concurrent1-16                 1785            739557 ns/op            3948 B/op         68 allocs/op
BenchmarkBatchRunGetUsernsFD_Concurrent10-16                 328           4024300 ns/op           39601 B/op        671 allocs/op
PASS
ok      github.com/containerd/containerd/v2/core/mount  3.104s
```
With go1.23 `ptrace`:
```bash
$ go test -bench=.  -benchmem ./ -exec sudo
goos: linux
goarch: amd64
pkg: github.com/containerd/containerd/v2/core/mount
cpu: AMD Ryzen 7 5800H with Radeon Graphics
BenchmarkBatchRunGetUsernsFD_Concurrent1-16                 1815            723252 ns/op            4220 B/op         69 allocs/op
BenchmarkBatchRunGetUsernsFD_Concurrent10-16                 319           3957157 ns/op           42351 B/op        682 allocs/op
PASS
ok      github.com/containerd/containerd/v2/core/mount  3.051s
```
Diff:
The `ptrace` mode is slower than the `go:linkname` mode. However, it's acceptable.
```
goos: linux
goarch: amd64
pkg: github.com/containerd/containerd/v2/core/mount
cpu: AMD Ryzen 7 5800H with Radeon Graphics
                                    │ go122-golinkname │             go122-ptrace              │             go123-ptrace              │
                                    │      sec/op      │    sec/op     vs base                 │    sec/op     vs base                 │
BatchRunGetUsernsFD_Concurrent1-16        533.3µ ± ∞ ¹   739.6µ ± ∞ ¹        ~ (p=1.000 n=1) ²   723.3µ ± ∞ ¹        ~ (p=1.000 n=1) ²
BatchRunGetUsernsFD_Concurrent10-16       3.662m ± ∞ ¹   4.024m ± ∞ ¹        ~ (p=1.000 n=1) ²   3.957m ± ∞ ¹        ~ (p=1.000 n=1) ²
geomean                                   1.397m         1.725m        +23.45%                   1.692m        +21.06%
¹ need >= 6 samples for confidence interval at level 0.95
² need >= 4 samples to detect a difference at alpha level 0.05
                                    │ go122-golinkname │              go122-ptrace               │              go123-ptrace               │
                                    │       B/op       │     B/op       vs base                  │     B/op       vs base                  │
BatchRunGetUsernsFD_Concurrent1-16       1.118Ki ± ∞ ¹   3.855Ki ± ∞ ¹         ~ (p=1.000 n=1) ²   4.121Ki ± ∞ ¹         ~ (p=1.000 n=1) ²
BatchRunGetUsernsFD_Concurrent10-16      11.29Ki ± ∞ ¹   38.67Ki ± ∞ ¹         ~ (p=1.000 n=1) ²   41.36Ki ± ∞ ¹         ~ (p=1.000 n=1) ²
geomean                                  3.553Ki         12.21Ki        +243.65%                   13.06Ki        +267.43%
¹ need >= 6 samples for confidence interval at level 0.95
² need >= 4 samples to detect a difference at alpha level 0.05
                                    │ go122-golinkname │             go122-ptrace             │             go123-ptrace             │
                                    │    allocs/op     │  allocs/op   vs base                 │  allocs/op   vs base                 │
BatchRunGetUsernsFD_Concurrent1-16         43.00 ± ∞ ¹   68.00 ± ∞ ¹        ~ (p=1.000 n=1) ²   69.00 ± ∞ ¹        ~ (p=1.000 n=1) ²
BatchRunGetUsernsFD_Concurrent10-16        421.0 ± ∞ ¹   671.0 ± ∞ ¹        ~ (p=1.000 n=1) ²   682.0 ± ∞ ¹        ~ (p=1.000 n=1) ²
geomean                                    134.5         213.6        +58.76%                   216.9        +61.23%
¹ need >= 6 samples for confidence interval at level 0.95
² need >= 4 samples to detect a difference at alpha level 0.05
```
[1]: <https://github.com/golang/go/issues/67401>
[2]: <https://github.com/golang/go/blob/release-branch.go1.23/src/runtime/proc.go#L4820>
Signed-off-by: Wei Fu <fuweid89@gmail.com>
		
	
		
			
				
	
	
		
			86 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			86 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
/*
 | 
						|
   Copyright The containerd Authors.
 | 
						|
 | 
						|
   Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
   you may not use this file except in compliance with the License.
 | 
						|
   You may obtain a copy of the License at
 | 
						|
 | 
						|
       http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 | 
						|
   Unless required by applicable law or agreed to in writing, software
 | 
						|
   distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
   See the License for the specific language governing permissions and
 | 
						|
   limitations under the License.
 | 
						|
*/
 | 
						|
 | 
						|
package sys
 | 
						|
 | 
						|
import (
 | 
						|
	"context"
 | 
						|
	"errors"
 | 
						|
	"fmt"
 | 
						|
	"os"
 | 
						|
	"sync"
 | 
						|
 | 
						|
	"github.com/containerd/log"
 | 
						|
	"golang.org/x/sys/unix"
 | 
						|
)
 | 
						|
 | 
						|
// Cached state for the one-time kernel pidfd probe performed by SupportsPidFD.
var (
	// pidfdSupportedOnce ensures the probe body in SupportsPidFD runs at most once.
	pidfdSupportedOnce sync.Once

	// pidfdSupported holds the probe result; SupportsPidFD assigns it inside
	// pidfdSupportedOnce.Do and returns it afterwards.
	pidfdSupported bool
)
 | 
						|
 | 
						|
// SupportsPidFD returns true if current kernel supports pidfd.
 | 
						|
func SupportsPidFD() bool {
 | 
						|
	pidfdSupportedOnce.Do(func() {
 | 
						|
		logger := log.G(context.Background())
 | 
						|
 | 
						|
		if err := checkPidFD(); err != nil {
 | 
						|
			logger.WithError(err).Error("failed to ensure the kernel supports pidfd")
 | 
						|
 | 
						|
			pidfdSupported = false
 | 
						|
			return
 | 
						|
		}
 | 
						|
		pidfdSupported = true
 | 
						|
	})
 | 
						|
	return pidfdSupported
 | 
						|
}
 | 
						|
 | 
						|
func checkPidFD() error {
 | 
						|
	// Linux kernel supports pidfd_open(2) since v5.3.
 | 
						|
	//
 | 
						|
	// https://man7.org/linux/man-pages/man2/pidfd_open.2.html
 | 
						|
	pidfd, err := unix.PidfdOpen(os.Getpid(), 0)
 | 
						|
	if err != nil {
 | 
						|
		return fmt.Errorf("failed to invoke pidfd_open: %w", err)
 | 
						|
	}
 | 
						|
	defer unix.Close(pidfd)
 | 
						|
 | 
						|
	// Linux kernel supports pidfd_send_signal(2) since v5.1.
 | 
						|
	//
 | 
						|
	// https://man7.org/linux/man-pages/man2/pidfd_send_signal.2.html
 | 
						|
	if err := unix.PidfdSendSignal(pidfd, 0, nil, 0); err != nil {
 | 
						|
		return fmt.Errorf("failed to invoke pidfd_send_signal: %w", err)
 | 
						|
	}
 | 
						|
 | 
						|
	// The waitid(2) supports P_PIDFD since Linux kernel v5.4.
 | 
						|
	//
 | 
						|
	// https://man7.org/linux/man-pages/man2/waitid.2.html
 | 
						|
	werr := IgnoringEINTR(func() error {
 | 
						|
		return unix.Waitid(unix.P_PIDFD, pidfd, nil, unix.WEXITED, nil)
 | 
						|
	})
 | 
						|
 | 
						|
	// The waitid returns ECHILD since current process isn't the child of current process.
 | 
						|
	if !errors.Is(werr, unix.ECHILD) {
 | 
						|
		return fmt.Errorf("failed to invoke waitid with P_PIDFD: wanted error %v, but got %v",
 | 
						|
			unix.ECHILD, werr)
 | 
						|
	}
 | 
						|
 | 
						|
	// NOTE: The CLONE_PIDFD flag has been supported since Linux kernel v5.2.
 | 
						|
	// So assumption is that if waitid(2) supports P_PIDFD, current kernel
 | 
						|
	// should support CLONE_PIDFD as well.
 | 
						|
	return nil
 | 
						|
}
 |