
The Go runtime has started to [lock down future uses of linkname][1] since go1.23. In the Go source code, the containerd project is called out in the [hall of shame][2] comment. containerd uses `go:linkname` to fork a no-op subprocess efficiently. However, given that comment, I would like to use ptrace instead and remove `go:linkname` from the whole repository. With go1.22 `go:linkname`: ```bash $ go test -bench=. -benchmem ./ -exec sudo goos: linux goarch: amd64 pkg: github.com/containerd/containerd/v2/core/mount cpu: AMD Ryzen 7 5800H with Radeon Graphics BenchmarkBatchRunGetUsernsFD_Concurrent1-16 2440 533320 ns/op 1145 B/op 43 allocs/op BenchmarkBatchRunGetUsernsFD_Concurrent10-16 342 3661616 ns/op 11562 B/op 421 allocs/op PASS ok github.com/containerd/containerd/v2/core/mount 2.983s ``` With go1.22 `ptrace`: ```bash $ go test -bench=. -benchmem ./ -exec sudo goos: linux goarch: amd64 pkg: github.com/containerd/containerd/v2/core/mount cpu: AMD Ryzen 7 5800H with Radeon Graphics BenchmarkBatchRunGetUsernsFD_Concurrent1-16 1785 739557 ns/op 3948 B/op 68 allocs/op BenchmarkBatchRunGetUsernsFD_Concurrent10-16 328 4024300 ns/op 39601 B/op 671 allocs/op PASS ok github.com/containerd/containerd/v2/core/mount 3.104s ``` With go1.23 `ptrace`: ```bash $ go test -bench=. -benchmem ./ -exec sudo goos: linux goarch: amd64 pkg: github.com/containerd/containerd/v2/core/mount cpu: AMD Ryzen 7 5800H with Radeon Graphics BenchmarkBatchRunGetUsernsFD_Concurrent1-16 1815 723252 ns/op 4220 B/op 69 allocs/op BenchmarkBatchRunGetUsernsFD_Concurrent10-16 319 3957157 ns/op 42351 B/op 682 allocs/op PASS ok github.com/containerd/containerd/v2/core/mount 3.051s ``` Diff: The `ptrace` mode is slower than the `go:linkname` mode. However, the difference is acceptable. 
``` goos: linux goarch: amd64 pkg: github.com/containerd/containerd/v2/core/mount cpu: AMD Ryzen 7 5800H with Radeon Graphics │ go122-golinkname │ go122-ptrace │ go123-ptrace │ │ sec/op │ sec/op vs base │ sec/op vs base │ BatchRunGetUsernsFD_Concurrent1-16 533.3µ ± ∞ ¹ 739.6µ ± ∞ ¹ ~ (p=1.000 n=1) ² 723.3µ ± ∞ ¹ ~ (p=1.000 n=1) ² BatchRunGetUsernsFD_Concurrent10-16 3.662m ± ∞ ¹ 4.024m ± ∞ ¹ ~ (p=1.000 n=1) ² 3.957m ± ∞ ¹ ~ (p=1.000 n=1) ² geomean 1.397m 1.725m +23.45% 1.692m +21.06% ¹ need >= 6 samples for confidence interval at level 0.95 ² need >= 4 samples to detect a difference at alpha level 0.05 │ go122-golinkname │ go122-ptrace │ go123-ptrace │ │ B/op │ B/op vs base │ B/op vs base │ BatchRunGetUsernsFD_Concurrent1-16 1.118Ki ± ∞ ¹ 3.855Ki ± ∞ ¹ ~ (p=1.000 n=1) ² 4.121Ki ± ∞ ¹ ~ (p=1.000 n=1) ² BatchRunGetUsernsFD_Concurrent10-16 11.29Ki ± ∞ ¹ 38.67Ki ± ∞ ¹ ~ (p=1.000 n=1) ² 41.36Ki ± ∞ ¹ ~ (p=1.000 n=1) ² geomean 3.553Ki 12.21Ki +243.65% 13.06Ki +267.43% ¹ need >= 6 samples for confidence interval at level 0.95 ² need >= 4 samples to detect a difference at alpha level 0.05 │ go122-golinkname │ go122-ptrace │ go123-ptrace │ │ allocs/op │ allocs/op vs base │ allocs/op vs base │ BatchRunGetUsernsFD_Concurrent1-16 43.00 ± ∞ ¹ 68.00 ± ∞ ¹ ~ (p=1.000 n=1) ² 69.00 ± ∞ ¹ ~ (p=1.000 n=1) ² BatchRunGetUsernsFD_Concurrent10-16 421.0 ± ∞ ¹ 671.0 ± ∞ ¹ ~ (p=1.000 n=1) ² 682.0 ± ∞ ¹ ~ (p=1.000 n=1) ² geomean 134.5 213.6 +58.76% 216.9 +61.23% ¹ need >= 6 samples for confidence interval at level 0.95 ² need >= 4 samples to detect a difference at alpha level 0.05 ``` [1]: <https://github.com/golang/go/issues/67401> [2]: <https://github.com/golang/go/blob/release-branch.go1.23/src/runtime/proc.go#L4820> Signed-off-by: Wei Fu <fuweid89@gmail.com>
105 lines
2.8 KiB
Go
105 lines
2.8 KiB
Go
/*
|
|
Copyright The containerd Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package mount
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"syscall"
|
|
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// parseIDMapping parses a single user-namespace ID mapping written as
// `container-id:host-id:size` and returns it as a one-element slice.
//
// Each of the three colon-separated fields must be a base-10 integer
// accepted by strconv.Atoi. Neither ID may be negative, and size must be at
// least 1: a zero-length extent maps no IDs and is refused by the kernel
// when written to uid_map/gid_map, so it is rejected here with a clearer
// error instead of failing later.
//
// On any error the returned slice is nil.
//
// TODO: Support multiple mappings in future
func parseIDMapping(mapping string) ([]syscall.SysProcIDMap, error) {
	parts := strings.Split(mapping, ":")
	if len(parts) != 3 {
		return nil, fmt.Errorf("user namespace mappings require the format `container-id:host-id:size`")
	}

	cID, err := strconv.Atoi(parts[0])
	if err != nil {
		return nil, fmt.Errorf("invalid container id for user namespace remapping, %w", err)
	}

	hID, err := strconv.Atoi(parts[1])
	if err != nil {
		return nil, fmt.Errorf("invalid host id for user namespace remapping, %w", err)
	}

	size, err := strconv.Atoi(parts[2])
	if err != nil {
		return nil, fmt.Errorf("invalid size for user namespace remapping, %w", err)
	}

	if cID < 0 || hID < 0 || size < 0 {
		return nil, fmt.Errorf("invalid mapping %s, all IDs and size must be positive integers", mapping)
	}

	// IDs may legitimately be 0 (root), but a range of length 0 is never a
	// usable mapping.
	if size == 0 {
		return nil, fmt.Errorf("invalid mapping %s, size must be greater than zero", mapping)
	}

	return []syscall.SysProcIDMap{
		{
			ContainerID: cID,
			HostID:      hID,
			Size:        size,
		},
	}, nil
}
|
|
|
|
// IDMapMount applies GID/UID shift according to gidmap/uidmap for target path
|
|
func IDMapMount(source, target string, usernsFd int) (err error) {
|
|
var (
|
|
attr unix.MountAttr
|
|
)
|
|
|
|
attr.Attr_set = unix.MOUNT_ATTR_IDMAP
|
|
attr.Attr_clr = 0
|
|
attr.Propagation = 0
|
|
attr.Userns_fd = uint64(usernsFd)
|
|
|
|
dFd, err := unix.OpenTree(-int(unix.EBADF), source, uint(unix.OPEN_TREE_CLONE|unix.OPEN_TREE_CLOEXEC|unix.AT_EMPTY_PATH))
|
|
if err != nil {
|
|
return fmt.Errorf("Unable to open tree for %s: %w", target, err)
|
|
}
|
|
|
|
defer unix.Close(dFd)
|
|
if err = unix.MountSetattr(dFd, "", unix.AT_EMPTY_PATH, &attr); err != nil {
|
|
return fmt.Errorf("Unable to shift GID/UID for %s: %w", target, err)
|
|
}
|
|
|
|
if err = unix.MoveMount(dFd, "", -int(unix.EBADF), target, unix.MOVE_MOUNT_F_EMPTY_PATH); err != nil {
|
|
return fmt.Errorf("Unable to attach mount tree to %s: %w", target, err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// GetUsernsFD forks the current process and creates a user namespace using
|
|
// the specified mappings.
|
|
func GetUsernsFD(uidmap, gidmap string) (_usernsFD *os.File, _ error) {
|
|
uidMaps, err := parseIDMapping(uidmap)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
gidMaps, err := parseIDMapping(gidmap)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return getUsernsFD(uidMaps, gidMaps)
|
|
}
|