Merge pull request #8287 from kinvolk/rata/userns-stateless-idmap

Add support for userns in stateless and stateful pods with idmap mounts (KEP-127, k8s >= 1.27)
This commit is contained in:
Fu Wei
2023-09-14 18:14:02 +08:00
committed by GitHub
22 changed files with 1044 additions and 34 deletions

View File

@@ -14,7 +14,7 @@ require (
github.com/containerd/typeurl/v2 v2.1.1
github.com/opencontainers/go-digest v1.0.0
github.com/opencontainers/image-spec v1.1.0-rc4
github.com/opencontainers/runtime-spec v1.1.0
github.com/opencontainers/runtime-spec v1.1.1-0.20230823135140-4fec88fd00a4
github.com/stretchr/testify v1.8.4
go.opentelemetry.io/otel v1.14.0
go.opentelemetry.io/otel/sdk v1.14.0

View File

@@ -1470,8 +1470,9 @@ github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d/go.m
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.1.0-rc.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg=
github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.1.1-0.20230823135140-4fec88fd00a4 h1:EctkgBjZ1y4q+sibyuuIgiKpa0QSd2elFtSSdNvBVow=
github.com/opencontainers/runtime-spec v1.1.1-0.20230823135140-4fec88fd00a4/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-tools v0.0.0-20181011054405-1d69bd0f9c39/go.mod h1:r3f7wjNzSs2extwzU3Y+6pKfobzPh+kKFJ3ofN+3nfs=
github.com/opencontainers/runtime-tools v0.9.0/go.mod h1:r3f7wjNzSs2extwzU3Y+6pKfobzPh+kKFJ3ofN+3nfs=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0=

View File

@@ -286,6 +286,10 @@ func WithWindowsResources(r *runtime.WindowsContainerResources) ContainerOpts {
}
// WithVolumeMount mounts hostPath at containerPath without any UID/GID
// mappings. It is shorthand for WithIDMapVolumeMount with nil mappings.
func WithVolumeMount(hostPath, containerPath string) ContainerOpts {
	// A nil slice means "no ID mapping" for both the UID and the GID side.
	var noMappings []*runtime.IDMapping
	return WithIDMapVolumeMount(hostPath, containerPath, noMappings, noMappings)
}
func WithIDMapVolumeMount(hostPath, containerPath string, uidMaps, gidMaps []*runtime.IDMapping) ContainerOpts {
return func(c *runtime.ContainerConfig) {
hostPath, _ = filepath.Abs(hostPath)
containerPath, _ = filepath.Abs(containerPath)
@@ -293,6 +297,8 @@ func WithVolumeMount(hostPath, containerPath string) ContainerOpts {
HostPath: hostPath,
ContainerPath: containerPath,
SelinuxRelabel: selinux.GetEnabled(),
UidMappings: uidMaps,
GidMappings: gidMaps,
}
c.Mounts = append(c.Mounts, mount)
}

View File

@@ -17,28 +17,137 @@
package integration
import (
"context"
"errors"
"fmt"
"os"
"os/user"
"path/filepath"
"strings"
"syscall"
"testing"
"time"
"github.com/containerd/containerd/integration/images"
runc "github.com/containerd/go-runc"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
exec "golang.org/x/sys/execabs"
"golang.org/x/sys/unix"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
const (
defaultRoot = "/var/lib/containerd-test"
)
// supportsUserNS reports whether user namespaces look available, judged by
// the presence of /proc/self/ns/user. Only a "does not exist" error counts as
// unsupported; every other stat outcome is treated as supported.
func supportsUserNS() bool {
	_, statErr := os.Stat("/proc/self/ns/user")
	return !os.IsNotExist(statErr)
}
// supportsIDMap reports whether an idmapped mount can be set up for path.
//
// It clones the mount tree at path into a detached mount, starts a short-lived
// helper process inside a fresh user namespace, and then tries to attach that
// user namespace to the detached mount with MOUNT_ATTR_IDMAP. Any failure along
// the way is reported as "not supported".
func supportsIDMap(path string) bool {
	openFlags := uint(unix.OPEN_TREE_CLONE | unix.OPEN_TREE_CLOEXEC)
	treeFD, err := unix.OpenTree(-1, path, openFlags)
	if err != nil {
		return false
	}
	defer unix.Close(treeFD)

	// The concrete mapping is irrelevant for a capability probe; any valid
	// range will do. The same range is used for UIDs and GIDs.
	probeMapping := []syscall.SysProcIDMap{{ContainerID: 0, HostID: 65536, Size: 65536}}

	// The helper only has to stay alive while its user namespace is opened
	// and applied below; "sleep 5" is long enough and is killed on return.
	helper := exec.Command("sleep", "5")
	helper.SysProcAttr = &syscall.SysProcAttr{
		Cloneflags:  syscall.CLONE_NEWUSER,
		UidMappings: probeMapping,
		GidMappings: probeMapping,
	}
	if startErr := helper.Start(); startErr != nil {
		return false
	}
	defer func() {
		_ = helper.Process.Kill()
		_ = helper.Wait()
	}()

	nsFile, err := os.Open(fmt.Sprintf("/proc/%d/ns/user", helper.Process.Pid))
	if err != nil {
		return false
	}
	defer nsFile.Close()

	attr := unix.MountAttr{
		Attr_set:  unix.MOUNT_ATTR_IDMAP,
		Userns_fd: uint64(nsFile.Fd()),
	}
	// An empty path with AT_EMPTY_PATH targets the detached tree itself.
	return unix.MountSetattr(treeFD, "", unix.AT_EMPTY_PATH, &attr) == nil
}
// traversePath gives 755 permissions to the elements of tPath below
// os.TempDir() and errors out if an element above it lacks read+exec
// permissions for others. tPath MUST be os.TempDir() itself or a descendant of
// it; the path returned by testing.TempDir() usually is.
//
// The descendant check is path-aware: a sibling that merely shares a string
// prefix with the temp dir (for example "/tmpfoo" versus "/tmp"), or a path
// that escapes it through "..", is rejected before anything is touched.
//
// It returns an error if tPath is not under os.TempDir(), if any element
// cannot be stat'ed, if an ancestor of the temp dir is not readable and
// traversable by others, or if chmod fails.
func traversePath(tPath string) error {
	// Check the assumption that the argument is under os.TempDir().
	tempBase := os.TempDir()
	rel, err := filepath.Rel(tempBase, tPath)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return fmt.Errorf("traversePath: %q is not a descendant of %q", tPath, tempBase)
	}

	// Walk the path one element at a time. SplitAfter keeps the trailing "/"
	// on every non-final element, so path grows as "/", "/tmp/", "/tmp/x/", ...
	var path string
	for _, p := range strings.SplitAfter(tPath, "/") {
		path += p
		stats, err := os.Stat(path)
		if err != nil {
			return err
		}

		perm := stats.Mode().Perm()
		if perm&0o5 == 0o5 {
			// Others can already read and traverse this element.
			continue
		}

		// Because path carries a trailing separator here, this matches the
		// strict ancestors of tempBase (and tempBase itself only when it is
		// spelled with a trailing slash). Those are never modified.
		if strings.HasPrefix(tempBase, path) {
			return fmt.Errorf("traversePath: directory %q MUST have read+exec permissions for others", path)
		}

		if err := os.Chmod(path, perm|0o755); err != nil {
			return err
		}
	}

	return nil
}
func TestPodUserNS(t *testing.T) {
containerID := uint32(0)
hostID := uint32(65536)
size := uint32(65536)
idmap := []*runtime.IDMapping{
{
ContainerId: containerID,
HostId: hostID,
Length: size,
},
}
volumeHostPath := t.TempDir()
if err := traversePath(volumeHostPath); err != nil {
t.Fatalf("failed to setup volume host path: %v", err)
}
for name, test := range map[string]struct {
sandboxOpts []PodSandboxOpts
containerOpts []ContainerOpts
checkOutput func(t *testing.T, output string)
hostVolumes bool // whether to config uses host Volumes
expectErr bool
}{
"userns uid mapping": {
@@ -85,6 +194,31 @@ func TestPodUserNS(t *testing.T) {
assert.Contains(t, output, "=0=0=")
},
},
"volumes permissions": {
sandboxOpts: []PodSandboxOpts{
WithPodUserNs(containerID, hostID, size),
},
hostVolumes: true,
containerOpts: []ContainerOpts{
WithUserNamespace(containerID, hostID, size),
WithIDMapVolumeMount(volumeHostPath, "/mnt", idmap, idmap),
// Prints numeric UID and GID for path.
// For example, if UID and GID is 0 it will print: =0=0=
// We add the "=" signs so we can use assert.Contains() and be sure
// the UID/GID is 0 and not things like 100 (that contain 0).
// We can't use assert.Equal() easily as it contains timestamp, etc.
WithCommand("stat", "-c", "'=%u=%g='", "/mnt/"),
},
checkOutput: func(t *testing.T, output string) {
// The UID and GID should be the current user if chown/remap is done correctly.
uid := "0"
user, err := user.Current()
if user != nil && err == nil {
uid = user.Uid
}
assert.Contains(t, output, "="+uid+"="+uid+"=")
},
},
"fails with several mappings": {
sandboxOpts: []PodSandboxOpts{
WithPodUserNs(containerID, hostID, size),
@@ -94,12 +228,17 @@ func TestPodUserNS(t *testing.T) {
},
} {
t.Run(name, func(t *testing.T) {
cmd := exec.Command("true")
cmd.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: syscall.CLONE_NEWUSER,
if !supportsUserNS() {
t.Skip("User namespaces are not supported")
}
if err := cmd.Run(); err != nil {
t.Skip("skipping test: user namespaces are unavailable")
if !supportsIDMap(defaultRoot) {
t.Skipf("ID mappings are not supported on: %v", defaultRoot)
}
if test.hostVolumes && !supportsIDMap(volumeHostPath) {
t.Skipf("ID mappings are not supported host volume filesystem: %v", volumeHostPath)
}
if err := supportsRuncIDMap(); err != nil {
t.Skipf("OCI runtime doesn't support idmap mounts: %v", err)
}
testPodLogDir := t.TempDir()
@@ -164,3 +303,22 @@ func TestPodUserNS(t *testing.T) {
})
}
}
// supportsRuncIDMap checks whether the OCI runtime advertises support for
// idmap mounts through its `features` command.
//
// It returns nil when `linux.mountExtensions.idmap.enabled` is reported as
// true, and a descriptive error otherwise: when the features command fails,
// when any part of that entry is missing, or when it is reported as disabled.
func supportsRuncIDMap() error {
	var r runc.Runc
	features, err := r.Features(context.Background())
	if err != nil {
		// If the features command is not implemented, then runc is too old.
		return fmt.Errorf("features command failed: %w", err)
	}

	// The Linux section is optional in the features output; guard it so a
	// runtime that omits it is reported as unsupported instead of panicking.
	if features == nil || features.Linux == nil {
		return errors.New("missing `linux` entry in `features` command")
	}

	mountExt := features.Linux.MountExtensions
	if mountExt == nil || mountExt.IDMap == nil {
		return errors.New("missing `mountExtensions.idmap` entry in `features` command")
	}

	if enabled := mountExt.IDMap.Enabled; enabled == nil || !*enabled {
		return errors.New("idmap mounts not supported")
	}

	return nil
}