Merge pull request #7679 from kinvolk/rata/userns-stateless-pods

Add support for user namespaces in stateless pods (KEP-127)
This commit is contained in:
Mike Brown
2022-12-29 14:08:24 -06:00
committed by GitHub
23 changed files with 909 additions and 20 deletions

View File

@@ -661,7 +661,7 @@ func WithSupplementalGroups(groups []int64) oci.SpecOpts {
}
// WithPodNamespaces sets the pod namespaces for the container
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32) oci.SpecOpts {
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32, uids, gids []runtimespec.LinuxIDMapping) oci.SpecOpts {
namespaces := config.GetNamespaceOptions()
opts := []oci.SpecOpts{
@@ -672,6 +672,17 @@ func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid
if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER {
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(targetPid)}))
}
if namespaces.GetUsernsOptions() != nil {
switch namespaces.GetUsernsOptions().GetMode() {
case runtime.NamespaceMode_NODE:
// Nothing to do. Not adding userns field uses the node userns.
case runtime.NamespaceMode_POD:
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UserNamespace, Path: GetUserNamespace(sandboxPid)}))
opts = append(opts, oci.WithUserNamespace(uids, gids))
}
}
return oci.Compose(opts...)
}
@@ -745,6 +756,8 @@ const (
utsNSFormat = "/proc/%v/ns/uts"
// pidNSFormat is the format of pid namespace of a process.
pidNSFormat = "/proc/%v/ns/pid"
// userNSFormat is the format of user namespace of a process.
userNSFormat = "/proc/%v/ns/user"
)
// GetNetworkNamespace returns the network namespace of a process.
@@ -767,6 +780,11 @@ func GetPIDNamespace(pid uint32) string {
return fmt.Sprintf(pidNSFormat, pid)
}
// GetUserNamespace builds the path of the user namespace of the process
// identified by pid by substituting pid into userNSFormat.
func GetUserNamespace(pid uint32) string {
	userNSPath := fmt.Sprintf(userNSFormat, pid)
	return userNSPath
}
// WithCDI updates OCI spec with CDI content
func WithCDI(annotations map[string]string) oci.SpecOpts {
return func(ctx context.Context, _ oci.Client, c *containers.Container, s *oci.Spec) error {

View File

@@ -313,7 +313,8 @@ func (c *criService) containerSpec(
specOpts = append(specOpts,
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid),
// TODO: This is a hack to make this compile. We should move userns support to sbserver.
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, nil, nil),
customopts.WithSupplementalGroups(supplementalGroups),
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
customopts.WithAnnotation(annotations.SandboxID, sandboxID),

View File

@@ -184,7 +184,10 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
log.G(ctx).Debugf("Container %q spec: %#+v", id, spew.NewFormatter(spec))
// Grab any platform specific snapshotter opts.
sOpts := snapshotterOpts(c.config.ContainerdConfig.Snapshotter, config)
sOpts, err := snapshotterOpts(c.config.ContainerdConfig.Snapshotter, config)
if err != nil {
return nil, err
}
// Set snapshotter before any other options.
opts := []containerd.NewContainerOpts{

View File

@@ -311,9 +311,14 @@ func (c *criService) containerSpec(
targetPid = status.Pid
}
uids, gids, err := parseUsernsIDs(nsOpts.GetUsernsOptions())
if err != nil {
return nil, fmt.Errorf("user namespace configuration: %w", err)
}
specOpts = append(specOpts,
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid),
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, uids, gids),
customopts.WithSupplementalGroups(supplementalGroups),
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
@@ -601,6 +606,7 @@ func generateUserString(username string, uid, gid *runtime.Int64Value) (string,
}
// snapshotterOpts returns any Linux specific snapshotter options for the rootfs snapshot
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt {
return []snapshots.Opt{}
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions()
return snapshotterRemapOpts(nsOpts)
}

View File

@@ -804,6 +804,113 @@ func TestPidNamespace(t *testing.T) {
}
}
// TestUserNamespace checks how containerSpec turns the user namespace options
// of a container config into the generated OCI spec.
//
// Each case sets UsernsOptions on the shared container config and then either
// expects containerSpec to fail (err) or checks the resulting UID/GID mappings
// and the presence (expNS) or absence (expNotNS) of a user namespace entry.
func TestUserNamespace(t *testing.T) {
	testID := "test-id"
	testPid := uint32(1234)
	testSandboxID := "sandbox-id"
	testContainerName := "container-name"
	idMap := runtime.IDMapping{
		HostId:      1000,
		ContainerId: 1000,
		Length:      10,
	}
	expIDMap := runtimespec.LinuxIDMapping{
		HostID:      1000,
		ContainerID: 1000,
		Size:        10,
	}
	containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
	ociRuntime := config.Runtime{}
	c := newTestCRIService()
	for desc, test := range map[string]struct {
		userNS        *runtime.UserNamespace
		expNS         *runtimespec.LinuxNamespace
		expNotNS      *runtimespec.LinuxNamespace // Does NOT contain this namespace
		expUIDMapping []runtimespec.LinuxIDMapping
		expGIDMapping []runtimespec.LinuxIDMapping
		err           bool
	}{
		"node namespace mode": {
			userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_NODE},
			// Expect userns to NOT be present.
			expNotNS: &runtimespec.LinuxNamespace{
				Type: runtimespec.UserNamespace,
				Path: opts.GetUserNamespace(testPid),
			},
		},
		"node namespace mode with mappings": {
			userNS: &runtime.UserNamespace{
				Mode: runtime.NamespaceMode_NODE,
				Uids: []*runtime.IDMapping{&idMap},
				Gids: []*runtime.IDMapping{&idMap},
			},
			err: true,
		},
		"container namespace mode": {
			userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_CONTAINER},
			err:    true,
		},
		"target namespace mode": {
			userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_TARGET},
			err:    true,
		},
		"unknown namespace mode": {
			userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode(100)},
			err:    true,
		},
		"pod namespace mode": {
			userNS: &runtime.UserNamespace{
				Mode: runtime.NamespaceMode_POD,
				Uids: []*runtime.IDMapping{&idMap},
				Gids: []*runtime.IDMapping{&idMap},
			},
			expNS: &runtimespec.LinuxNamespace{
				Type: runtimespec.UserNamespace,
				Path: opts.GetUserNamespace(testPid),
			},
			expUIDMapping: []runtimespec.LinuxIDMapping{expIDMap},
			expGIDMapping: []runtimespec.LinuxIDMapping{expIDMap},
		},
		"pod namespace mode with several mappings": {
			userNS: &runtime.UserNamespace{
				Mode: runtime.NamespaceMode_POD,
				Uids: []*runtime.IDMapping{&idMap, &idMap},
				Gids: []*runtime.IDMapping{&idMap, &idMap},
			},
			err: true,
		},
		"pod namespace mode with uneven mappings": {
			userNS: &runtime.UserNamespace{
				Mode: runtime.NamespaceMode_POD,
				Uids: []*runtime.IDMapping{&idMap, &idMap},
				Gids: []*runtime.IDMapping{&idMap},
			},
			err: true,
		},
	} {
		t.Run(desc, func(t *testing.T) {
			// The container config is shared between cases; every case overwrites
			// the namespace options before generating the spec.
			containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{UsernsOptions: test.userNS}
			spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)

			if test.err {
				assert.Error(t, err)
				assert.Nil(t, spec)
				return
			}
			// Stop here on an unexpected error: the checks below dereference
			// spec and would otherwise panic instead of reporting the failure.
			if !assert.NoError(t, err) {
				return
			}
			// assert.Equal takes (expected, actual); keeping that order makes
			// failure messages label the two values correctly.
			assert.Equal(t, test.expUIDMapping, spec.Linux.UIDMappings)
			assert.Equal(t, test.expGIDMapping, spec.Linux.GIDMappings)

			if test.expNS != nil {
				assert.Contains(t, spec.Linux.Namespaces, *test.expNS)
			}
			if test.expNotNS != nil {
				assert.NotContains(t, spec.Linux.Namespaces, *test.expNotNS)
			}
		})
	}
}
func TestNoDefaultRunMount(t *testing.T) {
testID := "test-id"
testPid := uint32(1234)

View File

@@ -55,6 +55,6 @@ func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageCon
}
// snapshotterOpts returns snapshotter options for the rootfs snapshot
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt {
return []snapshots.Opt{}
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
return []snapshots.Opt{}, nil
}

View File

@@ -145,7 +145,7 @@ func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageCon
}
// snapshotterOpts returns any Windows specific snapshotter options for the r/w layer
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt {
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
var opts []snapshots.Opt
switch snapshotterName {
@@ -160,5 +160,5 @@ func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []
}
}
return opts
return opts, nil
}

View File

@@ -28,11 +28,13 @@ import (
"syscall"
"time"
"github.com/containerd/containerd"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/pkg/apparmor"
"github.com/containerd/containerd/pkg/seccomp"
"github.com/containerd/containerd/pkg/seutil"
"github.com/containerd/containerd/snapshots"
"github.com/moby/sys/mountinfo"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
@@ -275,3 +277,92 @@ func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
spec.Process.SelinuxLabel = l
return nil
}
// parseUsernsIDMap converts a list of CRI ID mappings into the OCI
// runtime-spec representation.
//
// An empty list, or a list holding a single nil entry, yields a nil result and
// no error. A list with more than one entry, or an entry whose Length is below
// 1, is rejected with an error.
func parseUsernsIDMap(runtimeIDMap []*runtime.IDMapping) ([]specs.LinuxIDMapping, error) {
	switch n := len(runtimeIDMap); {
	case n == 0:
		return nil, nil
	case n > 1:
		// Only a single line is accepted, because containerd.WithRemappedSnapshot() only supports that.
		return nil, fmt.Errorf("only one mapping line supported, got %v mapping lines", n)
	}

	// Exactly one entry from here on.
	entry := runtimeIDMap[0]
	if entry == nil {
		return nil, nil
	}
	if entry.Length < 1 {
		return nil, fmt.Errorf("invalid mapping length: %v", entry.Length)
	}

	return []specs.LinuxIDMapping{{
		ContainerID: entry.ContainerId,
		HostID:      entry.HostId,
		Size:        entry.Length,
	}}, nil
}
// parseUsernsIDs validates the CRI user namespace options and converts their
// UID and GID mappings with parseUsernsIDMap.
//
// A nil userns yields no mappings and no error. NamespaceMode_NODE must come
// without any mapping, NamespaceMode_POD requires both a UID and a GID
// mapping, and every other mode is rejected. Mapping parse errors are reported
// before the mode is checked.
func parseUsernsIDs(userns *runtime.UserNamespace) (uids, gids []specs.LinuxIDMapping, retErr error) {
	if userns == nil {
		// Without userns options the kubelet doesn't support this feature;
		// falling back to no user namespace is completely valid.
		return nil, nil, nil
	}

	uidMappings, err := parseUsernsIDMap(userns.GetUids())
	if err != nil {
		return nil, nil, fmt.Errorf("UID mapping: %w", err)
	}
	gidMappings, err := parseUsernsIDMap(userns.GetGids())
	if err != nil {
		return nil, nil, fmt.Errorf("GID mapping: %w", err)
	}

	mode := userns.GetMode()
	if mode == runtime.NamespaceMode_NODE {
		if len(uidMappings) != 0 || len(gidMappings) != 0 {
			return nil, nil, fmt.Errorf("can't use user namespace mode %q with mappings. Got %v UID mappings and %v GID mappings", mode, len(uidMappings), len(gidMappings))
		}
		return uidMappings, gidMappings, nil
	}
	if mode == runtime.NamespaceMode_POD {
		// Valid mode; the mappings are applied in WithPodNamespaces().
		if len(uidMappings) == 0 || len(gidMappings) == 0 {
			return nil, nil, fmt.Errorf("can't use user namespace mode %q without UID and GID mappings", mode)
		}
		return uidMappings, gidMappings, nil
	}

	return nil, nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
}
// snapshotterRemapOpts derives snapshotter options from the user namespace
// settings in nsOpts.
//
// Without user namespace options it returns an empty list. Invalid options are
// reported as an error. For NamespaceMode_POD a single remapper-labels option
// is returned; for any other accepted mode the list stays empty.
func snapshotterRemapOpts(nsOpts *runtime.NamespaceOption) ([]snapshots.Opt, error) {
	usernsOpts := nsOpts.GetUsernsOptions()
	if usernsOpts == nil {
		return []snapshots.Opt{}, nil
	}

	uids, gids, err := parseUsernsIDs(usernsOpts)
	if err != nil {
		return nil, fmt.Errorf("user namespace configuration: %w", err)
	}

	if usernsOpts.GetMode() != runtime.NamespaceMode_POD {
		return []snapshots.Opt{}, nil
	}

	// parseUsernsIDs rejects pod mode without UID and GID mappings, so indexing
	// the first entry of each is safe here.
	// NOTE(review): only the host IDs and the UID mapping's Size are passed on;
	// the container IDs are fixed to 0 and the GID mapping's Size is not
	// consulted. Presumably both mappings are expected to have the same length
	// and to start at container ID 0 - confirm against the callers.
	remap := containerd.WithRemapperLabels(0, uids[0].HostID, 0, gids[0].HostID, uids[0].Size)
	return []snapshots.Opt{remap}, nil
}

View File

@@ -23,6 +23,7 @@ import (
"fmt"
"math"
"path/filepath"
goruntime "runtime"
"strings"
"time"
@@ -157,10 +158,17 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
if err != nil {
return nil, fmt.Errorf("failed to generate runtime options: %w", err)
}
snapshotterOpt := snapshots.WithLabels(snapshots.FilterInheritedLabels(config.Annotations))
sOpts := []snapshots.Opt{snapshots.WithLabels(snapshots.FilterInheritedLabels(config.Annotations))}
extraSOpts, err := sandboxSnapshotterOpts(config)
if err != nil {
return nil, err
}
sOpts = append(sOpts, extraSOpts...)
opts := []containerd.NewContainerOpts{
containerd.WithSnapshotter(c.runtimeSnapshotter(ctx, ociRuntime)),
customopts.WithNewSnapshot(id, containerdImage, snapshotterOpt),
customopts.WithNewSnapshot(id, containerdImage, sOpts...),
containerd.WithSpec(spec, specOpts...),
containerd.WithContainerLabels(sandboxLabels),
containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata),
@@ -244,8 +252,27 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
return nil, fmt.Errorf("failed to get sandbox container info: %w", err)
}
userNsEnabled := false
if goruntime.GOOS != "windows" {
usernsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions()
if usernsOpts != nil && usernsOpts.GetMode() == runtime.NamespaceMode_POD {
userNsEnabled = true
}
}
// Setup the network namespace if host networking wasn't requested.
if !hostNetwork(config) {
if !hostNetwork(config) && !userNsEnabled {
// XXX: This code is copy-pasted further below for the podNetwork && userNsEnabled case too.
// We can't move this to a function, as the defer calls need to be executed if other
// errors are returned in this function. So, we would need more refactors to move
// this code to a function and the idea was to not change the current code for
// !userNsEnabled case, therefore doing it would defeat the purpose.
//
// The difference between the cases is the use of netns.NewNetNS() vs
// netns.NewNetNSFromPID() and we verify the task is still running in the other case.
//
// To simplify this, in the future, we should just remove this case (podNetwork &&
// !userNsEnabled) and just keep the other case (podNetwork && userNsEnabled).
netStart := time.Now()
// If it is not in host network namespace then create a namespace and set the sandbox
@@ -353,6 +380,88 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
return nil, fmt.Errorf("failed to wait for sandbox container task: %w", err)
}
if !hostNetwork(config) && userNsEnabled {
// If userns is enabled, then the netns was created by the OCI runtime
// when creating "task". The OCI runtime needs to create the netns
// because, if userns is in use, the netns needs to be owned by the
// userns. So, let the OCI runtime just handle this for us.
// If the netns is not owned by the userns several problems will happen.
// For instance, the container will lack permission (even if
// capabilities are present) to modify the netns or, even worse, the OCI
// runtime will fail to mount sysfs:
// https://github.com/torvalds/linux/commit/7dc5dbc879bd0779924b5132a48b731a0bc04a1e#diff-4839664cd0c8eab716e064323c7cd71fR1164
netStart := time.Now()
// If it is not in host network namespace then create a namespace and set the sandbox
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
// namespaces. If the pod is in host network namespace then both are empty and should not
// be used.
var netnsMountDir = "/var/run/netns"
if c.config.NetNSMountsUnderStateDir {
netnsMountDir = filepath.Join(c.config.StateDir, "netns")
}
sandbox.NetNS, err = netns.NewNetNSFromPID(netnsMountDir, task.Pid())
if err != nil {
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
}
// Verify task is still in created state.
if st, err := task.Status(ctx); err != nil || st.Status != containerd.Created {
return nil, fmt.Errorf("failed to create pod sandbox %q: err is %v - status is %q and is expected %q", id, err, st.Status, containerd.Created)
}
sandbox.NetNSPath = sandbox.NetNS.GetPath()
defer func() {
// Remove the network namespace only if all the resource cleanup is done.
if retErr != nil && cleanupErr == nil {
if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
return
}
sandbox.NetNSPath = ""
}
}()
// Update network namespace in the container's spec
c.updateNetNamespacePath(spec, sandbox.NetNSPath)
if err := container.Update(ctx,
// Update spec of the container
containerd.UpdateContainerOpts(containerd.WithSpec(spec)),
// Update sandbox metadata to include NetNS info
containerd.UpdateContainerOpts(containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata))); err != nil {
return nil, fmt.Errorf("failed to update the network namespace for the sandbox container %q: %w", id, err)
}
// Define this defer to teardownPodNetwork prior to the setupPodNetwork function call.
// This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource creation functions.
defer func() {
// Teardown the network only if all the resource cleanup is done.
if retErr != nil && cleanupErr == nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Teardown network if an error is returned.
if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id)
}
}
}()
// Setup network for sandbox.
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
// rely on the assumption that CRI shim will not be querying the network namespace to check the
// network states such as IP.
// In future runtime implementation should avoid relying on CRI shim implementation details.
// In this case however caching the IP will add a subtle performance enhancement by avoiding
// calls to network namespace of the pod to query the IP of the veth interface on every
// SandboxStatus request.
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
}
sandboxCreateNetworkTimer.UpdateSince(netStart)
}
if c.nri.isEnabled() {
err = c.nri.runPodSandbox(ctx, &sandbox)
if err != nil {

View File

@@ -25,6 +25,7 @@ import (
"github.com/containerd/containerd"
"github.com/containerd/containerd/oci"
"github.com/containerd/containerd/plugin"
"github.com/containerd/containerd/snapshots"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
selinux "github.com/opencontainers/selinux/go-selinux"
@@ -95,6 +96,23 @@ func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxC
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace))
}
usernsOpts := nsOptions.GetUsernsOptions()
uids, gids, err := parseUsernsIDs(usernsOpts)
if err != nil {
return nil, fmt.Errorf("user namespace configuration: %w", err)
}
if usernsOpts != nil {
switch mode := usernsOpts.GetMode(); mode {
case runtime.NamespaceMode_NODE:
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UserNamespace))
case runtime.NamespaceMode_POD:
specOpts = append(specOpts, oci.WithUserNamespace(uids, gids))
default:
return nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
}
}
// It's fine to generate the spec before the sandbox /dev/shm
// is actually created.
sandboxDevShm := c.getSandboxDevShm(id)
@@ -358,3 +376,10 @@ func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath strin
}
}
}
// sandboxSnapshotterOpts returns the snapshotter options for a sandbox
// container. They are computed by snapshotterRemapOpts from the namespace
// options found in the sandbox's Linux security context.
func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) {
	securityContext := config.GetLinux().GetSecurityContext()
	return snapshotterRemapOpts(securityContext.GetNamespaceOptions())
}

View File

@@ -98,6 +98,17 @@ func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConf
func TestLinuxSandboxContainerSpec(t *testing.T) {
testID := "test-id"
nsPath := "test-cni"
idMap := runtime.IDMapping{
HostId: 1000,
ContainerId: 1000,
Length: 10,
}
expIDMap := runtimespec.LinuxIDMapping{
HostID: 1000,
ContainerID: 1000,
Size: 10,
}
for desc, test := range map[string]struct {
configChange func(*runtime.PodSandboxConfig)
specCheck func(*testing.T, *runtimespec.Spec)
@@ -122,6 +133,9 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
})
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
})
},
},
"host namespace": {
@@ -149,10 +163,113 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.IPCNamespace,
})
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
})
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
},
},
"user namespace": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
Uids: []*runtime.IDMapping{&idMap},
Gids: []*runtime.IDMapping{&idMap},
},
},
}
},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
require.NotNil(t, spec.Linux)
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
})
require.Equal(t, spec.Linux.UIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
require.Equal(t, spec.Linux.GIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
},
},
"user namespace mode node and mappings": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap},
Gids: []*runtime.IDMapping{&idMap},
},
},
}
},
expectErr: true,
},
"user namespace with several mappings": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap, &idMap},
Gids: []*runtime.IDMapping{&idMap, &idMap},
},
},
}
},
expectErr: true,
},
"user namespace with uneven mappings": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap, &idMap},
Gids: []*runtime.IDMapping{&idMap},
},
},
}
},
expectErr: true,
},
"user namespace mode container": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_CONTAINER,
},
},
}
},
expectErr: true,
},
"user namespace mode target": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_TARGET,
},
},
}
},
expectErr: true,
},
"user namespace unknown mode": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode(100),
},
},
}
},
expectErr: true,
},
"should set supplemental groups correctly": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{

View File

@@ -21,6 +21,7 @@ package server
import (
"github.com/containerd/containerd"
"github.com/containerd/containerd/oci"
"github.com/containerd/containerd/snapshots"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
@@ -56,3 +57,9 @@ func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
}
// sandboxSnapshotterOpts generates any platform specific snapshotter options
// for a sandbox container. This implementation ignores config and always
// returns an empty, non-nil list and a nil error.
func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) {
	noOpts := make([]snapshots.Opt, 0)
	return noOpts, nil
}

View File

@@ -22,6 +22,7 @@ import (
"github.com/containerd/containerd"
"github.com/containerd/containerd/oci"
"github.com/containerd/containerd/snapshots"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
@@ -116,3 +117,8 @@ func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
spec.Windows.Network.NetworkNamespace = nsPath
}
// sandboxSnapshotterOpts generates the snapshotter options for a sandbox
// container. No sandbox snapshotter options are needed for windows, so config
// is ignored and an empty, non-nil list is returned with a nil error.
func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) {
	noOpts := make([]snapshots.Opt, 0)
	return noOpts, nil
}

View File

@@ -50,7 +50,9 @@ import (
// newNS creates a new persistent (bind-mounted) network namespace and returns the
// path to the network namespace.
func newNS(baseDir string) (nsPath string, err error) {
// If pid is not 0, returns the netns from that pid persistently mounted. Otherwise,
// a new netns is created.
func newNS(baseDir string, pid uint32) (nsPath string, err error) {
b := make([]byte, 16)
_, err = rand.Read(b)
@@ -81,6 +83,16 @@ func newNS(baseDir string) (nsPath string, err error) {
}
}()
if pid != 0 {
procNsPath := getNetNSPathFromPID(pid)
// bind mount the netns onto the mount point. This causes the namespace
// to persist, even when there are no threads in the ns.
if err = unix.Mount(procNsPath, nsPath, "none", unix.MS_BIND, ""); err != nil {
return "", fmt.Errorf("failed to bind mount ns src: %v at %s: %w", procNsPath, nsPath, err)
}
return nsPath, nil
}
var wg sync.WaitGroup
wg.Add(1)
@@ -155,6 +167,10 @@ func getCurrentThreadNetNSPath() string {
return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
}
// getNetNSPathFromPID returns the procfs path of the network namespace of the
// process identified by pid, i.e. "/proc/<pid>/ns/net".
func getNetNSPathFromPID(pid uint32) string {
	const netNSPathFormat = "/proc/%d/ns/net"
	return fmt.Sprintf(netNSPathFormat, pid)
}
// NetNS holds network namespace.
type NetNS struct {
path string
@@ -162,7 +178,12 @@ type NetNS struct {
// NewNetNS creates a network namespace.
func NewNetNS(baseDir string) (*NetNS, error) {
path, err := newNS(baseDir)
return NewNetNSFromPID(baseDir, 0)
}
// NewNetNSFromPID returns the netns from pid or a new netns if pid is 0.
func NewNetNSFromPID(baseDir string, pid uint32) (*NetNS, error) {
path, err := newNS(baseDir, pid)
if err != nil {
return nil, fmt.Errorf("failed to setup netns: %w", err)
}

View File

@@ -35,6 +35,11 @@ func NewNetNS(baseDir string) (*NetNS, error) {
return nil, errNotImplementedOnUnix
}
// NewNetNSFromPID is a stub in this build: it ignores baseDir and pid and
// always returns a nil NetNS together with errNotImplementedOnUnix.
func NewNetNSFromPID(baseDir string, pid uint32) (*NetNS, error) {
return nil, errNotImplementedOnUnix
}
// LoadNetNS loads existing network namespace.
func LoadNetNS(path string) *NetNS {
return &NetNS{path: path}

View File

@@ -16,14 +16,20 @@
package netns
import "github.com/Microsoft/hcsshim/hcn"
import (
"errors"
"github.com/Microsoft/hcsshim/hcn"
)
var errNotImplementedOnWindows = errors.New("not implemented on windows")
// NetNS holds network namespace for sandbox
type NetNS struct {
path string
}
// NewNetNS creates a network namespace for the sandbox
// NewNetNS creates a network namespace for the sandbox.
func NewNetNS(baseDir string) (*NetNS, error) {
temp := hcn.HostComputeNamespace{}
hcnNamespace, err := temp.Create()
@@ -34,6 +40,11 @@ func NewNetNS(baseDir string) (*NetNS, error) {
return &NetNS{path: hcnNamespace.Id}, nil
}
// NewNetNSFromPID is a stub in this build: it ignores baseDir and pid and
// always returns a nil NetNS together with errNotImplementedOnWindows.
func NewNetNSFromPID(baseDir string, pid uint32) (*NetNS, error) {
return nil, errNotImplementedOnWindows
}
// LoadNetNS loads existing network namespace.
func LoadNetNS(path string) *NetNS {
return &NetNS{path: path}