diff --git a/pkg/cri/opts/spec_linux.go b/pkg/cri/opts/spec_linux.go index 53cf464d3..767c9c2fc 100644 --- a/pkg/cri/opts/spec_linux.go +++ b/pkg/cri/opts/spec_linux.go @@ -661,7 +661,7 @@ func WithSupplementalGroups(groups []int64) oci.SpecOpts { } // WithPodNamespaces sets the pod namespaces for the container -func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32) oci.SpecOpts { +func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32, uids, gids []runtimespec.LinuxIDMapping) oci.SpecOpts { namespaces := config.GetNamespaceOptions() opts := []oci.SpecOpts{ @@ -672,6 +672,17 @@ func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER { opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(targetPid)})) } + + if namespaces.GetUsernsOptions() != nil { + switch namespaces.GetUsernsOptions().GetMode() { + case runtime.NamespaceMode_NODE: + // Nothing to do. Not adding userns field uses the node userns. + case runtime.NamespaceMode_POD: + opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UserNamespace, Path: GetUserNamespace(sandboxPid)})) + opts = append(opts, oci.WithUserNamespace(uids, gids)) + } + } + return oci.Compose(opts...) } @@ -745,6 +756,8 @@ const ( utsNSFormat = "/proc/%v/ns/uts" // pidNSFormat is the format of pid namespace of a process. pidNSFormat = "/proc/%v/ns/pid" + // userNSFormat is the format of user namespace of a process. + userNSFormat = "/proc/%v/ns/user" ) // GetNetworkNamespace returns the network namespace of a process. @@ -767,6 +780,11 @@ func GetPIDNamespace(pid uint32) string { return fmt.Sprintf(pidNSFormat, pid) } +// GetUserNamespace returns the user namespace of a process. +func GetUserNamespace(pid uint32) string { + return fmt.Sprintf(userNSFormat, pid) +} + // WithCDI updates OCI spec with CDI content func WithCDI(annotations map[string]string) oci.SpecOpts { return func(ctx context.Context, _ oci.Client, c *containers.Container, s *oci.Spec) error { diff --git a/pkg/cri/sbserver/container_create_linux.go b/pkg/cri/sbserver/container_create_linux.go index 558eaf962..71e6af0a6 100644 --- a/pkg/cri/sbserver/container_create_linux.go +++ b/pkg/cri/sbserver/container_create_linux.go @@ -313,7 +313,8 @@ func (c *criService) containerSpec( specOpts = append(specOpts, customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj), - customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid), + // TODO: This is a hack to make this compile. We should move userns support to sbserver. + customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, nil, nil), customopts.WithSupplementalGroups(supplementalGroups), customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer), customopts.WithAnnotation(annotations.SandboxID, sandboxID), diff --git a/pkg/cri/server/container_create_linux.go b/pkg/cri/server/container_create_linux.go index 93e7469ab..5181c5f5e 100644 --- a/pkg/cri/server/container_create_linux.go +++ b/pkg/cri/server/container_create_linux.go @@ -311,9 +311,14 @@ func (c *criService) containerSpec( targetPid = status.Pid } + uids, gids, err := parseUsernsIDs(nsOpts.GetUsernsOptions()) + if err != nil { + return nil, fmt.Errorf("user namespace configuration: %w", err) + } + specOpts = append(specOpts, customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj), - customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid), + customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, uids, gids), customopts.WithSupplementalGroups(supplementalGroups), customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer), customopts.WithAnnotation(annotations.SandboxID, sandboxID), diff --git a/pkg/cri/server/container_create_linux_test.go b/pkg/cri/server/container_create_linux_test.go index 8ba7cabb0..a17bd5895 100644 --- a/pkg/cri/server/container_create_linux_test.go +++ b/pkg/cri/server/container_create_linux_test.go @@ -804,6 +804,113 @@ func TestPidNamespace(t *testing.T) { } } +func TestUserNamespace(t *testing.T) { + testID := "test-id" + testPid := uint32(1234) + testSandboxID := "sandbox-id" + testContainerName := "container-name" + idMap := runtime.IDMapping{ + HostId: 1000, + ContainerId: 1000, + Length: 10, + } + expIDMap := runtimespec.LinuxIDMapping{ + HostID: 1000, + ContainerID: 1000, + Size: 10, + } + containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData() + ociRuntime := config.Runtime{} + c := newTestCRIService() + for desc, test := range map[string]struct { + userNS *runtime.UserNamespace + expNS *runtimespec.LinuxNamespace + expNotNS *runtimespec.LinuxNamespace // Does NOT contain this namespace + expUIDMapping []runtimespec.LinuxIDMapping + expGIDMapping []runtimespec.LinuxIDMapping + err bool + }{ + "node namespace mode": { + userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_NODE}, + // Expect userns to NOT be present. + expNotNS: &runtimespec.LinuxNamespace{ + Type: runtimespec.UserNamespace, + Path: opts.GetUserNamespace(testPid), + }, + }, + "node namespace mode with mappings": { + userNS: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_NODE, + Uids: []*runtime.IDMapping{&idMap}, + Gids: []*runtime.IDMapping{&idMap}, + }, + err: true, + }, + "container namespace mode": { + userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_CONTAINER}, + err: true, + }, + "target namespace mode": { + userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_TARGET}, + err: true, + }, + "unknown namespace mode": { + userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode(100)}, + err: true, + }, + "pod namespace mode": { + userNS: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_POD, + Uids: []*runtime.IDMapping{&idMap}, + Gids: []*runtime.IDMapping{&idMap}, + }, + expNS: &runtimespec.LinuxNamespace{ + Type: runtimespec.UserNamespace, + Path: opts.GetUserNamespace(testPid), + }, + expUIDMapping: []runtimespec.LinuxIDMapping{expIDMap}, + expGIDMapping: []runtimespec.LinuxIDMapping{expIDMap}, + }, + "pod namespace mode with several mappings": { + userNS: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_POD, + Uids: []*runtime.IDMapping{&idMap, &idMap}, + Gids: []*runtime.IDMapping{&idMap, &idMap}, + }, + err: true, + }, + "pod namespace mode with uneven mappings": { + userNS: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_POD, + Uids: []*runtime.IDMapping{&idMap, &idMap}, + Gids: []*runtime.IDMapping{&idMap}, + }, + err: true, + }, + } { + t.Run(desc, func(t *testing.T) { + containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{UsernsOptions: test.userNS} + spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) + + if test.err { + assert.Error(t, err) + assert.Nil(t, spec) + return + } + assert.NoError(t, err) + assert.Equal(t, spec.Linux.UIDMappings, test.expUIDMapping) + assert.Equal(t, spec.Linux.GIDMappings, test.expGIDMapping) + + if test.expNS != nil { + assert.Contains(t, spec.Linux.Namespaces, *test.expNS) + } + if test.expNotNS != nil { + assert.NotContains(t, spec.Linux.Namespaces, *test.expNotNS) + } + }) + } +} + func TestNoDefaultRunMount(t *testing.T) { testID := "test-id" testPid := uint32(1234) diff --git a/pkg/cri/server/sandbox_run_linux.go b/pkg/cri/server/sandbox_run_linux.go index 5aacd76ff..78cca50c6 100644 --- a/pkg/cri/server/sandbox_run_linux.go +++ b/pkg/cri/server/sandbox_run_linux.go @@ -96,6 +96,23 @@ func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxC specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace)) } + usernsOpts := nsOptions.GetUsernsOptions() + uids, gids, err := parseUsernsIDs(usernsOpts) + if err != nil { + return nil, fmt.Errorf("user namespace configuration: %w", err) + } + + if usernsOpts != nil { + switch mode := usernsOpts.GetMode(); mode { + case runtime.NamespaceMode_NODE: + specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UserNamespace)) + case runtime.NamespaceMode_POD: + specOpts = append(specOpts, oci.WithUserNamespace(uids, gids)) + default: + return nil, fmt.Errorf("unsupported user namespace mode: %q", mode) + } + } + // It's fine to generate the spec before the sandbox /dev/shm // is actually created. sandboxDevShm := c.getSandboxDevShm(id) diff --git a/pkg/cri/server/sandbox_run_linux_test.go b/pkg/cri/server/sandbox_run_linux_test.go index 378136136..9c646e069 100644 --- a/pkg/cri/server/sandbox_run_linux_test.go +++ b/pkg/cri/server/sandbox_run_linux_test.go @@ -98,6 +98,17 @@ func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConf func TestLinuxSandboxContainerSpec(t *testing.T) { testID := "test-id" nsPath := "test-cni" + idMap := runtime.IDMapping{ + HostId: 1000, + ContainerId: 1000, + Length: 10, + } + expIDMap := runtimespec.LinuxIDMapping{ + HostID: 1000, + ContainerID: 1000, + Size: 10, + } + for desc, test := range map[string]struct { configChange func(*runtime.PodSandboxConfig) specCheck func(*testing.T, *runtimespec.Spec) @@ -122,6 +133,9 @@ func TestLinuxSandboxContainerSpec(t *testing.T) { }) assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0") assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647") + assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.UserNamespace, + }) }, }, "host namespace": { @@ -149,10 +163,113 @@ func TestLinuxSandboxContainerSpec(t *testing.T) { assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ Type: runtimespec.IPCNamespace, }) + assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.UserNamespace, + }) assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0") assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647") }, }, + "user namespace": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + UsernsOptions: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_POD, + Uids: []*runtime.IDMapping{&idMap}, + Gids: []*runtime.IDMapping{&idMap}, + }, + }, + } + }, + specCheck: func(t *testing.T, spec *runtimespec.Spec) { + require.NotNil(t, spec.Linux) + assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ + Type: runtimespec.UserNamespace, + }) + require.Equal(t, spec.Linux.UIDMappings, []runtimespec.LinuxIDMapping{expIDMap}) + require.Equal(t, spec.Linux.GIDMappings, []runtimespec.LinuxIDMapping{expIDMap}) + + }, + }, + "user namespace mode node and mappings": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + UsernsOptions: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_NODE, + Uids: []*runtime.IDMapping{&idMap}, + Gids: []*runtime.IDMapping{&idMap}, + }, + }, + } + }, + expectErr: true, + }, + "user namespace with several mappings": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + UsernsOptions: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_NODE, + Uids: []*runtime.IDMapping{&idMap, &idMap}, + Gids: []*runtime.IDMapping{&idMap, &idMap}, + }, + }, + } + }, + expectErr: true, + }, + "user namespace with uneven mappings": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + UsernsOptions: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_NODE, + Uids: []*runtime.IDMapping{&idMap, &idMap}, + Gids: []*runtime.IDMapping{&idMap}, + }, + }, + } + }, + expectErr: true, + }, + "user namespace mode container": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + UsernsOptions: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_CONTAINER, + }, + }, + } + }, + expectErr: true, + }, + "user namespace mode target": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + UsernsOptions: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_TARGET, + }, + }, + } + }, + expectErr: true, + }, + "user namespace unknown mode": { + configChange: func(c *runtime.PodSandboxConfig) { + c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + UsernsOptions: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode(100), + }, + }, + } + }, + expectErr: true, + }, "should set supplemental groups correctly": { configChange: func(c *runtime.PodSandboxConfig) { c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{