cri: Support pods with user namespaces

This patch requests the OCI runtime to create a userns when the CRI
message includes such request.

Signed-off-by: Rodrigo Campos <rodrigoca@microsoft.com>
This commit is contained in:
Rodrigo Campos 2022-11-16 12:41:46 +01:00
parent 31a6449734
commit a7adeb6976
6 changed files with 268 additions and 3 deletions

View File

@ -661,7 +661,7 @@ func WithSupplementalGroups(groups []int64) oci.SpecOpts {
} }
// WithPodNamespaces sets the pod namespaces for the container // WithPodNamespaces sets the pod namespaces for the container
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32) oci.SpecOpts { func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32, uids, gids []runtimespec.LinuxIDMapping) oci.SpecOpts {
namespaces := config.GetNamespaceOptions() namespaces := config.GetNamespaceOptions()
opts := []oci.SpecOpts{ opts := []oci.SpecOpts{
@ -672,6 +672,17 @@ func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid
if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER { if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER {
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(targetPid)})) opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(targetPid)}))
} }
if namespaces.GetUsernsOptions() != nil {
switch namespaces.GetUsernsOptions().GetMode() {
case runtime.NamespaceMode_NODE:
// Nothing to do. Not adding userns field uses the node userns.
case runtime.NamespaceMode_POD:
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UserNamespace, Path: GetUserNamespace(sandboxPid)}))
opts = append(opts, oci.WithUserNamespace(uids, gids))
}
}
return oci.Compose(opts...) return oci.Compose(opts...)
} }
@ -745,6 +756,8 @@ const (
utsNSFormat = "/proc/%v/ns/uts" utsNSFormat = "/proc/%v/ns/uts"
// pidNSFormat is the format of pid namespace of a process. // pidNSFormat is the format of pid namespace of a process.
pidNSFormat = "/proc/%v/ns/pid" pidNSFormat = "/proc/%v/ns/pid"
// userNSFormat is the format of user namespace of a process.
userNSFormat = "/proc/%v/ns/user"
) )
// GetNetworkNamespace returns the network namespace of a process. // GetNetworkNamespace returns the network namespace of a process.
@ -767,6 +780,11 @@ func GetPIDNamespace(pid uint32) string {
return fmt.Sprintf(pidNSFormat, pid) return fmt.Sprintf(pidNSFormat, pid)
} }
// GetUserNamespace returns the user namespace of a process.
func GetUserNamespace(pid uint32) string {
return fmt.Sprintf(userNSFormat, pid)
}
// WithCDI updates OCI spec with CDI content // WithCDI updates OCI spec with CDI content
func WithCDI(annotations map[string]string) oci.SpecOpts { func WithCDI(annotations map[string]string) oci.SpecOpts {
return func(ctx context.Context, _ oci.Client, c *containers.Container, s *oci.Spec) error { return func(ctx context.Context, _ oci.Client, c *containers.Container, s *oci.Spec) error {

View File

@ -313,7 +313,8 @@ func (c *criService) containerSpec(
specOpts = append(specOpts, specOpts = append(specOpts,
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj), customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid), // TODO: This is a hack to make this compile. We should move userns support to sbserver.
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, nil, nil),
customopts.WithSupplementalGroups(supplementalGroups), customopts.WithSupplementalGroups(supplementalGroups),
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer), customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
customopts.WithAnnotation(annotations.SandboxID, sandboxID), customopts.WithAnnotation(annotations.SandboxID, sandboxID),

View File

@ -311,9 +311,14 @@ func (c *criService) containerSpec(
targetPid = status.Pid targetPid = status.Pid
} }
uids, gids, err := parseUsernsIDs(nsOpts.GetUsernsOptions())
if err != nil {
return nil, fmt.Errorf("user namespace configuration: %w", err)
}
specOpts = append(specOpts, specOpts = append(specOpts,
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj), customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid), customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, uids, gids),
customopts.WithSupplementalGroups(supplementalGroups), customopts.WithSupplementalGroups(supplementalGroups),
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer), customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
customopts.WithAnnotation(annotations.SandboxID, sandboxID), customopts.WithAnnotation(annotations.SandboxID, sandboxID),

View File

@ -804,6 +804,113 @@ func TestPidNamespace(t *testing.T) {
} }
} }
func TestUserNamespace(t *testing.T) {
testID := "test-id"
testPid := uint32(1234)
testSandboxID := "sandbox-id"
testContainerName := "container-name"
idMap := runtime.IDMapping{
HostId: 1000,
ContainerId: 1000,
Length: 10,
}
expIDMap := runtimespec.LinuxIDMapping{
HostID: 1000,
ContainerID: 1000,
Size: 10,
}
containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
ociRuntime := config.Runtime{}
c := newTestCRIService()
for desc, test := range map[string]struct {
userNS *runtime.UserNamespace
expNS *runtimespec.LinuxNamespace
expNotNS *runtimespec.LinuxNamespace // Does NOT contain this namespace
expUIDMapping []runtimespec.LinuxIDMapping
expGIDMapping []runtimespec.LinuxIDMapping
err bool
}{
"node namespace mode": {
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_NODE},
// Expect userns to NOT be present.
expNotNS: &runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
Path: opts.GetUserNamespace(testPid),
},
},
"node namespace mode with mappings": {
userNS: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap},
Gids: []*runtime.IDMapping{&idMap},
},
err: true,
},
"container namespace mode": {
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_CONTAINER},
err: true,
},
"target namespace mode": {
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_TARGET},
err: true,
},
"unknown namespace mode": {
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode(100)},
err: true,
},
"pod namespace mode": {
userNS: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
Uids: []*runtime.IDMapping{&idMap},
Gids: []*runtime.IDMapping{&idMap},
},
expNS: &runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
Path: opts.GetUserNamespace(testPid),
},
expUIDMapping: []runtimespec.LinuxIDMapping{expIDMap},
expGIDMapping: []runtimespec.LinuxIDMapping{expIDMap},
},
"pod namespace mode with several mappings": {
userNS: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
Uids: []*runtime.IDMapping{&idMap, &idMap},
Gids: []*runtime.IDMapping{&idMap, &idMap},
},
err: true,
},
"pod namespace mode with uneven mappings": {
userNS: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
Uids: []*runtime.IDMapping{&idMap, &idMap},
Gids: []*runtime.IDMapping{&idMap},
},
err: true,
},
} {
t.Run(desc, func(t *testing.T) {
containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{UsernsOptions: test.userNS}
spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
if test.err {
assert.Error(t, err)
assert.Nil(t, spec)
return
}
assert.NoError(t, err)
assert.Equal(t, spec.Linux.UIDMappings, test.expUIDMapping)
assert.Equal(t, spec.Linux.GIDMappings, test.expGIDMapping)
if test.expNS != nil {
assert.Contains(t, spec.Linux.Namespaces, *test.expNS)
}
if test.expNotNS != nil {
assert.NotContains(t, spec.Linux.Namespaces, *test.expNotNS)
}
})
}
}
func TestNoDefaultRunMount(t *testing.T) { func TestNoDefaultRunMount(t *testing.T) {
testID := "test-id" testID := "test-id"
testPid := uint32(1234) testPid := uint32(1234)

View File

@ -96,6 +96,23 @@ func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxC
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace)) specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace))
} }
usernsOpts := nsOptions.GetUsernsOptions()
uids, gids, err := parseUsernsIDs(usernsOpts)
if err != nil {
return nil, fmt.Errorf("user namespace configuration: %w", err)
}
if usernsOpts != nil {
switch mode := usernsOpts.GetMode(); mode {
case runtime.NamespaceMode_NODE:
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UserNamespace))
case runtime.NamespaceMode_POD:
specOpts = append(specOpts, oci.WithUserNamespace(uids, gids))
default:
return nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
}
}
// It's fine to generate the spec before the sandbox /dev/shm // It's fine to generate the spec before the sandbox /dev/shm
// is actually created. // is actually created.
sandboxDevShm := c.getSandboxDevShm(id) sandboxDevShm := c.getSandboxDevShm(id)

View File

@ -98,6 +98,17 @@ func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConf
func TestLinuxSandboxContainerSpec(t *testing.T) { func TestLinuxSandboxContainerSpec(t *testing.T) {
testID := "test-id" testID := "test-id"
nsPath := "test-cni" nsPath := "test-cni"
idMap := runtime.IDMapping{
HostId: 1000,
ContainerId: 1000,
Length: 10,
}
expIDMap := runtimespec.LinuxIDMapping{
HostID: 1000,
ContainerID: 1000,
Size: 10,
}
for desc, test := range map[string]struct { for desc, test := range map[string]struct {
configChange func(*runtime.PodSandboxConfig) configChange func(*runtime.PodSandboxConfig)
specCheck func(*testing.T, *runtimespec.Spec) specCheck func(*testing.T, *runtimespec.Spec)
@ -122,6 +133,9 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
}) })
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0") assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647") assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
})
}, },
}, },
"host namespace": { "host namespace": {
@ -149,10 +163,113 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.IPCNamespace, Type: runtimespec.IPCNamespace,
}) })
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
})
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0") assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647") assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
}, },
}, },
"user namespace": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
Uids: []*runtime.IDMapping{&idMap},
Gids: []*runtime.IDMapping{&idMap},
},
},
}
},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
require.NotNil(t, spec.Linux)
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
})
require.Equal(t, spec.Linux.UIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
require.Equal(t, spec.Linux.GIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
},
},
"user namespace mode node and mappings": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap},
Gids: []*runtime.IDMapping{&idMap},
},
},
}
},
expectErr: true,
},
"user namespace with several mappings": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap, &idMap},
Gids: []*runtime.IDMapping{&idMap, &idMap},
},
},
}
},
expectErr: true,
},
"user namespace with uneven mappings": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap, &idMap},
Gids: []*runtime.IDMapping{&idMap},
},
},
}
},
expectErr: true,
},
"user namespace mode container": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_CONTAINER,
},
},
}
},
expectErr: true,
},
"user namespace mode target": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_TARGET,
},
},
}
},
expectErr: true,
},
"user namespace unknown mode": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode(100),
},
},
}
},
expectErr: true,
},
"should set supplemental groups correctly": { "should set supplemental groups correctly": {
configChange: func(c *runtime.PodSandboxConfig) { configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{