cri: Support pods with user namespaces
This patch requests the OCI runtime to create a user namespace (userns) when the CRI message includes such a request. Signed-off-by: Rodrigo Campos <rodrigoca@microsoft.com>
This commit is contained in:
parent
31a6449734
commit
a7adeb6976
@ -661,7 +661,7 @@ func WithSupplementalGroups(groups []int64) oci.SpecOpts {
|
||||
}
|
||||
|
||||
// WithPodNamespaces sets the pod namespaces for the container
|
||||
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32) oci.SpecOpts {
|
||||
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32, uids, gids []runtimespec.LinuxIDMapping) oci.SpecOpts {
|
||||
namespaces := config.GetNamespaceOptions()
|
||||
|
||||
opts := []oci.SpecOpts{
|
||||
@ -672,6 +672,17 @@ func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid
|
||||
if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER {
|
||||
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(targetPid)}))
|
||||
}
|
||||
|
||||
if namespaces.GetUsernsOptions() != nil {
|
||||
switch namespaces.GetUsernsOptions().GetMode() {
|
||||
case runtime.NamespaceMode_NODE:
|
||||
// Nothing to do. Not adding userns field uses the node userns.
|
||||
case runtime.NamespaceMode_POD:
|
||||
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UserNamespace, Path: GetUserNamespace(sandboxPid)}))
|
||||
opts = append(opts, oci.WithUserNamespace(uids, gids))
|
||||
}
|
||||
}
|
||||
|
||||
return oci.Compose(opts...)
|
||||
}
|
||||
|
||||
@ -745,6 +756,8 @@ const (
|
||||
utsNSFormat = "/proc/%v/ns/uts"
|
||||
// pidNSFormat is the format of pid namespace of a process.
|
||||
pidNSFormat = "/proc/%v/ns/pid"
|
||||
// userNSFormat is the format of user namespace of a process.
|
||||
userNSFormat = "/proc/%v/ns/user"
|
||||
)
|
||||
|
||||
// GetNetworkNamespace returns the network namespace of a process.
|
||||
@ -767,6 +780,11 @@ func GetPIDNamespace(pid uint32) string {
|
||||
return fmt.Sprintf(pidNSFormat, pid)
|
||||
}
|
||||
|
||||
// GetUserNamespace returns the user namespace of a process.
|
||||
func GetUserNamespace(pid uint32) string {
|
||||
return fmt.Sprintf(userNSFormat, pid)
|
||||
}
|
||||
|
||||
// WithCDI updates OCI spec with CDI content
|
||||
func WithCDI(annotations map[string]string) oci.SpecOpts {
|
||||
return func(ctx context.Context, _ oci.Client, c *containers.Container, s *oci.Spec) error {
|
||||
|
@ -313,7 +313,8 @@ func (c *criService) containerSpec(
|
||||
|
||||
specOpts = append(specOpts,
|
||||
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
|
||||
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid),
|
||||
// TODO: This is a hack to make this compile. We should move userns support to sbserver.
|
||||
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, nil, nil),
|
||||
customopts.WithSupplementalGroups(supplementalGroups),
|
||||
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
|
||||
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
|
||||
|
@ -311,9 +311,14 @@ func (c *criService) containerSpec(
|
||||
targetPid = status.Pid
|
||||
}
|
||||
|
||||
uids, gids, err := parseUsernsIDs(nsOpts.GetUsernsOptions())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("user namespace configuration: %w", err)
|
||||
}
|
||||
|
||||
specOpts = append(specOpts,
|
||||
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
|
||||
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid),
|
||||
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, uids, gids),
|
||||
customopts.WithSupplementalGroups(supplementalGroups),
|
||||
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
|
||||
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
|
||||
|
@ -804,6 +804,113 @@ func TestPidNamespace(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUserNamespace(t *testing.T) {
|
||||
testID := "test-id"
|
||||
testPid := uint32(1234)
|
||||
testSandboxID := "sandbox-id"
|
||||
testContainerName := "container-name"
|
||||
idMap := runtime.IDMapping{
|
||||
HostId: 1000,
|
||||
ContainerId: 1000,
|
||||
Length: 10,
|
||||
}
|
||||
expIDMap := runtimespec.LinuxIDMapping{
|
||||
HostID: 1000,
|
||||
ContainerID: 1000,
|
||||
Size: 10,
|
||||
}
|
||||
containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
|
||||
ociRuntime := config.Runtime{}
|
||||
c := newTestCRIService()
|
||||
for desc, test := range map[string]struct {
|
||||
userNS *runtime.UserNamespace
|
||||
expNS *runtimespec.LinuxNamespace
|
||||
expNotNS *runtimespec.LinuxNamespace // Does NOT contain this namespace
|
||||
expUIDMapping []runtimespec.LinuxIDMapping
|
||||
expGIDMapping []runtimespec.LinuxIDMapping
|
||||
err bool
|
||||
}{
|
||||
"node namespace mode": {
|
||||
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_NODE},
|
||||
// Expect userns to NOT be present.
|
||||
expNotNS: &runtimespec.LinuxNamespace{
|
||||
Type: runtimespec.UserNamespace,
|
||||
Path: opts.GetUserNamespace(testPid),
|
||||
},
|
||||
},
|
||||
"node namespace mode with mappings": {
|
||||
userNS: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_NODE,
|
||||
Uids: []*runtime.IDMapping{&idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap},
|
||||
},
|
||||
err: true,
|
||||
},
|
||||
"container namespace mode": {
|
||||
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_CONTAINER},
|
||||
err: true,
|
||||
},
|
||||
"target namespace mode": {
|
||||
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_TARGET},
|
||||
err: true,
|
||||
},
|
||||
"unknown namespace mode": {
|
||||
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode(100)},
|
||||
err: true,
|
||||
},
|
||||
"pod namespace mode": {
|
||||
userNS: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_POD,
|
||||
Uids: []*runtime.IDMapping{&idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap},
|
||||
},
|
||||
expNS: &runtimespec.LinuxNamespace{
|
||||
Type: runtimespec.UserNamespace,
|
||||
Path: opts.GetUserNamespace(testPid),
|
||||
},
|
||||
expUIDMapping: []runtimespec.LinuxIDMapping{expIDMap},
|
||||
expGIDMapping: []runtimespec.LinuxIDMapping{expIDMap},
|
||||
},
|
||||
"pod namespace mode with several mappings": {
|
||||
userNS: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_POD,
|
||||
Uids: []*runtime.IDMapping{&idMap, &idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap, &idMap},
|
||||
},
|
||||
err: true,
|
||||
},
|
||||
"pod namespace mode with uneven mappings": {
|
||||
userNS: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_POD,
|
||||
Uids: []*runtime.IDMapping{&idMap, &idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap},
|
||||
},
|
||||
err: true,
|
||||
},
|
||||
} {
|
||||
t.Run(desc, func(t *testing.T) {
|
||||
containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{UsernsOptions: test.userNS}
|
||||
spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
|
||||
|
||||
if test.err {
|
||||
assert.Error(t, err)
|
||||
assert.Nil(t, spec)
|
||||
return
|
||||
}
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, spec.Linux.UIDMappings, test.expUIDMapping)
|
||||
assert.Equal(t, spec.Linux.GIDMappings, test.expGIDMapping)
|
||||
|
||||
if test.expNS != nil {
|
||||
assert.Contains(t, spec.Linux.Namespaces, *test.expNS)
|
||||
}
|
||||
if test.expNotNS != nil {
|
||||
assert.NotContains(t, spec.Linux.Namespaces, *test.expNotNS)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNoDefaultRunMount(t *testing.T) {
|
||||
testID := "test-id"
|
||||
testPid := uint32(1234)
|
||||
|
@ -96,6 +96,23 @@ func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxC
|
||||
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace))
|
||||
}
|
||||
|
||||
usernsOpts := nsOptions.GetUsernsOptions()
|
||||
uids, gids, err := parseUsernsIDs(usernsOpts)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("user namespace configuration: %w", err)
|
||||
}
|
||||
|
||||
if usernsOpts != nil {
|
||||
switch mode := usernsOpts.GetMode(); mode {
|
||||
case runtime.NamespaceMode_NODE:
|
||||
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UserNamespace))
|
||||
case runtime.NamespaceMode_POD:
|
||||
specOpts = append(specOpts, oci.WithUserNamespace(uids, gids))
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
|
||||
}
|
||||
}
|
||||
|
||||
// It's fine to generate the spec before the sandbox /dev/shm
|
||||
// is actually created.
|
||||
sandboxDevShm := c.getSandboxDevShm(id)
|
||||
|
@ -98,6 +98,17 @@ func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConf
|
||||
func TestLinuxSandboxContainerSpec(t *testing.T) {
|
||||
testID := "test-id"
|
||||
nsPath := "test-cni"
|
||||
idMap := runtime.IDMapping{
|
||||
HostId: 1000,
|
||||
ContainerId: 1000,
|
||||
Length: 10,
|
||||
}
|
||||
expIDMap := runtimespec.LinuxIDMapping{
|
||||
HostID: 1000,
|
||||
ContainerID: 1000,
|
||||
Size: 10,
|
||||
}
|
||||
|
||||
for desc, test := range map[string]struct {
|
||||
configChange func(*runtime.PodSandboxConfig)
|
||||
specCheck func(*testing.T, *runtimespec.Spec)
|
||||
@ -122,6 +133,9 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
|
||||
})
|
||||
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
|
||||
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
|
||||
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
|
||||
Type: runtimespec.UserNamespace,
|
||||
})
|
||||
},
|
||||
},
|
||||
"host namespace": {
|
||||
@ -149,10 +163,113 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
|
||||
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
|
||||
Type: runtimespec.IPCNamespace,
|
||||
})
|
||||
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
|
||||
Type: runtimespec.UserNamespace,
|
||||
})
|
||||
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
|
||||
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
|
||||
},
|
||||
},
|
||||
"user namespace": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_POD,
|
||||
Uids: []*runtime.IDMapping{&idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
|
||||
require.NotNil(t, spec.Linux)
|
||||
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
|
||||
Type: runtimespec.UserNamespace,
|
||||
})
|
||||
require.Equal(t, spec.Linux.UIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
|
||||
require.Equal(t, spec.Linux.GIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
|
||||
|
||||
},
|
||||
},
|
||||
"user namespace mode node and mappings": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_NODE,
|
||||
Uids: []*runtime.IDMapping{&idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
"user namespace with several mappings": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_NODE,
|
||||
Uids: []*runtime.IDMapping{&idMap, &idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap, &idMap},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
"user namespace with uneven mappings": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_NODE,
|
||||
Uids: []*runtime.IDMapping{&idMap, &idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
"user namespace mode container": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_CONTAINER,
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
"user namespace mode target": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_TARGET,
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
"user namespace unknown mode": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode(100),
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
"should set supplemental groups correctly": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
|
Loading…
Reference in New Issue
Block a user