Merge pull request #7679 from kinvolk/rata/userns-stateless-pods

Add support for user namespaces in stateless pods (KEP-127)
This commit is contained in:
Mike Brown 2022-12-29 14:08:24 -06:00 committed by GitHub
commit 66f186d42d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 909 additions and 20 deletions

View File

@ -866,3 +866,21 @@ func toPlatforms(pt []*apitypes.Platform) []ocispec.Platform {
} }
return platforms return platforms
} }
// GetSnapshotterCapabilities returns the capabilities of a snapshotter.
func (c *Client) GetSnapshotterCapabilities(ctx context.Context, snapshotterName string) ([]string, error) {
filters := []string{fmt.Sprintf("type==%s, id==%s", plugin.SnapshotPlugin, snapshotterName)}
in := c.IntrospectionService()
resp, err := in.Plugins(ctx, filters)
if err != nil {
return nil, err
}
if len(resp.Plugins) <= 0 {
return nil, fmt.Errorf("inspection service could not find snapshotter %s plugin", snapshotterName)
}
sn := resp.Plugins[0]
return sn.Capabilities, nil
}

View File

@ -224,6 +224,11 @@ func WithNewSnapshot(id string, i Image, opts ...snapshots.Opt) NewContainerOpts
if err != nil { if err != nil {
return err return err
} }
parent, err = resolveSnapshotOptions(ctx, client, c.Snapshotter, s, parent, opts...)
if err != nil {
return err
}
if _, err := s.Prepare(ctx, id, parent, opts...); err != nil { if _, err := s.Prepare(ctx, id, parent, opts...); err != nil {
return err return err
} }
@ -268,6 +273,11 @@ func WithNewSnapshotView(id string, i Image, opts ...snapshots.Opt) NewContainer
if err != nil { if err != nil {
return err return err
} }
parent, err = resolveSnapshotOptions(ctx, client, c.Snapshotter, s, parent, opts...)
if err != nil {
return err
}
if _, err := s.View(ctx, id, parent, opts...); err != nil { if _, err := s.View(ctx, id, parent, opts...); err != nil {
return err return err
} }

View File

@ -126,6 +126,35 @@ func WithHostNetwork(p *runtime.PodSandboxConfig) {
p.Linux.SecurityContext.NamespaceOptions.Network = runtime.NamespaceMode_NODE p.Linux.SecurityContext.NamespaceOptions.Network = runtime.NamespaceMode_NODE
} }
// Set pod userns.
func WithPodUserNs(containerID, hostID, length uint32) PodSandboxOpts {
return func(p *runtime.PodSandboxConfig) {
if p.Linux == nil {
p.Linux = &runtime.LinuxPodSandboxConfig{}
}
if p.Linux.SecurityContext == nil {
p.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{}
}
if p.Linux.SecurityContext.NamespaceOptions == nil {
p.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{}
}
idMap := runtime.IDMapping{
HostId: hostID,
ContainerId: containerID,
Length: length,
}
if p.Linux.SecurityContext.NamespaceOptions.UsernsOptions == nil {
p.Linux.SecurityContext.NamespaceOptions.UsernsOptions = &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
}
}
p.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Uids = append(p.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Uids, &idMap)
p.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Gids = append(p.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Gids, &idMap)
}
}
// Set host pid. // Set host pid.
func WithHostPid(p *runtime.PodSandboxConfig) { func WithHostPid(p *runtime.PodSandboxConfig) {
if p.Linux == nil { if p.Linux == nil {
@ -314,6 +343,35 @@ func WithPidNamespace(mode runtime.NamespaceMode) ContainerOpts {
} }
// Add user namespace pod mode.
func WithUserNamespace(containerID, hostID, length uint32) ContainerOpts {
return func(c *runtime.ContainerConfig) {
if c.Linux == nil {
c.Linux = &runtime.LinuxContainerConfig{}
}
if c.Linux.SecurityContext == nil {
c.Linux.SecurityContext = &runtime.LinuxContainerSecurityContext{}
}
if c.Linux.SecurityContext.NamespaceOptions == nil {
c.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{}
}
idMap := runtime.IDMapping{
HostId: hostID,
ContainerId: containerID,
Length: length,
}
if c.Linux.SecurityContext.NamespaceOptions.UsernsOptions == nil {
c.Linux.SecurityContext.NamespaceOptions.UsernsOptions = &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
}
}
c.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Uids = append(c.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Uids, &idMap)
c.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Gids = append(c.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Gids, &idMap)
}
}
// Add container log path. // Add container log path.
func WithLogPath(path string) ContainerOpts { func WithLogPath(path string) ContainerOpts {
return func(c *runtime.ContainerConfig) { return func(c *runtime.ContainerConfig) {

View File

@ -0,0 +1,169 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package integration
import (
"fmt"
"os"
"path/filepath"
"syscall"
"testing"
"time"
"github.com/containerd/containerd/integration/images"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
exec "golang.org/x/sys/execabs"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
func TestPodUserNS(t *testing.T) {
containerID := uint32(0)
hostID := uint32(65536)
size := uint32(65536)
for name, test := range map[string]struct {
sandboxOpts []PodSandboxOpts
containerOpts []ContainerOpts
checkOutput func(t *testing.T, output string)
expectErr bool
}{
"userns uid mapping": {
sandboxOpts: []PodSandboxOpts{
WithPodUserNs(containerID, hostID, size),
},
containerOpts: []ContainerOpts{
WithUserNamespace(containerID, hostID, size),
WithCommand("cat", "/proc/self/uid_map"),
},
checkOutput: func(t *testing.T, output string) {
// The output should contain the length of the userns requested.
assert.Contains(t, output, fmt.Sprint(size))
},
},
"userns gid mapping": {
sandboxOpts: []PodSandboxOpts{
WithPodUserNs(containerID, hostID, size),
},
containerOpts: []ContainerOpts{
WithUserNamespace(containerID, hostID, size),
WithCommand("cat", "/proc/self/gid_map"),
},
checkOutput: func(t *testing.T, output string) {
// The output should contain the length of the userns requested.
assert.Contains(t, output, fmt.Sprint(size))
},
},
"rootfs permissions": {
sandboxOpts: []PodSandboxOpts{
WithPodUserNs(containerID, hostID, size),
},
containerOpts: []ContainerOpts{
WithUserNamespace(containerID, hostID, size),
// Prints numeric UID and GID for path.
// For example, if UID and GID is 0 it will print: =0=0=
// We add the "=" signs so we use can assert.Contains() and be sure
// the UID/GID is 0 and not things like 100 (that contain 0).
// We can't use assert.Equal() easily as it contains timestamp, etc.
WithCommand("stat", "-c", "'=%u=%g='", "/root/"),
},
checkOutput: func(t *testing.T, output string) {
// The UID and GID should be 0 (root) if the chown/remap is done correctly.
assert.Contains(t, output, "=0=0=")
},
},
"fails with several mappings": {
sandboxOpts: []PodSandboxOpts{
WithPodUserNs(containerID, hostID, size),
WithPodUserNs(containerID*2, hostID*2, size*2),
},
expectErr: true,
},
} {
t.Run(name, func(t *testing.T) {
if os.Getenv("ENABLE_CRI_SANDBOXES") == "'sandboxed'" {
t.Skip("skipping test: userns not supported/needed in sanboxed runtimes")
}
cmd := exec.Command("true")
cmd.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: syscall.CLONE_NEWUSER,
}
if err := cmd.Run(); err != nil {
t.Skip("skipping test: user namespaces are unavailable")
}
testPodLogDir := t.TempDir()
sandboxOpts := append(test.sandboxOpts, WithPodLogDirectory(testPodLogDir))
t.Log("Create a sandbox with userns")
sbConfig := PodSandboxConfig("sandbox", "userns", sandboxOpts...)
sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
if err != nil {
if !test.expectErr {
t.Fatalf("Unexpected RunPodSandbox error: %v", err)
}
return
}
// Make sure the sandbox is cleaned up.
defer func() {
assert.NoError(t, runtimeService.StopPodSandbox(sb))
assert.NoError(t, runtimeService.RemovePodSandbox(sb))
}()
if test.expectErr {
t.Fatalf("Expected RunPodSandbox to return error")
}
var (
testImage = images.Get(images.BusyBox)
containerName = "test-container"
)
EnsureImageExists(t, testImage)
containerOpts := append(test.containerOpts,
WithLogPath(containerName),
)
t.Log("Create a container for userns")
cnConfig := ContainerConfig(
containerName,
testImage,
containerOpts...,
)
cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
require.NoError(t, err)
t.Log("Start the container")
require.NoError(t, runtimeService.StartContainer(cn))
t.Log("Wait for container to finish running")
require.NoError(t, Eventually(func() (bool, error) {
s, err := runtimeService.ContainerStatus(cn)
if err != nil {
return false, err
}
if s.GetState() == runtime.ContainerState_CONTAINER_EXITED {
return true, nil
}
return false, nil
}, time.Second, 30*time.Second))
content, err := os.ReadFile(filepath.Join(testPodLogDir, containerName))
assert.NoError(t, err)
t.Log("Running check function")
test.checkOutput(t, string(content))
})
}
}

View File

@ -661,7 +661,7 @@ func WithSupplementalGroups(groups []int64) oci.SpecOpts {
} }
// WithPodNamespaces sets the pod namespaces for the container // WithPodNamespaces sets the pod namespaces for the container
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32) oci.SpecOpts { func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32, uids, gids []runtimespec.LinuxIDMapping) oci.SpecOpts {
namespaces := config.GetNamespaceOptions() namespaces := config.GetNamespaceOptions()
opts := []oci.SpecOpts{ opts := []oci.SpecOpts{
@ -672,6 +672,17 @@ func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid
if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER { if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER {
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(targetPid)})) opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(targetPid)}))
} }
if namespaces.GetUsernsOptions() != nil {
switch namespaces.GetUsernsOptions().GetMode() {
case runtime.NamespaceMode_NODE:
// Nothing to do. Not adding userns field uses the node userns.
case runtime.NamespaceMode_POD:
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UserNamespace, Path: GetUserNamespace(sandboxPid)}))
opts = append(opts, oci.WithUserNamespace(uids, gids))
}
}
return oci.Compose(opts...) return oci.Compose(opts...)
} }
@ -745,6 +756,8 @@ const (
utsNSFormat = "/proc/%v/ns/uts" utsNSFormat = "/proc/%v/ns/uts"
// pidNSFormat is the format of pid namespace of a process. // pidNSFormat is the format of pid namespace of a process.
pidNSFormat = "/proc/%v/ns/pid" pidNSFormat = "/proc/%v/ns/pid"
// userNSFormat is the format of user namespace of a process.
userNSFormat = "/proc/%v/ns/user"
) )
// GetNetworkNamespace returns the network namespace of a process. // GetNetworkNamespace returns the network namespace of a process.
@ -767,6 +780,11 @@ func GetPIDNamespace(pid uint32) string {
return fmt.Sprintf(pidNSFormat, pid) return fmt.Sprintf(pidNSFormat, pid)
} }
// GetUserNamespace returns the user namespace of a process.
func GetUserNamespace(pid uint32) string {
return fmt.Sprintf(userNSFormat, pid)
}
// WithCDI updates OCI spec with CDI content // WithCDI updates OCI spec with CDI content
func WithCDI(annotations map[string]string) oci.SpecOpts { func WithCDI(annotations map[string]string) oci.SpecOpts {
return func(ctx context.Context, _ oci.Client, c *containers.Container, s *oci.Spec) error { return func(ctx context.Context, _ oci.Client, c *containers.Container, s *oci.Spec) error {

View File

@ -313,7 +313,8 @@ func (c *criService) containerSpec(
specOpts = append(specOpts, specOpts = append(specOpts,
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj), customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid), // TODO: This is a hack to make this compile. We should move userns support to sbserver.
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, nil, nil),
customopts.WithSupplementalGroups(supplementalGroups), customopts.WithSupplementalGroups(supplementalGroups),
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer), customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
customopts.WithAnnotation(annotations.SandboxID, sandboxID), customopts.WithAnnotation(annotations.SandboxID, sandboxID),

View File

@ -184,7 +184,10 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
log.G(ctx).Debugf("Container %q spec: %#+v", id, spew.NewFormatter(spec)) log.G(ctx).Debugf("Container %q spec: %#+v", id, spew.NewFormatter(spec))
// Grab any platform specific snapshotter opts. // Grab any platform specific snapshotter opts.
sOpts := snapshotterOpts(c.config.ContainerdConfig.Snapshotter, config) sOpts, err := snapshotterOpts(c.config.ContainerdConfig.Snapshotter, config)
if err != nil {
return nil, err
}
// Set snapshotter before any other options. // Set snapshotter before any other options.
opts := []containerd.NewContainerOpts{ opts := []containerd.NewContainerOpts{

View File

@ -311,9 +311,14 @@ func (c *criService) containerSpec(
targetPid = status.Pid targetPid = status.Pid
} }
uids, gids, err := parseUsernsIDs(nsOpts.GetUsernsOptions())
if err != nil {
return nil, fmt.Errorf("user namespace configuration: %w", err)
}
specOpts = append(specOpts, specOpts = append(specOpts,
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj), customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid), customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, uids, gids),
customopts.WithSupplementalGroups(supplementalGroups), customopts.WithSupplementalGroups(supplementalGroups),
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer), customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
customopts.WithAnnotation(annotations.SandboxID, sandboxID), customopts.WithAnnotation(annotations.SandboxID, sandboxID),
@ -601,6 +606,7 @@ func generateUserString(username string, uid, gid *runtime.Int64Value) (string,
} }
// snapshotterOpts returns any Linux specific snapshotter options for the rootfs snapshot // snapshotterOpts returns any Linux specific snapshotter options for the rootfs snapshot
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt { func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
return []snapshots.Opt{} nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions()
return snapshotterRemapOpts(nsOpts)
} }

View File

@ -804,6 +804,113 @@ func TestPidNamespace(t *testing.T) {
} }
} }
func TestUserNamespace(t *testing.T) {
testID := "test-id"
testPid := uint32(1234)
testSandboxID := "sandbox-id"
testContainerName := "container-name"
idMap := runtime.IDMapping{
HostId: 1000,
ContainerId: 1000,
Length: 10,
}
expIDMap := runtimespec.LinuxIDMapping{
HostID: 1000,
ContainerID: 1000,
Size: 10,
}
containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
ociRuntime := config.Runtime{}
c := newTestCRIService()
for desc, test := range map[string]struct {
userNS *runtime.UserNamespace
expNS *runtimespec.LinuxNamespace
expNotNS *runtimespec.LinuxNamespace // Does NOT contain this namespace
expUIDMapping []runtimespec.LinuxIDMapping
expGIDMapping []runtimespec.LinuxIDMapping
err bool
}{
"node namespace mode": {
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_NODE},
// Expect userns to NOT be present.
expNotNS: &runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
Path: opts.GetUserNamespace(testPid),
},
},
"node namespace mode with mappings": {
userNS: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap},
Gids: []*runtime.IDMapping{&idMap},
},
err: true,
},
"container namespace mode": {
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_CONTAINER},
err: true,
},
"target namespace mode": {
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_TARGET},
err: true,
},
"unknown namespace mode": {
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode(100)},
err: true,
},
"pod namespace mode": {
userNS: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
Uids: []*runtime.IDMapping{&idMap},
Gids: []*runtime.IDMapping{&idMap},
},
expNS: &runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
Path: opts.GetUserNamespace(testPid),
},
expUIDMapping: []runtimespec.LinuxIDMapping{expIDMap},
expGIDMapping: []runtimespec.LinuxIDMapping{expIDMap},
},
"pod namespace mode with several mappings": {
userNS: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
Uids: []*runtime.IDMapping{&idMap, &idMap},
Gids: []*runtime.IDMapping{&idMap, &idMap},
},
err: true,
},
"pod namespace mode with uneven mappings": {
userNS: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
Uids: []*runtime.IDMapping{&idMap, &idMap},
Gids: []*runtime.IDMapping{&idMap},
},
err: true,
},
} {
t.Run(desc, func(t *testing.T) {
containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{UsernsOptions: test.userNS}
spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
if test.err {
assert.Error(t, err)
assert.Nil(t, spec)
return
}
assert.NoError(t, err)
assert.Equal(t, spec.Linux.UIDMappings, test.expUIDMapping)
assert.Equal(t, spec.Linux.GIDMappings, test.expGIDMapping)
if test.expNS != nil {
assert.Contains(t, spec.Linux.Namespaces, *test.expNS)
}
if test.expNotNS != nil {
assert.NotContains(t, spec.Linux.Namespaces, *test.expNotNS)
}
})
}
}
func TestNoDefaultRunMount(t *testing.T) { func TestNoDefaultRunMount(t *testing.T) {
testID := "test-id" testID := "test-id"
testPid := uint32(1234) testPid := uint32(1234)

View File

@ -55,6 +55,6 @@ func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageCon
} }
// snapshotterOpts returns snapshotter options for the rootfs snapshot // snapshotterOpts returns snapshotter options for the rootfs snapshot
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt { func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
return []snapshots.Opt{} return []snapshots.Opt{}, nil
} }

View File

@ -145,7 +145,7 @@ func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageCon
} }
// snapshotterOpts returns any Windows specific snapshotter options for the r/w layer // snapshotterOpts returns any Windows specific snapshotter options for the r/w layer
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt { func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
var opts []snapshots.Opt var opts []snapshots.Opt
switch snapshotterName { switch snapshotterName {
@ -160,5 +160,5 @@ func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []
} }
} }
return opts return opts, nil
} }

View File

@ -28,11 +28,13 @@ import (
"syscall" "syscall"
"time" "time"
"github.com/containerd/containerd"
"github.com/containerd/containerd/log" "github.com/containerd/containerd/log"
"github.com/containerd/containerd/mount" "github.com/containerd/containerd/mount"
"github.com/containerd/containerd/pkg/apparmor" "github.com/containerd/containerd/pkg/apparmor"
"github.com/containerd/containerd/pkg/seccomp" "github.com/containerd/containerd/pkg/seccomp"
"github.com/containerd/containerd/pkg/seutil" "github.com/containerd/containerd/pkg/seutil"
"github.com/containerd/containerd/snapshots"
"github.com/moby/sys/mountinfo" "github.com/moby/sys/mountinfo"
"github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label" "github.com/opencontainers/selinux/go-selinux/label"
@ -275,3 +277,92 @@ func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
spec.Process.SelinuxLabel = l spec.Process.SelinuxLabel = l
return nil return nil
} }
func parseUsernsIDMap(runtimeIDMap []*runtime.IDMapping) ([]specs.LinuxIDMapping, error) {
var m []specs.LinuxIDMapping
if len(runtimeIDMap) == 0 {
return m, nil
}
if len(runtimeIDMap) > 1 {
// We only accept 1 line, because containerd.WithRemappedSnapshot() only supports that.
return m, fmt.Errorf("only one mapping line supported, got %v mapping lines", len(runtimeIDMap))
}
// We know len is 1 now.
if runtimeIDMap[0] == nil {
return m, nil
}
uidMap := *runtimeIDMap[0]
if uidMap.Length < 1 {
return m, fmt.Errorf("invalid mapping length: %v", uidMap.Length)
}
m = []specs.LinuxIDMapping{
{
ContainerID: uidMap.ContainerId,
HostID: uidMap.HostId,
Size: uidMap.Length,
},
}
return m, nil
}
func parseUsernsIDs(userns *runtime.UserNamespace) (uids, gids []specs.LinuxIDMapping, retErr error) {
if userns == nil {
// If userns is not set, the kubelet doesn't support this option
// and we should just fallback to no userns. This is completely
// valid.
return nil, nil, nil
}
uidRuntimeMap := userns.GetUids()
gidRuntimeMap := userns.GetGids()
uids, err := parseUsernsIDMap(uidRuntimeMap)
if err != nil {
return nil, nil, fmt.Errorf("UID mapping: %w", err)
}
gids, err = parseUsernsIDMap(gidRuntimeMap)
if err != nil {
return nil, nil, fmt.Errorf("GID mapping: %w", err)
}
switch mode := userns.GetMode(); mode {
case runtime.NamespaceMode_NODE:
if len(uids) != 0 || len(gids) != 0 {
return nil, nil, fmt.Errorf("can't use user namespace mode %q with mappings. Got %v UID mappings and %v GID mappings", mode, len(uids), len(gids))
}
case runtime.NamespaceMode_POD:
// This is valid, we will handle it in WithPodNamespaces().
if len(uids) == 0 || len(gids) == 0 {
return nil, nil, fmt.Errorf("can't use user namespace mode %q without UID and GID mappings", mode)
}
default:
return nil, nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
}
return uids, gids, nil
}
func snapshotterRemapOpts(nsOpts *runtime.NamespaceOption) ([]snapshots.Opt, error) {
snapshotOpt := []snapshots.Opt{}
usernsOpts := nsOpts.GetUsernsOptions()
if usernsOpts == nil {
return snapshotOpt, nil
}
uids, gids, err := parseUsernsIDs(usernsOpts)
if err != nil {
return nil, fmt.Errorf("user namespace configuration: %w", err)
}
if usernsOpts.GetMode() == runtime.NamespaceMode_POD {
snapshotOpt = append(snapshotOpt, containerd.WithRemapperLabels(0, uids[0].HostID, 0, gids[0].HostID, uids[0].Size))
}
return snapshotOpt, nil
}

View File

@ -23,6 +23,7 @@ import (
"fmt" "fmt"
"math" "math"
"path/filepath" "path/filepath"
goruntime "runtime"
"strings" "strings"
"time" "time"
@ -157,10 +158,17 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to generate runtime options: %w", err) return nil, fmt.Errorf("failed to generate runtime options: %w", err)
} }
snapshotterOpt := snapshots.WithLabels(snapshots.FilterInheritedLabels(config.Annotations))
sOpts := []snapshots.Opt{snapshots.WithLabels(snapshots.FilterInheritedLabels(config.Annotations))}
extraSOpts, err := sandboxSnapshotterOpts(config)
if err != nil {
return nil, err
}
sOpts = append(sOpts, extraSOpts...)
opts := []containerd.NewContainerOpts{ opts := []containerd.NewContainerOpts{
containerd.WithSnapshotter(c.runtimeSnapshotter(ctx, ociRuntime)), containerd.WithSnapshotter(c.runtimeSnapshotter(ctx, ociRuntime)),
customopts.WithNewSnapshot(id, containerdImage, snapshotterOpt), customopts.WithNewSnapshot(id, containerdImage, sOpts...),
containerd.WithSpec(spec, specOpts...), containerd.WithSpec(spec, specOpts...),
containerd.WithContainerLabels(sandboxLabels), containerd.WithContainerLabels(sandboxLabels),
containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata), containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata),
@ -244,8 +252,27 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
return nil, fmt.Errorf("failed to get sandbox container info: %w", err) return nil, fmt.Errorf("failed to get sandbox container info: %w", err)
} }
userNsEnabled := false
if goruntime.GOOS != "windows" {
usernsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions()
if usernsOpts != nil && usernsOpts.GetMode() == runtime.NamespaceMode_POD {
userNsEnabled = true
}
}
// Setup the network namespace if host networking wasn't requested. // Setup the network namespace if host networking wasn't requested.
if !hostNetwork(config) { if !hostNetwork(config) && !userNsEnabled {
// XXX: We do c&p of this code later for the podNetwork && userNsEnabled case too.
// We can't move this to a function, as the defer calls need to be executed if other
// errors are returned in this function. So, we would need more refactors to move
// this code to a function and the idea was to not change the current code for
// !userNsEnabled case, therefore doing it would defeat the purpose.
//
// The difference between the cases is the use of netns.NewNetNS() vs
// netns.NewNetNSFromPID() and we verify the task is still running in the other case.
//
// To simplify this, in the future, we should just remove this case (podNetwork &&
// !userNsEnabled) and just keep the other case (podNetwork && userNsEnabled).
netStart := time.Now() netStart := time.Now()
// If it is not in host network namespace then create a namespace and set the sandbox // If it is not in host network namespace then create a namespace and set the sandbox
@ -353,6 +380,88 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
return nil, fmt.Errorf("failed to wait for sandbox container task: %w", err) return nil, fmt.Errorf("failed to wait for sandbox container task: %w", err)
} }
if !hostNetwork(config) && userNsEnabled {
// If userns is enabled, then the netns was created by the OCI runtime
// when creating "task". The OCI runtime needs to create the netns
// because, if userns is in use, the netns needs to be owned by the
// userns. So, let the OCI runtime just handle this for us.
// If the netns is not owned by the userns several problems will happen.
// For instance, the container will lack permission (even if
// capabilities are present) to modify the netns or, even worse, the OCI
// runtime will fail to mount sysfs:
// https://github.com/torvalds/linux/commit/7dc5dbc879bd0779924b5132a48b731a0bc04a1e#diff-4839664cd0c8eab716e064323c7cd71fR1164
netStart := time.Now()
// If it is not in host network namespace then create a namespace and set the sandbox
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
// namespaces. If the pod is in host network namespace then both are empty and should not
// be used.
var netnsMountDir = "/var/run/netns"
if c.config.NetNSMountsUnderStateDir {
netnsMountDir = filepath.Join(c.config.StateDir, "netns")
}
sandbox.NetNS, err = netns.NewNetNSFromPID(netnsMountDir, task.Pid())
if err != nil {
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
}
// Verify task is still in created state.
if st, err := task.Status(ctx); err != nil || st.Status != containerd.Created {
return nil, fmt.Errorf("failed to create pod sandbox %q: err is %v - status is %q and is expected %q", id, err, st.Status, containerd.Created)
}
sandbox.NetNSPath = sandbox.NetNS.GetPath()
defer func() {
// Remove the network namespace only if all the resource cleanup is done.
if retErr != nil && cleanupErr == nil {
if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
return
}
sandbox.NetNSPath = ""
}
}()
// Update network namespace in the container's spec
c.updateNetNamespacePath(spec, sandbox.NetNSPath)
if err := container.Update(ctx,
// Update spec of the container
containerd.UpdateContainerOpts(containerd.WithSpec(spec)),
// Update sandbox metadata to include NetNS info
containerd.UpdateContainerOpts(containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata))); err != nil {
return nil, fmt.Errorf("failed to update the network namespace for the sandbox container %q: %w", id, err)
}
// Define this defer to teardownPodNetwork prior to the setupPodNetwork function call.
// This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource creation functions.
defer func() {
// Teardown the network only if all the resource cleanup is done.
if retErr != nil && cleanupErr == nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Teardown network if an error is returned.
if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id)
}
}
}()
// Setup network for sandbox.
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
// rely on the assumption that CRI shim will not be querying the network namespace to check the
// network states such as IP.
// In future runtime implementation should avoid relying on CRI shim implementation details.
// In this case however caching the IP will add a subtle performance enhancement by avoiding
// calls to network namespace of the pod to query the IP of the veth interface on every
// SandboxStatus request.
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
}
sandboxCreateNetworkTimer.UpdateSince(netStart)
}
if c.nri.isEnabled() { if c.nri.isEnabled() {
err = c.nri.runPodSandbox(ctx, &sandbox) err = c.nri.runPodSandbox(ctx, &sandbox)
if err != nil { if err != nil {

View File

@ -25,6 +25,7 @@ import (
"github.com/containerd/containerd" "github.com/containerd/containerd"
"github.com/containerd/containerd/oci" "github.com/containerd/containerd/oci"
"github.com/containerd/containerd/plugin" "github.com/containerd/containerd/plugin"
"github.com/containerd/containerd/snapshots"
imagespec "github.com/opencontainers/image-spec/specs-go/v1" imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go" runtimespec "github.com/opencontainers/runtime-spec/specs-go"
selinux "github.com/opencontainers/selinux/go-selinux" selinux "github.com/opencontainers/selinux/go-selinux"
@ -95,6 +96,23 @@ func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxC
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace)) specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace))
} }
usernsOpts := nsOptions.GetUsernsOptions()
uids, gids, err := parseUsernsIDs(usernsOpts)
if err != nil {
return nil, fmt.Errorf("user namespace configuration: %w", err)
}
if usernsOpts != nil {
switch mode := usernsOpts.GetMode(); mode {
case runtime.NamespaceMode_NODE:
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UserNamespace))
case runtime.NamespaceMode_POD:
specOpts = append(specOpts, oci.WithUserNamespace(uids, gids))
default:
return nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
}
}
// It's fine to generate the spec before the sandbox /dev/shm // It's fine to generate the spec before the sandbox /dev/shm
// is actually created. // is actually created.
sandboxDevShm := c.getSandboxDevShm(id) sandboxDevShm := c.getSandboxDevShm(id)
@ -358,3 +376,10 @@ func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath strin
} }
} }
} }
// sandboxSnapshotterOpts generates any platform specific snapshotter options
// for a sandbox container.
func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) {
nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions()
return snapshotterRemapOpts(nsOpts)
}

View File

@ -98,6 +98,17 @@ func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConf
func TestLinuxSandboxContainerSpec(t *testing.T) { func TestLinuxSandboxContainerSpec(t *testing.T) {
testID := "test-id" testID := "test-id"
nsPath := "test-cni" nsPath := "test-cni"
idMap := runtime.IDMapping{
HostId: 1000,
ContainerId: 1000,
Length: 10,
}
expIDMap := runtimespec.LinuxIDMapping{
HostID: 1000,
ContainerID: 1000,
Size: 10,
}
for desc, test := range map[string]struct { for desc, test := range map[string]struct {
configChange func(*runtime.PodSandboxConfig) configChange func(*runtime.PodSandboxConfig)
specCheck func(*testing.T, *runtimespec.Spec) specCheck func(*testing.T, *runtimespec.Spec)
@ -122,6 +133,9 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
}) })
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0") assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647") assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
})
}, },
}, },
"host namespace": { "host namespace": {
@ -149,10 +163,113 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{ assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.IPCNamespace, Type: runtimespec.IPCNamespace,
}) })
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
})
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0") assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647") assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
}, },
}, },
"user namespace": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_POD,
Uids: []*runtime.IDMapping{&idMap},
Gids: []*runtime.IDMapping{&idMap},
},
},
}
},
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
require.NotNil(t, spec.Linux)
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.UserNamespace,
})
require.Equal(t, spec.Linux.UIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
require.Equal(t, spec.Linux.GIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
},
},
"user namespace mode node and mappings": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap},
Gids: []*runtime.IDMapping{&idMap},
},
},
}
},
expectErr: true,
},
"user namespace with several mappings": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap, &idMap},
Gids: []*runtime.IDMapping{&idMap, &idMap},
},
},
}
},
expectErr: true,
},
"user namespace with uneven mappings": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_NODE,
Uids: []*runtime.IDMapping{&idMap, &idMap},
Gids: []*runtime.IDMapping{&idMap},
},
},
}
},
expectErr: true,
},
"user namespace mode container": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_CONTAINER,
},
},
}
},
expectErr: true,
},
"user namespace mode target": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode_TARGET,
},
},
}
},
expectErr: true,
},
"user namespace unknown mode": {
configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
NamespaceOptions: &runtime.NamespaceOption{
UsernsOptions: &runtime.UserNamespace{
Mode: runtime.NamespaceMode(100),
},
},
}
},
expectErr: true,
},
"should set supplemental groups correctly": { "should set supplemental groups correctly": {
configChange: func(c *runtime.PodSandboxConfig) { configChange: func(c *runtime.PodSandboxConfig) {
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{ c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{

View File

@ -21,6 +21,7 @@ package server
import ( import (
"github.com/containerd/containerd" "github.com/containerd/containerd"
"github.com/containerd/containerd/oci" "github.com/containerd/containerd/oci"
"github.com/containerd/containerd/snapshots"
imagespec "github.com/opencontainers/image-spec/specs-go/v1" imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go" runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1" runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
@ -56,3 +57,9 @@ func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) { func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
} }
// sandboxSnapshotterOpts generates any platform specific snapshotter options
// for a sandbox container.
func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) {
return []snapshots.Opt{}, nil
}

View File

@ -22,6 +22,7 @@ import (
"github.com/containerd/containerd" "github.com/containerd/containerd"
"github.com/containerd/containerd/oci" "github.com/containerd/containerd/oci"
"github.com/containerd/containerd/snapshots"
imagespec "github.com/opencontainers/image-spec/specs-go/v1" imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go" runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1" runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
@ -116,3 +117,8 @@ func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) { func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
spec.Windows.Network.NetworkNamespace = nsPath spec.Windows.Network.NetworkNamespace = nsPath
} }
// No sandbox snapshotter options needed for windows.
func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) {
return []snapshots.Opt{}, nil
}

View File

@ -50,7 +50,9 @@ import (
// newNS creates a new persistent (bind-mounted) network namespace and returns the // newNS creates a new persistent (bind-mounted) network namespace and returns the
// path to the network namespace. // path to the network namespace.
func newNS(baseDir string) (nsPath string, err error) { // If pid is not 0, returns the netns from that pid persistently mounted. Otherwise,
// a new netns is created.
func newNS(baseDir string, pid uint32) (nsPath string, err error) {
b := make([]byte, 16) b := make([]byte, 16)
_, err = rand.Read(b) _, err = rand.Read(b)
@ -81,6 +83,16 @@ func newNS(baseDir string) (nsPath string, err error) {
} }
}() }()
if pid != 0 {
procNsPath := getNetNSPathFromPID(pid)
// bind mount the netns onto the mount point. This causes the namespace
// to persist, even when there are no threads in the ns.
if err = unix.Mount(procNsPath, nsPath, "none", unix.MS_BIND, ""); err != nil {
return "", fmt.Errorf("failed to bind mount ns src: %v at %s: %w", procNsPath, nsPath, err)
}
return nsPath, nil
}
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(1) wg.Add(1)
@ -155,6 +167,10 @@ func getCurrentThreadNetNSPath() string {
return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid()) return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
} }
func getNetNSPathFromPID(pid uint32) string {
return fmt.Sprintf("/proc/%d/ns/net", pid)
}
// NetNS holds network namespace. // NetNS holds network namespace.
type NetNS struct { type NetNS struct {
path string path string
@ -162,7 +178,12 @@ type NetNS struct {
// NewNetNS creates a network namespace. // NewNetNS creates a network namespace.
func NewNetNS(baseDir string) (*NetNS, error) { func NewNetNS(baseDir string) (*NetNS, error) {
path, err := newNS(baseDir) return NewNetNSFromPID(baseDir, 0)
}
// NewNetNS returns the netns from pid or a new netns if pid is 0.
func NewNetNSFromPID(baseDir string, pid uint32) (*NetNS, error) {
path, err := newNS(baseDir, pid)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to setup netns: %w", err) return nil, fmt.Errorf("failed to setup netns: %w", err)
} }

View File

@ -35,6 +35,11 @@ func NewNetNS(baseDir string) (*NetNS, error) {
return nil, errNotImplementedOnUnix return nil, errNotImplementedOnUnix
} }
// NewNetNS returns the netns from pid or a new netns if pid is 0.
func NewNetNSFromPID(baseDir string, pid uint32) (*NetNS, error) {
return nil, errNotImplementedOnUnix
}
// LoadNetNS loads existing network namespace. // LoadNetNS loads existing network namespace.
func LoadNetNS(path string) *NetNS { func LoadNetNS(path string) *NetNS {
return &NetNS{path: path} return &NetNS{path: path}

View File

@ -16,14 +16,20 @@
package netns package netns
import "github.com/Microsoft/hcsshim/hcn" import (
"errors"
"github.com/Microsoft/hcsshim/hcn"
)
var errNotImplementedOnWindows = errors.New("not implemented on windows")
// NetNS holds network namespace for sandbox // NetNS holds network namespace for sandbox
type NetNS struct { type NetNS struct {
path string path string
} }
// NewNetNS creates a network namespace for the sandbox // NewNetNS creates a network namespace for the sandbox.
func NewNetNS(baseDir string) (*NetNS, error) { func NewNetNS(baseDir string) (*NetNS, error) {
temp := hcn.HostComputeNamespace{} temp := hcn.HostComputeNamespace{}
hcnNamespace, err := temp.Create() hcnNamespace, err := temp.Create()
@ -34,6 +40,11 @@ func NewNetNS(baseDir string) (*NetNS, error) {
return &NetNS{path: hcnNamespace.Id}, nil return &NetNS{path: hcnNamespace.Id}, nil
} }
// NewNetNS returns the netns from pid or a new netns if pid is 0.
func NewNetNSFromPID(baseDir string, pid uint32) (*NetNS, error) {
return nil, errNotImplementedOnWindows
}
// LoadNetNS loads existing network namespace. // LoadNetNS loads existing network namespace.
func LoadNetNS(path string) *NetNS { func LoadNetNS(path string) *NetNS {
return &NetNS{path: path} return &NetNS{path: path}

View File

@ -33,6 +33,11 @@ const (
UnpackKeyFormat = UnpackKeyPrefix + "-%s %s" UnpackKeyFormat = UnpackKeyPrefix + "-%s %s"
inheritedLabelsPrefix = "containerd.io/snapshot/" inheritedLabelsPrefix = "containerd.io/snapshot/"
labelSnapshotRef = "containerd.io/snapshot.ref" labelSnapshotRef = "containerd.io/snapshot.ref"
// LabelSnapshotUIDMapping is the label used for UID mappings
LabelSnapshotUIDMapping = "containerd.io/snapshot/uidmapping"
// LabelSnapshotGIDMapping is the label used for GID mappings
LabelSnapshotGIDMapping = "containerd.io/snapshot/gidmapping"
) )
// Kind identifies the kind of snapshot. // Kind identifies the kind of snapshot.

View File

@ -19,17 +19,92 @@
package containerd package containerd
import ( import (
"context"
"fmt" "fmt"
"github.com/containerd/containerd/snapshots" "github.com/containerd/containerd/snapshots"
) )
const (
capabRemapIDs = "remap-ids"
)
// WithRemapperLabels creates the labels used by any supporting snapshotter // WithRemapperLabels creates the labels used by any supporting snapshotter
// to shift the filesystem ownership (user namespace mapping) automatically; currently // to shift the filesystem ownership (user namespace mapping) automatically; currently
// supported by the fuse-overlayfs snapshotter // supported by the fuse-overlayfs snapshotter
func WithRemapperLabels(ctrUID, hostUID, ctrGID, hostGID, length uint32) snapshots.Opt { func WithRemapperLabels(ctrUID, hostUID, ctrGID, hostGID, length uint32) snapshots.Opt {
return snapshots.WithLabels(map[string]string{ return snapshots.WithLabels(map[string]string{
"containerd.io/snapshot/uidmapping": fmt.Sprintf("%d:%d:%d", ctrUID, hostUID, length), snapshots.LabelSnapshotUIDMapping: fmt.Sprintf("%d:%d:%d", ctrUID, hostUID, length),
"containerd.io/snapshot/gidmapping": fmt.Sprintf("%d:%d:%d", ctrGID, hostGID, length), snapshots.LabelSnapshotGIDMapping: fmt.Sprintf("%d:%d:%d", ctrGID, hostGID, length)})
}) }
func resolveSnapshotOptions(ctx context.Context, client *Client, snapshotterName string, snapshotter snapshots.Snapshotter, parent string, opts ...snapshots.Opt) (string, error) {
capabs, err := client.GetSnapshotterCapabilities(ctx, snapshotterName)
if err != nil {
return "", err
}
for _, capab := range capabs {
if capab == capabRemapIDs {
// Snapshotter supports ID remapping, we don't need to do anything.
return parent, nil
}
}
var local snapshots.Info
for _, opt := range opts {
opt(&local)
}
needsRemap := false
var uidMap, gidMap string
if value, ok := local.Labels[snapshots.LabelSnapshotUIDMapping]; ok {
needsRemap = true
uidMap = value
}
if value, ok := local.Labels[snapshots.LabelSnapshotGIDMapping]; ok {
needsRemap = true
gidMap = value
}
if !needsRemap {
return parent, nil
}
var ctrUID, hostUID, length uint32
_, err = fmt.Sscanf(uidMap, "%d:%d:%d", &ctrUID, &hostUID, &length)
if err != nil {
return "", fmt.Errorf("uidMap unparsable: %w", err)
}
var ctrGID, hostGID, lengthGID uint32
_, err = fmt.Sscanf(gidMap, "%d:%d:%d", &ctrGID, &hostGID, &lengthGID)
if err != nil {
return "", fmt.Errorf("gidMap unparsable: %w", err)
}
if ctrUID != 0 || ctrGID != 0 {
return "", fmt.Errorf("Container UID/GID of 0 only supported currently (%d/%d)", ctrUID, ctrGID)
}
// TODO(dgl): length isn't taken into account for the intermediate snapshot id.
usernsID := fmt.Sprintf("%s-%d-%d", parent, hostUID, hostGID)
if _, err := snapshotter.Stat(ctx, usernsID); err == nil {
return usernsID, nil
}
mounts, err := snapshotter.Prepare(ctx, usernsID+"-remap", parent)
if err != nil {
return "", err
}
// TODO(dgl): length isn't taken into account here yet either.
if err := remapRootFS(ctx, mounts, hostUID, hostGID); err != nil {
snapshotter.Remove(ctx, usernsID+"-remap")
return "", err
}
if err := snapshotter.Commit(ctx, usernsID, usernsID+"-remap"); err != nil {
return "", err
}
return usernsID, nil
} }

View File

@ -0,0 +1,27 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package containerd
import (
"context"
"github.com/containerd/containerd/snapshots"
)
func resolveSnapshotOptions(ctx context.Context, client *Client, snapshotterName string, snapshotter snapshots.Snapshotter, parent string, opts ...snapshots.Opt) (string, error) {
return parent, nil
}