Merge pull request #7679 from kinvolk/rata/userns-stateless-pods
Add support for user namespaces in stateless pods (KEP-127)
This commit is contained in:
commit
66f186d42d
18
client.go
18
client.go
@ -866,3 +866,21 @@ func toPlatforms(pt []*apitypes.Platform) []ocispec.Platform {
|
||||
}
|
||||
return platforms
|
||||
}
|
||||
|
||||
// GetSnapshotterCapabilities returns the capabilities of a snapshotter.
|
||||
func (c *Client) GetSnapshotterCapabilities(ctx context.Context, snapshotterName string) ([]string, error) {
|
||||
filters := []string{fmt.Sprintf("type==%s, id==%s", plugin.SnapshotPlugin, snapshotterName)}
|
||||
in := c.IntrospectionService()
|
||||
|
||||
resp, err := in.Plugins(ctx, filters)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(resp.Plugins) <= 0 {
|
||||
return nil, fmt.Errorf("inspection service could not find snapshotter %s plugin", snapshotterName)
|
||||
}
|
||||
|
||||
sn := resp.Plugins[0]
|
||||
return sn.Capabilities, nil
|
||||
}
|
||||
|
@ -224,6 +224,11 @@ func WithNewSnapshot(id string, i Image, opts ...snapshots.Opt) NewContainerOpts
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
parent, err = resolveSnapshotOptions(ctx, client, c.Snapshotter, s, parent, opts...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := s.Prepare(ctx, id, parent, opts...); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -268,6 +273,11 @@ func WithNewSnapshotView(id string, i Image, opts ...snapshots.Opt) NewContainer
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
parent, err = resolveSnapshotOptions(ctx, client, c.Snapshotter, s, parent, opts...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := s.View(ctx, id, parent, opts...); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -126,6 +126,35 @@ func WithHostNetwork(p *runtime.PodSandboxConfig) {
|
||||
p.Linux.SecurityContext.NamespaceOptions.Network = runtime.NamespaceMode_NODE
|
||||
}
|
||||
|
||||
// Set pod userns.
|
||||
func WithPodUserNs(containerID, hostID, length uint32) PodSandboxOpts {
|
||||
return func(p *runtime.PodSandboxConfig) {
|
||||
if p.Linux == nil {
|
||||
p.Linux = &runtime.LinuxPodSandboxConfig{}
|
||||
}
|
||||
if p.Linux.SecurityContext == nil {
|
||||
p.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{}
|
||||
}
|
||||
if p.Linux.SecurityContext.NamespaceOptions == nil {
|
||||
p.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{}
|
||||
}
|
||||
|
||||
idMap := runtime.IDMapping{
|
||||
HostId: hostID,
|
||||
ContainerId: containerID,
|
||||
Length: length,
|
||||
}
|
||||
if p.Linux.SecurityContext.NamespaceOptions.UsernsOptions == nil {
|
||||
p.Linux.SecurityContext.NamespaceOptions.UsernsOptions = &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_POD,
|
||||
}
|
||||
}
|
||||
|
||||
p.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Uids = append(p.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Uids, &idMap)
|
||||
p.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Gids = append(p.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Gids, &idMap)
|
||||
}
|
||||
}
|
||||
|
||||
// Set host pid.
|
||||
func WithHostPid(p *runtime.PodSandboxConfig) {
|
||||
if p.Linux == nil {
|
||||
@ -314,6 +343,35 @@ func WithPidNamespace(mode runtime.NamespaceMode) ContainerOpts {
|
||||
|
||||
}
|
||||
|
||||
// Add user namespace pod mode.
|
||||
func WithUserNamespace(containerID, hostID, length uint32) ContainerOpts {
|
||||
return func(c *runtime.ContainerConfig) {
|
||||
if c.Linux == nil {
|
||||
c.Linux = &runtime.LinuxContainerConfig{}
|
||||
}
|
||||
if c.Linux.SecurityContext == nil {
|
||||
c.Linux.SecurityContext = &runtime.LinuxContainerSecurityContext{}
|
||||
}
|
||||
if c.Linux.SecurityContext.NamespaceOptions == nil {
|
||||
c.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{}
|
||||
}
|
||||
idMap := runtime.IDMapping{
|
||||
HostId: hostID,
|
||||
ContainerId: containerID,
|
||||
Length: length,
|
||||
}
|
||||
|
||||
if c.Linux.SecurityContext.NamespaceOptions.UsernsOptions == nil {
|
||||
c.Linux.SecurityContext.NamespaceOptions.UsernsOptions = &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_POD,
|
||||
}
|
||||
}
|
||||
|
||||
c.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Uids = append(c.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Uids, &idMap)
|
||||
c.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Gids = append(c.Linux.SecurityContext.NamespaceOptions.UsernsOptions.Gids, &idMap)
|
||||
}
|
||||
}
|
||||
|
||||
// Add container log path.
|
||||
func WithLogPath(path string) ContainerOpts {
|
||||
return func(c *runtime.ContainerConfig) {
|
||||
|
169
integration/pod_userns_linux_test.go
Normal file
169
integration/pod_userns_linux_test.go
Normal file
@ -0,0 +1,169 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package integration
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd/integration/images"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
exec "golang.org/x/sys/execabs"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
||||
func TestPodUserNS(t *testing.T) {
|
||||
containerID := uint32(0)
|
||||
hostID := uint32(65536)
|
||||
size := uint32(65536)
|
||||
for name, test := range map[string]struct {
|
||||
sandboxOpts []PodSandboxOpts
|
||||
containerOpts []ContainerOpts
|
||||
checkOutput func(t *testing.T, output string)
|
||||
expectErr bool
|
||||
}{
|
||||
"userns uid mapping": {
|
||||
sandboxOpts: []PodSandboxOpts{
|
||||
WithPodUserNs(containerID, hostID, size),
|
||||
},
|
||||
containerOpts: []ContainerOpts{
|
||||
WithUserNamespace(containerID, hostID, size),
|
||||
WithCommand("cat", "/proc/self/uid_map"),
|
||||
},
|
||||
checkOutput: func(t *testing.T, output string) {
|
||||
// The output should contain the length of the userns requested.
|
||||
assert.Contains(t, output, fmt.Sprint(size))
|
||||
},
|
||||
},
|
||||
"userns gid mapping": {
|
||||
sandboxOpts: []PodSandboxOpts{
|
||||
WithPodUserNs(containerID, hostID, size),
|
||||
},
|
||||
containerOpts: []ContainerOpts{
|
||||
WithUserNamespace(containerID, hostID, size),
|
||||
WithCommand("cat", "/proc/self/gid_map"),
|
||||
},
|
||||
checkOutput: func(t *testing.T, output string) {
|
||||
// The output should contain the length of the userns requested.
|
||||
assert.Contains(t, output, fmt.Sprint(size))
|
||||
},
|
||||
},
|
||||
"rootfs permissions": {
|
||||
sandboxOpts: []PodSandboxOpts{
|
||||
WithPodUserNs(containerID, hostID, size),
|
||||
},
|
||||
containerOpts: []ContainerOpts{
|
||||
WithUserNamespace(containerID, hostID, size),
|
||||
// Prints numeric UID and GID for path.
|
||||
// For example, if UID and GID is 0 it will print: =0=0=
|
||||
// We add the "=" signs so we use can assert.Contains() and be sure
|
||||
// the UID/GID is 0 and not things like 100 (that contain 0).
|
||||
// We can't use assert.Equal() easily as it contains timestamp, etc.
|
||||
WithCommand("stat", "-c", "'=%u=%g='", "/root/"),
|
||||
},
|
||||
checkOutput: func(t *testing.T, output string) {
|
||||
// The UID and GID should be 0 (root) if the chown/remap is done correctly.
|
||||
assert.Contains(t, output, "=0=0=")
|
||||
},
|
||||
},
|
||||
"fails with several mappings": {
|
||||
sandboxOpts: []PodSandboxOpts{
|
||||
WithPodUserNs(containerID, hostID, size),
|
||||
WithPodUserNs(containerID*2, hostID*2, size*2),
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
} {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
if os.Getenv("ENABLE_CRI_SANDBOXES") == "'sandboxed'" {
|
||||
t.Skip("skipping test: userns not supported/needed in sanboxed runtimes")
|
||||
}
|
||||
cmd := exec.Command("true")
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||
Cloneflags: syscall.CLONE_NEWUSER,
|
||||
}
|
||||
if err := cmd.Run(); err != nil {
|
||||
t.Skip("skipping test: user namespaces are unavailable")
|
||||
}
|
||||
|
||||
testPodLogDir := t.TempDir()
|
||||
sandboxOpts := append(test.sandboxOpts, WithPodLogDirectory(testPodLogDir))
|
||||
t.Log("Create a sandbox with userns")
|
||||
sbConfig := PodSandboxConfig("sandbox", "userns", sandboxOpts...)
|
||||
sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
|
||||
if err != nil {
|
||||
if !test.expectErr {
|
||||
t.Fatalf("Unexpected RunPodSandbox error: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
// Make sure the sandbox is cleaned up.
|
||||
defer func() {
|
||||
assert.NoError(t, runtimeService.StopPodSandbox(sb))
|
||||
assert.NoError(t, runtimeService.RemovePodSandbox(sb))
|
||||
}()
|
||||
if test.expectErr {
|
||||
t.Fatalf("Expected RunPodSandbox to return error")
|
||||
}
|
||||
|
||||
var (
|
||||
testImage = images.Get(images.BusyBox)
|
||||
containerName = "test-container"
|
||||
)
|
||||
|
||||
EnsureImageExists(t, testImage)
|
||||
|
||||
containerOpts := append(test.containerOpts,
|
||||
WithLogPath(containerName),
|
||||
)
|
||||
t.Log("Create a container for userns")
|
||||
cnConfig := ContainerConfig(
|
||||
containerName,
|
||||
testImage,
|
||||
containerOpts...,
|
||||
)
|
||||
cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
|
||||
require.NoError(t, err)
|
||||
|
||||
t.Log("Start the container")
|
||||
require.NoError(t, runtimeService.StartContainer(cn))
|
||||
|
||||
t.Log("Wait for container to finish running")
|
||||
require.NoError(t, Eventually(func() (bool, error) {
|
||||
s, err := runtimeService.ContainerStatus(cn)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if s.GetState() == runtime.ContainerState_CONTAINER_EXITED {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
}, time.Second, 30*time.Second))
|
||||
|
||||
content, err := os.ReadFile(filepath.Join(testPodLogDir, containerName))
|
||||
assert.NoError(t, err)
|
||||
|
||||
t.Log("Running check function")
|
||||
test.checkOutput(t, string(content))
|
||||
})
|
||||
}
|
||||
}
|
@ -661,7 +661,7 @@ func WithSupplementalGroups(groups []int64) oci.SpecOpts {
|
||||
}
|
||||
|
||||
// WithPodNamespaces sets the pod namespaces for the container
|
||||
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32) oci.SpecOpts {
|
||||
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32, uids, gids []runtimespec.LinuxIDMapping) oci.SpecOpts {
|
||||
namespaces := config.GetNamespaceOptions()
|
||||
|
||||
opts := []oci.SpecOpts{
|
||||
@ -672,6 +672,17 @@ func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid
|
||||
if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER {
|
||||
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(targetPid)}))
|
||||
}
|
||||
|
||||
if namespaces.GetUsernsOptions() != nil {
|
||||
switch namespaces.GetUsernsOptions().GetMode() {
|
||||
case runtime.NamespaceMode_NODE:
|
||||
// Nothing to do. Not adding userns field uses the node userns.
|
||||
case runtime.NamespaceMode_POD:
|
||||
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UserNamespace, Path: GetUserNamespace(sandboxPid)}))
|
||||
opts = append(opts, oci.WithUserNamespace(uids, gids))
|
||||
}
|
||||
}
|
||||
|
||||
return oci.Compose(opts...)
|
||||
}
|
||||
|
||||
@ -745,6 +756,8 @@ const (
|
||||
utsNSFormat = "/proc/%v/ns/uts"
|
||||
// pidNSFormat is the format of pid namespace of a process.
|
||||
pidNSFormat = "/proc/%v/ns/pid"
|
||||
// userNSFormat is the format of user namespace of a process.
|
||||
userNSFormat = "/proc/%v/ns/user"
|
||||
)
|
||||
|
||||
// GetNetworkNamespace returns the network namespace of a process.
|
||||
@ -767,6 +780,11 @@ func GetPIDNamespace(pid uint32) string {
|
||||
return fmt.Sprintf(pidNSFormat, pid)
|
||||
}
|
||||
|
||||
// GetUserNamespace returns the user namespace of a process.
|
||||
func GetUserNamespace(pid uint32) string {
|
||||
return fmt.Sprintf(userNSFormat, pid)
|
||||
}
|
||||
|
||||
// WithCDI updates OCI spec with CDI content
|
||||
func WithCDI(annotations map[string]string) oci.SpecOpts {
|
||||
return func(ctx context.Context, _ oci.Client, c *containers.Container, s *oci.Spec) error {
|
||||
|
@ -313,7 +313,8 @@ func (c *criService) containerSpec(
|
||||
|
||||
specOpts = append(specOpts,
|
||||
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
|
||||
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid),
|
||||
// TODO: This is a hack to make this compile. We should move userns support to sbserver.
|
||||
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, nil, nil),
|
||||
customopts.WithSupplementalGroups(supplementalGroups),
|
||||
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
|
||||
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
|
||||
|
@ -184,7 +184,10 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
|
||||
log.G(ctx).Debugf("Container %q spec: %#+v", id, spew.NewFormatter(spec))
|
||||
|
||||
// Grab any platform specific snapshotter opts.
|
||||
sOpts := snapshotterOpts(c.config.ContainerdConfig.Snapshotter, config)
|
||||
sOpts, err := snapshotterOpts(c.config.ContainerdConfig.Snapshotter, config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Set snapshotter before any other options.
|
||||
opts := []containerd.NewContainerOpts{
|
||||
|
@ -311,9 +311,14 @@ func (c *criService) containerSpec(
|
||||
targetPid = status.Pid
|
||||
}
|
||||
|
||||
uids, gids, err := parseUsernsIDs(nsOpts.GetUsernsOptions())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("user namespace configuration: %w", err)
|
||||
}
|
||||
|
||||
specOpts = append(specOpts,
|
||||
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
|
||||
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid),
|
||||
customopts.WithPodNamespaces(securityContext, sandboxPid, targetPid, uids, gids),
|
||||
customopts.WithSupplementalGroups(supplementalGroups),
|
||||
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
|
||||
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
|
||||
@ -601,6 +606,7 @@ func generateUserString(username string, uid, gid *runtime.Int64Value) (string,
|
||||
}
|
||||
|
||||
// snapshotterOpts returns any Linux specific snapshotter options for the rootfs snapshot
|
||||
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt {
|
||||
return []snapshots.Opt{}
|
||||
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
|
||||
nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions()
|
||||
return snapshotterRemapOpts(nsOpts)
|
||||
}
|
||||
|
@ -804,6 +804,113 @@ func TestPidNamespace(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUserNamespace(t *testing.T) {
|
||||
testID := "test-id"
|
||||
testPid := uint32(1234)
|
||||
testSandboxID := "sandbox-id"
|
||||
testContainerName := "container-name"
|
||||
idMap := runtime.IDMapping{
|
||||
HostId: 1000,
|
||||
ContainerId: 1000,
|
||||
Length: 10,
|
||||
}
|
||||
expIDMap := runtimespec.LinuxIDMapping{
|
||||
HostID: 1000,
|
||||
ContainerID: 1000,
|
||||
Size: 10,
|
||||
}
|
||||
containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
|
||||
ociRuntime := config.Runtime{}
|
||||
c := newTestCRIService()
|
||||
for desc, test := range map[string]struct {
|
||||
userNS *runtime.UserNamespace
|
||||
expNS *runtimespec.LinuxNamespace
|
||||
expNotNS *runtimespec.LinuxNamespace // Does NOT contain this namespace
|
||||
expUIDMapping []runtimespec.LinuxIDMapping
|
||||
expGIDMapping []runtimespec.LinuxIDMapping
|
||||
err bool
|
||||
}{
|
||||
"node namespace mode": {
|
||||
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_NODE},
|
||||
// Expect userns to NOT be present.
|
||||
expNotNS: &runtimespec.LinuxNamespace{
|
||||
Type: runtimespec.UserNamespace,
|
||||
Path: opts.GetUserNamespace(testPid),
|
||||
},
|
||||
},
|
||||
"node namespace mode with mappings": {
|
||||
userNS: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_NODE,
|
||||
Uids: []*runtime.IDMapping{&idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap},
|
||||
},
|
||||
err: true,
|
||||
},
|
||||
"container namespace mode": {
|
||||
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_CONTAINER},
|
||||
err: true,
|
||||
},
|
||||
"target namespace mode": {
|
||||
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode_TARGET},
|
||||
err: true,
|
||||
},
|
||||
"unknown namespace mode": {
|
||||
userNS: &runtime.UserNamespace{Mode: runtime.NamespaceMode(100)},
|
||||
err: true,
|
||||
},
|
||||
"pod namespace mode": {
|
||||
userNS: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_POD,
|
||||
Uids: []*runtime.IDMapping{&idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap},
|
||||
},
|
||||
expNS: &runtimespec.LinuxNamespace{
|
||||
Type: runtimespec.UserNamespace,
|
||||
Path: opts.GetUserNamespace(testPid),
|
||||
},
|
||||
expUIDMapping: []runtimespec.LinuxIDMapping{expIDMap},
|
||||
expGIDMapping: []runtimespec.LinuxIDMapping{expIDMap},
|
||||
},
|
||||
"pod namespace mode with several mappings": {
|
||||
userNS: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_POD,
|
||||
Uids: []*runtime.IDMapping{&idMap, &idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap, &idMap},
|
||||
},
|
||||
err: true,
|
||||
},
|
||||
"pod namespace mode with uneven mappings": {
|
||||
userNS: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_POD,
|
||||
Uids: []*runtime.IDMapping{&idMap, &idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap},
|
||||
},
|
||||
err: true,
|
||||
},
|
||||
} {
|
||||
t.Run(desc, func(t *testing.T) {
|
||||
containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{UsernsOptions: test.userNS}
|
||||
spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
|
||||
|
||||
if test.err {
|
||||
assert.Error(t, err)
|
||||
assert.Nil(t, spec)
|
||||
return
|
||||
}
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, spec.Linux.UIDMappings, test.expUIDMapping)
|
||||
assert.Equal(t, spec.Linux.GIDMappings, test.expGIDMapping)
|
||||
|
||||
if test.expNS != nil {
|
||||
assert.Contains(t, spec.Linux.Namespaces, *test.expNS)
|
||||
}
|
||||
if test.expNotNS != nil {
|
||||
assert.NotContains(t, spec.Linux.Namespaces, *test.expNotNS)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNoDefaultRunMount(t *testing.T) {
|
||||
testID := "test-id"
|
||||
testPid := uint32(1234)
|
||||
|
@ -55,6 +55,6 @@ func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageCon
|
||||
}
|
||||
|
||||
// snapshotterOpts returns snapshotter options for the rootfs snapshot
|
||||
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt {
|
||||
return []snapshots.Opt{}
|
||||
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
|
||||
return []snapshots.Opt{}, nil
|
||||
}
|
||||
|
@ -145,7 +145,7 @@ func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageCon
|
||||
}
|
||||
|
||||
// snapshotterOpts returns any Windows specific snapshotter options for the r/w layer
|
||||
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt {
|
||||
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([]snapshots.Opt, error) {
|
||||
var opts []snapshots.Opt
|
||||
|
||||
switch snapshotterName {
|
||||
@ -160,5 +160,5 @@ func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []
|
||||
}
|
||||
}
|
||||
|
||||
return opts
|
||||
return opts, nil
|
||||
}
|
||||
|
@ -28,11 +28,13 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd"
|
||||
"github.com/containerd/containerd/log"
|
||||
"github.com/containerd/containerd/mount"
|
||||
"github.com/containerd/containerd/pkg/apparmor"
|
||||
"github.com/containerd/containerd/pkg/seccomp"
|
||||
"github.com/containerd/containerd/pkg/seutil"
|
||||
"github.com/containerd/containerd/snapshots"
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
@ -275,3 +277,92 @@ func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
|
||||
spec.Process.SelinuxLabel = l
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseUsernsIDMap(runtimeIDMap []*runtime.IDMapping) ([]specs.LinuxIDMapping, error) {
|
||||
var m []specs.LinuxIDMapping
|
||||
|
||||
if len(runtimeIDMap) == 0 {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
if len(runtimeIDMap) > 1 {
|
||||
// We only accept 1 line, because containerd.WithRemappedSnapshot() only supports that.
|
||||
return m, fmt.Errorf("only one mapping line supported, got %v mapping lines", len(runtimeIDMap))
|
||||
}
|
||||
|
||||
// We know len is 1 now.
|
||||
if runtimeIDMap[0] == nil {
|
||||
return m, nil
|
||||
}
|
||||
uidMap := *runtimeIDMap[0]
|
||||
|
||||
if uidMap.Length < 1 {
|
||||
return m, fmt.Errorf("invalid mapping length: %v", uidMap.Length)
|
||||
}
|
||||
|
||||
m = []specs.LinuxIDMapping{
|
||||
{
|
||||
ContainerID: uidMap.ContainerId,
|
||||
HostID: uidMap.HostId,
|
||||
Size: uidMap.Length,
|
||||
},
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func parseUsernsIDs(userns *runtime.UserNamespace) (uids, gids []specs.LinuxIDMapping, retErr error) {
|
||||
if userns == nil {
|
||||
// If userns is not set, the kubelet doesn't support this option
|
||||
// and we should just fallback to no userns. This is completely
|
||||
// valid.
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
uidRuntimeMap := userns.GetUids()
|
||||
gidRuntimeMap := userns.GetGids()
|
||||
|
||||
uids, err := parseUsernsIDMap(uidRuntimeMap)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("UID mapping: %w", err)
|
||||
}
|
||||
|
||||
gids, err = parseUsernsIDMap(gidRuntimeMap)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("GID mapping: %w", err)
|
||||
}
|
||||
|
||||
switch mode := userns.GetMode(); mode {
|
||||
case runtime.NamespaceMode_NODE:
|
||||
if len(uids) != 0 || len(gids) != 0 {
|
||||
return nil, nil, fmt.Errorf("can't use user namespace mode %q with mappings. Got %v UID mappings and %v GID mappings", mode, len(uids), len(gids))
|
||||
}
|
||||
case runtime.NamespaceMode_POD:
|
||||
// This is valid, we will handle it in WithPodNamespaces().
|
||||
if len(uids) == 0 || len(gids) == 0 {
|
||||
return nil, nil, fmt.Errorf("can't use user namespace mode %q without UID and GID mappings", mode)
|
||||
}
|
||||
default:
|
||||
return nil, nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
|
||||
}
|
||||
|
||||
return uids, gids, nil
|
||||
}
|
||||
|
||||
func snapshotterRemapOpts(nsOpts *runtime.NamespaceOption) ([]snapshots.Opt, error) {
|
||||
snapshotOpt := []snapshots.Opt{}
|
||||
usernsOpts := nsOpts.GetUsernsOptions()
|
||||
if usernsOpts == nil {
|
||||
return snapshotOpt, nil
|
||||
}
|
||||
|
||||
uids, gids, err := parseUsernsIDs(usernsOpts)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("user namespace configuration: %w", err)
|
||||
}
|
||||
|
||||
if usernsOpts.GetMode() == runtime.NamespaceMode_POD {
|
||||
snapshotOpt = append(snapshotOpt, containerd.WithRemapperLabels(0, uids[0].HostID, 0, gids[0].HostID, uids[0].Size))
|
||||
}
|
||||
return snapshotOpt, nil
|
||||
}
|
||||
|
@ -23,6 +23,7 @@ import (
|
||||
"fmt"
|
||||
"math"
|
||||
"path/filepath"
|
||||
goruntime "runtime"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@ -157,10 +158,17 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate runtime options: %w", err)
|
||||
}
|
||||
snapshotterOpt := snapshots.WithLabels(snapshots.FilterInheritedLabels(config.Annotations))
|
||||
|
||||
sOpts := []snapshots.Opt{snapshots.WithLabels(snapshots.FilterInheritedLabels(config.Annotations))}
|
||||
extraSOpts, err := sandboxSnapshotterOpts(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
sOpts = append(sOpts, extraSOpts...)
|
||||
|
||||
opts := []containerd.NewContainerOpts{
|
||||
containerd.WithSnapshotter(c.runtimeSnapshotter(ctx, ociRuntime)),
|
||||
customopts.WithNewSnapshot(id, containerdImage, snapshotterOpt),
|
||||
customopts.WithNewSnapshot(id, containerdImage, sOpts...),
|
||||
containerd.WithSpec(spec, specOpts...),
|
||||
containerd.WithContainerLabels(sandboxLabels),
|
||||
containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata),
|
||||
@ -244,8 +252,27 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
||||
return nil, fmt.Errorf("failed to get sandbox container info: %w", err)
|
||||
}
|
||||
|
||||
userNsEnabled := false
|
||||
if goruntime.GOOS != "windows" {
|
||||
usernsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions()
|
||||
if usernsOpts != nil && usernsOpts.GetMode() == runtime.NamespaceMode_POD {
|
||||
userNsEnabled = true
|
||||
}
|
||||
}
|
||||
|
||||
// Setup the network namespace if host networking wasn't requested.
|
||||
if !hostNetwork(config) {
|
||||
if !hostNetwork(config) && !userNsEnabled {
|
||||
// XXX: We do c&p of this code later for the podNetwork && userNsEnabled case too.
|
||||
// We can't move this to a function, as the defer calls need to be executed if other
|
||||
// errors are returned in this function. So, we would need more refactors to move
|
||||
// this code to a function and the idea was to not change the current code for
|
||||
// !userNsEnabled case, therefore doing it would defeat the purpose.
|
||||
//
|
||||
// The difference between the cases is the use of netns.NewNetNS() vs
|
||||
// netns.NewNetNSFromPID() and we verify the task is still running in the other case.
|
||||
//
|
||||
// To simplify this, in the future, we should just remove this case (podNetwork &&
|
||||
// !userNsEnabled) and just keep the other case (podNetwork && userNsEnabled).
|
||||
netStart := time.Now()
|
||||
|
||||
// If it is not in host network namespace then create a namespace and set the sandbox
|
||||
@ -353,6 +380,88 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
||||
return nil, fmt.Errorf("failed to wait for sandbox container task: %w", err)
|
||||
}
|
||||
|
||||
if !hostNetwork(config) && userNsEnabled {
|
||||
// If userns is enabled, then the netns was created by the OCI runtime
|
||||
// when creating "task". The OCI runtime needs to create the netns
|
||||
// because, if userns is in use, the netns needs to be owned by the
|
||||
// userns. So, let the OCI runtime just handle this for us.
|
||||
// If the netns is not owned by the userns several problems will happen.
|
||||
// For instance, the container will lack permission (even if
|
||||
// capabilities are present) to modify the netns or, even worse, the OCI
|
||||
// runtime will fail to mount sysfs:
|
||||
// https://github.com/torvalds/linux/commit/7dc5dbc879bd0779924b5132a48b731a0bc04a1e#diff-4839664cd0c8eab716e064323c7cd71fR1164
|
||||
netStart := time.Now()
|
||||
|
||||
// If it is not in host network namespace then create a namespace and set the sandbox
|
||||
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
|
||||
// namespaces. If the pod is in host network namespace then both are empty and should not
|
||||
// be used.
|
||||
var netnsMountDir = "/var/run/netns"
|
||||
if c.config.NetNSMountsUnderStateDir {
|
||||
netnsMountDir = filepath.Join(c.config.StateDir, "netns")
|
||||
}
|
||||
sandbox.NetNS, err = netns.NewNetNSFromPID(netnsMountDir, task.Pid())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
|
||||
}
|
||||
|
||||
// Verify task is still in created state.
|
||||
if st, err := task.Status(ctx); err != nil || st.Status != containerd.Created {
|
||||
return nil, fmt.Errorf("failed to create pod sandbox %q: err is %v - status is %q and is expected %q", id, err, st.Status, containerd.Created)
|
||||
}
|
||||
sandbox.NetNSPath = sandbox.NetNS.GetPath()
|
||||
|
||||
defer func() {
|
||||
// Remove the network namespace only if all the resource cleanup is done.
|
||||
if retErr != nil && cleanupErr == nil {
|
||||
if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil {
|
||||
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
|
||||
return
|
||||
}
|
||||
sandbox.NetNSPath = ""
|
||||
}
|
||||
}()
|
||||
|
||||
// Update network namespace in the container's spec
|
||||
c.updateNetNamespacePath(spec, sandbox.NetNSPath)
|
||||
|
||||
if err := container.Update(ctx,
|
||||
// Update spec of the container
|
||||
containerd.UpdateContainerOpts(containerd.WithSpec(spec)),
|
||||
// Update sandbox metadata to include NetNS info
|
||||
containerd.UpdateContainerOpts(containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata))); err != nil {
|
||||
return nil, fmt.Errorf("failed to update the network namespace for the sandbox container %q: %w", id, err)
|
||||
}
|
||||
|
||||
// Define this defer to teardownPodNetwork prior to the setupPodNetwork function call.
|
||||
// This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource creation functions.
|
||||
defer func() {
|
||||
// Teardown the network only if all the resource cleanup is done.
|
||||
if retErr != nil && cleanupErr == nil {
|
||||
deferCtx, deferCancel := ctrdutil.DeferContext()
|
||||
defer deferCancel()
|
||||
// Teardown network if an error is returned.
|
||||
if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil {
|
||||
log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Setup network for sandbox.
|
||||
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
|
||||
// rely on the assumption that CRI shim will not be querying the network namespace to check the
|
||||
// network states such as IP.
|
||||
// In future runtime implementation should avoid relying on CRI shim implementation details.
|
||||
// In this case however caching the IP will add a subtle performance enhancement by avoiding
|
||||
// calls to network namespace of the pod to query the IP of the veth interface on every
|
||||
// SandboxStatus request.
|
||||
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
|
||||
return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
|
||||
}
|
||||
|
||||
sandboxCreateNetworkTimer.UpdateSince(netStart)
|
||||
}
|
||||
|
||||
if c.nri.isEnabled() {
|
||||
err = c.nri.runPodSandbox(ctx, &sandbox)
|
||||
if err != nil {
|
||||
|
@ -25,6 +25,7 @@ import (
|
||||
"github.com/containerd/containerd"
|
||||
"github.com/containerd/containerd/oci"
|
||||
"github.com/containerd/containerd/plugin"
|
||||
"github.com/containerd/containerd/snapshots"
|
||||
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
selinux "github.com/opencontainers/selinux/go-selinux"
|
||||
@ -95,6 +96,23 @@ func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxC
|
||||
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace))
|
||||
}
|
||||
|
||||
usernsOpts := nsOptions.GetUsernsOptions()
|
||||
uids, gids, err := parseUsernsIDs(usernsOpts)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("user namespace configuration: %w", err)
|
||||
}
|
||||
|
||||
if usernsOpts != nil {
|
||||
switch mode := usernsOpts.GetMode(); mode {
|
||||
case runtime.NamespaceMode_NODE:
|
||||
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UserNamespace))
|
||||
case runtime.NamespaceMode_POD:
|
||||
specOpts = append(specOpts, oci.WithUserNamespace(uids, gids))
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
|
||||
}
|
||||
}
|
||||
|
||||
// It's fine to generate the spec before the sandbox /dev/shm
|
||||
// is actually created.
|
||||
sandboxDevShm := c.getSandboxDevShm(id)
|
||||
@ -358,3 +376,10 @@ func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath strin
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sandboxSnapshotterOpts generates any platform specific snapshotter options
|
||||
// for a sandbox container.
|
||||
func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) {
|
||||
nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions()
|
||||
return snapshotterRemapOpts(nsOpts)
|
||||
}
|
||||
|
@ -98,6 +98,17 @@ func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConf
|
||||
func TestLinuxSandboxContainerSpec(t *testing.T) {
|
||||
testID := "test-id"
|
||||
nsPath := "test-cni"
|
||||
idMap := runtime.IDMapping{
|
||||
HostId: 1000,
|
||||
ContainerId: 1000,
|
||||
Length: 10,
|
||||
}
|
||||
expIDMap := runtimespec.LinuxIDMapping{
|
||||
HostID: 1000,
|
||||
ContainerID: 1000,
|
||||
Size: 10,
|
||||
}
|
||||
|
||||
for desc, test := range map[string]struct {
|
||||
configChange func(*runtime.PodSandboxConfig)
|
||||
specCheck func(*testing.T, *runtimespec.Spec)
|
||||
@ -122,6 +133,9 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
|
||||
})
|
||||
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
|
||||
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
|
||||
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
|
||||
Type: runtimespec.UserNamespace,
|
||||
})
|
||||
},
|
||||
},
|
||||
"host namespace": {
|
||||
@ -149,10 +163,113 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
|
||||
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
|
||||
Type: runtimespec.IPCNamespace,
|
||||
})
|
||||
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
|
||||
Type: runtimespec.UserNamespace,
|
||||
})
|
||||
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
|
||||
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
|
||||
},
|
||||
},
|
||||
"user namespace": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_POD,
|
||||
Uids: []*runtime.IDMapping{&idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
|
||||
require.NotNil(t, spec.Linux)
|
||||
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
|
||||
Type: runtimespec.UserNamespace,
|
||||
})
|
||||
require.Equal(t, spec.Linux.UIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
|
||||
require.Equal(t, spec.Linux.GIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
|
||||
|
||||
},
|
||||
},
|
||||
"user namespace mode node and mappings": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_NODE,
|
||||
Uids: []*runtime.IDMapping{&idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
"user namespace with several mappings": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_NODE,
|
||||
Uids: []*runtime.IDMapping{&idMap, &idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap, &idMap},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
"user namespace with uneven mappings": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_NODE,
|
||||
Uids: []*runtime.IDMapping{&idMap, &idMap},
|
||||
Gids: []*runtime.IDMapping{&idMap},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
"user namespace mode container": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_CONTAINER,
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
"user namespace mode target": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode_TARGET,
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
"user namespace unknown mode": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
NamespaceOptions: &runtime.NamespaceOption{
|
||||
UsernsOptions: &runtime.UserNamespace{
|
||||
Mode: runtime.NamespaceMode(100),
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
"should set supplemental groups correctly": {
|
||||
configChange: func(c *runtime.PodSandboxConfig) {
|
||||
c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
|
||||
|
@ -21,6 +21,7 @@ package server
|
||||
import (
|
||||
"github.com/containerd/containerd"
|
||||
"github.com/containerd/containerd/oci"
|
||||
"github.com/containerd/containerd/snapshots"
|
||||
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
@ -56,3 +57,9 @@ func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
|
||||
|
||||
// updateNetNamespacePath does nothing in this variant: the body is empty, so
// spec is left untouched and nsPath is ignored.
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
|
||||
}
|
||||
|
||||
// sandboxSnapshotterOpts generates any platform specific snapshotter options
|
||||
// for a sandbox container.
// This variant ignores config and always returns an empty, non-nil slice
// together with a nil error.
|
||||
func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) {
|
||||
return []snapshots.Opt{}, nil
|
||||
}
|
||||
|
@ -22,6 +22,7 @@ import (
|
||||
|
||||
"github.com/containerd/containerd"
|
||||
"github.com/containerd/containerd/oci"
|
||||
"github.com/containerd/containerd/snapshots"
|
||||
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
@ -116,3 +117,8 @@ func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
|
||||
// updateNetNamespacePath stores nsPath as the network namespace in the
// Windows network section of spec.
// NOTE(review): spec.Windows and spec.Windows.Network are dereferenced without
// a nil check — presumably the caller guarantees both are populated; confirm.
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
|
||||
spec.Windows.Network.NetworkNamespace = nsPath
|
||||
}
|
||||
|
||||
// No sandbox snapshotter options needed for windows.
// config is ignored; the result is always an empty, non-nil slice and a nil
// error.
|
||||
func sandboxSnapshotterOpts(config *runtime.PodSandboxConfig) ([]snapshots.Opt, error) {
|
||||
return []snapshots.Opt{}, nil
|
||||
}
|
||||
|
@ -50,7 +50,9 @@ import (
|
||||
|
||||
// newNS creates a new persistent (bind-mounted) network namespace and returns the
|
||||
// path to the network namespace.
|
||||
func newNS(baseDir string) (nsPath string, err error) {
|
||||
// If pid is not 0, returns the netns from that pid persistently mounted. Otherwise,
|
||||
// a new netns is created.
|
||||
func newNS(baseDir string, pid uint32) (nsPath string, err error) {
|
||||
b := make([]byte, 16)
|
||||
|
||||
_, err = rand.Read(b)
|
||||
@ -81,6 +83,16 @@ func newNS(baseDir string) (nsPath string, err error) {
|
||||
}
|
||||
}()
|
||||
|
||||
if pid != 0 {
|
||||
procNsPath := getNetNSPathFromPID(pid)
|
||||
// bind mount the netns onto the mount point. This causes the namespace
|
||||
// to persist, even when there are no threads in the ns.
|
||||
if err = unix.Mount(procNsPath, nsPath, "none", unix.MS_BIND, ""); err != nil {
|
||||
return "", fmt.Errorf("failed to bind mount ns src: %v at %s: %w", procNsPath, nsPath, err)
|
||||
}
|
||||
return nsPath, nil
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
|
||||
@ -155,6 +167,10 @@ func getCurrentThreadNetNSPath() string {
|
||||
return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
|
||||
}
|
||||
|
||||
// getNetNSPathFromPID returns the procfs path of the network namespace entry
// of the process identified by pid, i.e. "/proc/<pid>/ns/net". The path is
// only formatted; it is not checked for existence.
func getNetNSPathFromPID(pid uint32) string {
	const netNSPathFormat = "/proc/%d/ns/net"
	return fmt.Sprintf(netNSPathFormat, pid)
}
|
||||
|
||||
// NetNS holds network namespace.
|
||||
type NetNS struct {
|
||||
path string
|
||||
@ -162,7 +178,12 @@ type NetNS struct {
|
||||
|
||||
// NewNetNS creates a network namespace.
|
||||
func NewNetNS(baseDir string) (*NetNS, error) {
|
||||
path, err := newNS(baseDir)
|
||||
return NewNetNSFromPID(baseDir, 0)
|
||||
}
|
||||
|
||||
// NewNetNSFromPID returns the netns from pid or a new netns if pid is 0.
|
||||
func NewNetNSFromPID(baseDir string, pid uint32) (*NetNS, error) {
|
||||
path, err := newNS(baseDir, pid)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to setup netns: %w", err)
|
||||
}
|
||||
|
@ -35,6 +35,11 @@ func NewNetNS(baseDir string) (*NetNS, error) {
|
||||
return nil, errNotImplementedOnUnix
|
||||
}
|
||||
|
||||
// NewNetNSFromPID is meant to return the netns from pid, or a new netns if
// pid is 0. It is not implemented in this variant: both arguments are ignored
// and the call always fails with errNotImplementedOnUnix.
|
||||
func NewNetNSFromPID(baseDir string, pid uint32) (*NetNS, error) {
|
||||
return nil, errNotImplementedOnUnix
|
||||
}
|
||||
|
||||
// LoadNetNS loads existing network namespace.
|
||||
func LoadNetNS(path string) *NetNS {
|
||||
return &NetNS{path: path}
|
||||
|
@ -16,14 +16,20 @@
|
||||
|
||||
package netns
|
||||
|
||||
import "github.com/Microsoft/hcsshim/hcn"
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/Microsoft/hcsshim/hcn"
|
||||
)
|
||||
|
||||
var errNotImplementedOnWindows = errors.New("not implemented on windows")
|
||||
|
||||
// NetNS holds network namespace for sandbox.
|
||||
type NetNS struct {
|
||||
// path identifies the namespace; NewNetNS stores the Id of the HCN
// namespace it creates here.
path string
|
||||
}
|
||||
|
||||
// NewNetNS creates a network namespace for the sandbox
|
||||
// NewNetNS creates a network namespace for the sandbox.
|
||||
func NewNetNS(baseDir string) (*NetNS, error) {
|
||||
temp := hcn.HostComputeNamespace{}
|
||||
hcnNamespace, err := temp.Create()
|
||||
@ -34,6 +40,11 @@ func NewNetNS(baseDir string) (*NetNS, error) {
|
||||
return &NetNS{path: hcnNamespace.Id}, nil
|
||||
}
|
||||
|
||||
// NewNetNSFromPID is meant to return the netns from pid, or a new netns if
// pid is 0. It is not implemented on Windows: both arguments are ignored and
// the call always fails with errNotImplementedOnWindows.
|
||||
func NewNetNSFromPID(baseDir string, pid uint32) (*NetNS, error) {
|
||||
return nil, errNotImplementedOnWindows
|
||||
}
|
||||
|
||||
// LoadNetNS loads existing network namespace.
|
||||
func LoadNetNS(path string) *NetNS {
|
||||
return &NetNS{path: path}
|
||||
|
@ -33,6 +33,11 @@ const (
|
||||
UnpackKeyFormat = UnpackKeyPrefix + "-%s %s"
|
||||
inheritedLabelsPrefix = "containerd.io/snapshot/"
|
||||
labelSnapshotRef = "containerd.io/snapshot.ref"
|
||||
|
||||
// LabelSnapshotUIDMapping is the label used for UID mappings
|
||||
LabelSnapshotUIDMapping = "containerd.io/snapshot/uidmapping"
|
||||
// LabelSnapshotGIDMapping is the label used for GID mappings
|
||||
LabelSnapshotGIDMapping = "containerd.io/snapshot/gidmapping"
|
||||
)
|
||||
|
||||
// Kind identifies the kind of snapshot.
|
||||
|
@ -19,17 +19,92 @@
|
||||
package containerd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/containerd/containerd/snapshots"
|
||||
)
|
||||
|
||||
const (
|
||||
// capabRemapIDs is the capability name a snapshotter reports when it
// supports ID remapping itself; resolveSnapshotOptions returns the parent
// unchanged when this capability is present.
capabRemapIDs = "remap-ids"
|
||||
)
|
||||
|
||||
// WithRemapperLabels creates the labels used by any supporting snapshotter
|
||||
// to shift the filesystem ownership (user namespace mapping) automatically; currently
|
||||
// supported by the fuse-overlayfs snapshotter
|
||||
func WithRemapperLabels(ctrUID, hostUID, ctrGID, hostGID, length uint32) snapshots.Opt {
|
||||
return snapshots.WithLabels(map[string]string{
|
||||
"containerd.io/snapshot/uidmapping": fmt.Sprintf("%d:%d:%d", ctrUID, hostUID, length),
|
||||
"containerd.io/snapshot/gidmapping": fmt.Sprintf("%d:%d:%d", ctrGID, hostGID, length),
|
||||
})
|
||||
snapshots.LabelSnapshotUIDMapping: fmt.Sprintf("%d:%d:%d", ctrUID, hostUID, length),
|
||||
snapshots.LabelSnapshotGIDMapping: fmt.Sprintf("%d:%d:%d", ctrGID, hostGID, length)})
|
||||
}
|
||||
|
||||
func resolveSnapshotOptions(ctx context.Context, client *Client, snapshotterName string, snapshotter snapshots.Snapshotter, parent string, opts ...snapshots.Opt) (string, error) {
|
||||
capabs, err := client.GetSnapshotterCapabilities(ctx, snapshotterName)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
for _, capab := range capabs {
|
||||
if capab == capabRemapIDs {
|
||||
// Snapshotter supports ID remapping, we don't need to do anything.
|
||||
return parent, nil
|
||||
}
|
||||
}
|
||||
|
||||
var local snapshots.Info
|
||||
for _, opt := range opts {
|
||||
opt(&local)
|
||||
}
|
||||
|
||||
needsRemap := false
|
||||
var uidMap, gidMap string
|
||||
|
||||
if value, ok := local.Labels[snapshots.LabelSnapshotUIDMapping]; ok {
|
||||
needsRemap = true
|
||||
uidMap = value
|
||||
}
|
||||
if value, ok := local.Labels[snapshots.LabelSnapshotGIDMapping]; ok {
|
||||
needsRemap = true
|
||||
gidMap = value
|
||||
}
|
||||
|
||||
if !needsRemap {
|
||||
return parent, nil
|
||||
}
|
||||
|
||||
var ctrUID, hostUID, length uint32
|
||||
_, err = fmt.Sscanf(uidMap, "%d:%d:%d", &ctrUID, &hostUID, &length)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("uidMap unparsable: %w", err)
|
||||
}
|
||||
|
||||
var ctrGID, hostGID, lengthGID uint32
|
||||
_, err = fmt.Sscanf(gidMap, "%d:%d:%d", &ctrGID, &hostGID, &lengthGID)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("gidMap unparsable: %w", err)
|
||||
}
|
||||
|
||||
if ctrUID != 0 || ctrGID != 0 {
|
||||
return "", fmt.Errorf("Container UID/GID of 0 only supported currently (%d/%d)", ctrUID, ctrGID)
|
||||
}
|
||||
|
||||
// TODO(dgl): length isn't taken into account for the intermediate snapshot id.
|
||||
usernsID := fmt.Sprintf("%s-%d-%d", parent, hostUID, hostGID)
|
||||
if _, err := snapshotter.Stat(ctx, usernsID); err == nil {
|
||||
return usernsID, nil
|
||||
}
|
||||
mounts, err := snapshotter.Prepare(ctx, usernsID+"-remap", parent)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// TODO(dgl): length isn't taken into account here yet either.
|
||||
if err := remapRootFS(ctx, mounts, hostUID, hostGID); err != nil {
|
||||
snapshotter.Remove(ctx, usernsID+"-remap")
|
||||
return "", err
|
||||
}
|
||||
if err := snapshotter.Commit(ctx, usernsID, usernsID+"-remap"); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return usernsID, nil
|
||||
}
|
||||
|
27
snapshotter_opts_windows.go
Normal file
27
snapshotter_opts_windows.go
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package containerd
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/containerd/containerd/snapshots"
|
||||
)
|
||||
|
||||
// resolveSnapshotOptions is a no-op in this variant: it returns parent
// unchanged with a nil error and ignores every other argument.
func resolveSnapshotOptions(ctx context.Context, client *Client, snapshotterName string, snapshotter snapshots.Snapshotter, parent string, opts ...snapshots.Opt) (string, error) {
|
||||
return parent, nil
|
||||
}
|
Loading…
Reference in New Issue
Block a user