Merge pull request #11035 from k8s-infra-cherrypick-robot/cherry-pick-10906-to-release/2.0
[release/2.0] *: should align pipe's owner with init process
This commit is contained in:
commit
952ad6b3a6
@ -38,6 +38,7 @@ type ImageList struct {
|
|||||||
VolumeOwnership string
|
VolumeOwnership string
|
||||||
ArgsEscaped string
|
ArgsEscaped string
|
||||||
DockerSchema1 string
|
DockerSchema1 string
|
||||||
|
Nginx string
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -57,6 +58,7 @@ func initImages(imageListFile string) {
|
|||||||
VolumeOwnership: "ghcr.io/containerd/volume-ownership:2.1",
|
VolumeOwnership: "ghcr.io/containerd/volume-ownership:2.1",
|
||||||
ArgsEscaped: "cplatpublic.azurecr.io/args-escaped-test-image-ns:1.0",
|
ArgsEscaped: "cplatpublic.azurecr.io/args-escaped-test-image-ns:1.0",
|
||||||
DockerSchema1: "registry.k8s.io/busybox@sha256:4bdd623e848417d96127e16037743f0cd8b528c026e9175e22a84f639eca58ff",
|
DockerSchema1: "registry.k8s.io/busybox@sha256:4bdd623e848417d96127e16037743f0cd8b528c026e9175e22a84f639eca58ff",
|
||||||
|
Nginx: "ghcr.io/containerd/nginx:1.27.0",
|
||||||
}
|
}
|
||||||
|
|
||||||
if imageListFile != "" {
|
if imageListFile != "" {
|
||||||
@ -96,6 +98,8 @@ const (
|
|||||||
ArgsEscaped
|
ArgsEscaped
|
||||||
// DockerSchema1 image with docker schema 1
|
// DockerSchema1 image with docker schema 1
|
||||||
DockerSchema1
|
DockerSchema1
|
||||||
|
// Nginx image
|
||||||
|
Nginx
|
||||||
)
|
)
|
||||||
|
|
||||||
func initImageMap(imageList ImageList) map[int]string {
|
func initImageMap(imageList ImageList) map[int]string {
|
||||||
@ -108,6 +112,7 @@ func initImageMap(imageList ImageList) map[int]string {
|
|||||||
images[VolumeOwnership] = imageList.VolumeOwnership
|
images[VolumeOwnership] = imageList.VolumeOwnership
|
||||||
images[ArgsEscaped] = imageList.ArgsEscaped
|
images[ArgsEscaped] = imageList.ArgsEscaped
|
||||||
images[DockerSchema1] = imageList.DockerSchema1
|
images[DockerSchema1] = imageList.DockerSchema1
|
||||||
|
images[Nginx] = imageList.Nginx
|
||||||
return images
|
return images
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -471,6 +471,24 @@ func WithDevice(containerPath, hostPath, permissions string) ContainerOpts {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithSELinuxOptions allows to set SELinux option for container.
|
||||||
|
func WithSELinuxOptions(user, role, typ, level string) ContainerOpts {
|
||||||
|
return func(c *runtime.ContainerConfig) {
|
||||||
|
if c.Linux == nil {
|
||||||
|
c.Linux = &runtime.LinuxContainerConfig{}
|
||||||
|
}
|
||||||
|
if c.Linux.SecurityContext == nil {
|
||||||
|
c.Linux.SecurityContext = &runtime.LinuxContainerSecurityContext{}
|
||||||
|
}
|
||||||
|
c.Linux.SecurityContext.SelinuxOptions = &runtime.SELinuxOption{
|
||||||
|
User: user,
|
||||||
|
Role: role,
|
||||||
|
Type: typ,
|
||||||
|
Level: level,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ContainerConfig creates a container config given a name and image name
|
// ContainerConfig creates a container config given a name and image name
|
||||||
// and additional container config options
|
// and additional container config options
|
||||||
func ContainerConfig(name, image string, opts ...ContainerOpts) *runtime.ContainerConfig {
|
func ContainerConfig(name, image string, opts ...ContainerOpts) *runtime.ContainerConfig {
|
||||||
|
@ -304,6 +304,125 @@ func TestPodUserNS(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestIssue10598 tests a case[1] that init processes in container should be able
|
||||||
|
// to open /dev/stdout or /dev/stderr if init processes are running in their
|
||||||
|
// user namespace instead of root user.
|
||||||
|
//
|
||||||
|
// The shim server creates pipe for init processes' standard output. By default,
|
||||||
|
// the owner of pipe is the same to shim server (root user). Let's say, the init
|
||||||
|
// process is running with uid=1000/gid=1000 user. Init processes inherits the
|
||||||
|
// pipe created by shim server so that it can just write data into that pipe.
|
||||||
|
// However, if that init process tries to open /dev/stderr, the kernel will
|
||||||
|
// return no permission error.
|
||||||
|
//
|
||||||
|
// The following output is from retsnoop[2].
|
||||||
|
//
|
||||||
|
// → do_open
|
||||||
|
// → inode_permission
|
||||||
|
// → generic_permission
|
||||||
|
// ↔ make_vfsuid [0] 0.500us
|
||||||
|
// ↔ make_vfsuid [0] 6.501us
|
||||||
|
// ↔ from_kuid [0xffffffff] 0.700us
|
||||||
|
// ← generic_permission [-EACCES] 13.501us
|
||||||
|
//
|
||||||
|
// Since uid_map/gid_map doesn't cover uid=0/gid=0, the kernel can't convert
|
||||||
|
// uid=0 into valid uid in that uid_map. So, `from_kuid` returns invalid uid
|
||||||
|
// value and then `do_open` returns EACCES error.
|
||||||
|
//
|
||||||
|
// [1]: https://github.com/containerd/containerd/issues/10598
|
||||||
|
// [2]: https://github.com/anakryiko/retsnoop
|
||||||
|
func TestIssue10598(t *testing.T) {
|
||||||
|
if !supportsUserNS() {
|
||||||
|
t.Skip("User namespaces are not supported")
|
||||||
|
}
|
||||||
|
if !supportsIDMap(defaultRoot) {
|
||||||
|
t.Skipf("ID mappings are not supported on: %v", defaultRoot)
|
||||||
|
}
|
||||||
|
if err := supportsRuncIDMap(); err != nil {
|
||||||
|
t.Skipf("OCI runtime doesn't support idmap mounts: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
testPodLogDir := t.TempDir()
|
||||||
|
|
||||||
|
containerID := uint32(0)
|
||||||
|
hostID := uint32(65536)
|
||||||
|
size := uint32(65536)
|
||||||
|
|
||||||
|
t.Log("Create a sandbox with userns")
|
||||||
|
sandboxOpts := []PodSandboxOpts{
|
||||||
|
WithPodUserNs(containerID, hostID, size),
|
||||||
|
WithPodLogDirectory(testPodLogDir),
|
||||||
|
}
|
||||||
|
sbConfig := PodSandboxConfig("issue10598", "userns", sandboxOpts...)
|
||||||
|
sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Make sure the sandbox is cleaned up.
|
||||||
|
defer func() {
|
||||||
|
assert.NoError(t, runtimeService.StopPodSandbox(sb))
|
||||||
|
assert.NoError(t, runtimeService.RemovePodSandbox(sb))
|
||||||
|
}()
|
||||||
|
|
||||||
|
t.Log("Create a container for userns")
|
||||||
|
|
||||||
|
containerName := "nginx-userns"
|
||||||
|
testImage := images.Get(images.Nginx)
|
||||||
|
|
||||||
|
EnsureImageExists(t, testImage)
|
||||||
|
|
||||||
|
containerOpts := []ContainerOpts{
|
||||||
|
WithUserNamespace(containerID, hostID, size),
|
||||||
|
WithLogPath(containerName),
|
||||||
|
// The SELinux policy enforced by container-selinux prevents
|
||||||
|
// NGINX from opening the /proc/self/fd/2 pipe. This scenario
|
||||||
|
// is not intended to verify SELinux behavior in the user namespace
|
||||||
|
// but rather to confirm the ownership of the standard output
|
||||||
|
// file descriptor. The following option demonstrates how to
|
||||||
|
// disable the restrictive SELinux rule for the NGINX process.
|
||||||
|
WithSELinuxOptions(
|
||||||
|
"unconfined_u",
|
||||||
|
"unconfined_r",
|
||||||
|
"container_runtime_t",
|
||||||
|
"s0",
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
cnConfig := ContainerConfig(
|
||||||
|
containerName,
|
||||||
|
testImage,
|
||||||
|
containerOpts...,
|
||||||
|
)
|
||||||
|
cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
t.Log("Start the container")
|
||||||
|
require.NoError(t, runtimeService.StartContainer(cn))
|
||||||
|
|
||||||
|
t.Log("Wait for container to start")
|
||||||
|
require.NoError(t, Eventually(func() (bool, error) {
|
||||||
|
content, err := os.ReadFile(filepath.Join(testPodLogDir, containerName))
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
s, err := runtimeService.ContainerStatus(cn)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if state := s.GetState(); state != runtime.ContainerState_CONTAINER_RUNNING {
|
||||||
|
return false, fmt.Errorf("%s is not running\nstate: %s\nlog: %s",
|
||||||
|
containerName, state, string(content))
|
||||||
|
}
|
||||||
|
|
||||||
|
started := strings.Contains(string(content), "start worker processes")
|
||||||
|
if started {
|
||||||
|
t.Log(string(content))
|
||||||
|
}
|
||||||
|
return started, nil
|
||||||
|
}, time.Second, 30*time.Second))
|
||||||
|
}
|
||||||
|
|
||||||
func supportsRuncIDMap() error {
|
func supportsRuncIDMap() error {
|
||||||
var r runc.Runc
|
var r runc.Runc
|
||||||
features, err := r.Features(context.Background())
|
features, err := r.Features(context.Background())
|
||||||
|
@ -129,6 +129,12 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain
|
|||||||
containerd.WithTaskAPIEndpoint(endpoint.Address, endpoint.Version))
|
containerd.WithTaskAPIEndpoint(endpoint.Address, endpoint.Version))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ioOwnerTaskOpts, err := updateContainerIOOwner(ctx, container, config)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to update container IO owner: %w", err)
|
||||||
|
}
|
||||||
|
taskOpts = append(taskOpts, ioOwnerTaskOpts...)
|
||||||
|
|
||||||
task, err := container.NewTask(ctx, ioCreation, taskOpts...)
|
task, err := container.NewTask(ctx, ioCreation, taskOpts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create containerd task: %w", err)
|
return nil, fmt.Errorf("failed to create containerd task: %w", err)
|
||||||
|
66
internal/cri/server/container_start_linux.go
Normal file
66
internal/cri/server/container_start_linux.go
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
containerd "github.com/containerd/containerd/v2/client"
|
||||||
|
"github.com/containerd/containerd/v2/internal/userns"
|
||||||
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
// updateContainerIOOwner updates I/O files' owner to align with initial processe's UID/GID.
|
||||||
|
func updateContainerIOOwner(ctx context.Context, cntr containerd.Container, config *runtime.ContainerConfig) ([]containerd.NewTaskOpts, error) {
|
||||||
|
if config.GetLinux() == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME(fuweid): Ideally, the pipe owner should be aligned with process owner.
|
||||||
|
// No matter what user namespace container uses, it should work well. However,
|
||||||
|
// it breaks the sig-node conformance case - [when querying /stats/summary should report resource usage through the stats api].
|
||||||
|
// In order to keep compatible, the change should apply to user namespace only.
|
||||||
|
if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions() == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
spec, err := cntr.Spec(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get spec: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if spec.Linux == nil || spec.Process == nil {
|
||||||
|
return nil, fmt.Errorf("invalid linux platform oci runtime spec")
|
||||||
|
}
|
||||||
|
|
||||||
|
hostID, err := userns.IDMap{
|
||||||
|
UidMap: spec.Linux.UIDMappings,
|
||||||
|
GidMap: spec.Linux.GIDMappings,
|
||||||
|
}.ToHost(userns.User{
|
||||||
|
Uid: spec.Process.User.UID,
|
||||||
|
Gid: spec.Process.User.GID,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to do idmap to get host ID: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return []containerd.NewTaskOpts{
|
||||||
|
containerd.WithUIDOwner(hostID.Uid),
|
||||||
|
containerd.WithGIDOwner(hostID.Gid),
|
||||||
|
}, nil
|
||||||
|
}
|
31
internal/cri/server/container_start_other.go
Normal file
31
internal/cri/server/container_start_other.go
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
//go:build !linux
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
containerd "github.com/containerd/containerd/v2/client"
|
||||||
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
// updateContainerIOOwner updates I/O files' owner to align with initial processe's UID/GID.
|
||||||
|
func updateContainerIOOwner(ctx context.Context, cntr containerd.Container, config *runtime.ContainerConfig) ([]containerd.NewTaskOpts, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user