Merge pull request #11035 from k8s-infra-cherrypick-robot/cherry-pick-10906-to-release/2.0
[release/2.0] *: should align pipe's owner with init process
This commit is contained in:
commit
952ad6b3a6
@ -38,6 +38,7 @@ type ImageList struct {
|
||||
VolumeOwnership string
|
||||
ArgsEscaped string
|
||||
DockerSchema1 string
|
||||
Nginx string
|
||||
}
|
||||
|
||||
var (
|
||||
@ -57,6 +58,7 @@ func initImages(imageListFile string) {
|
||||
VolumeOwnership: "ghcr.io/containerd/volume-ownership:2.1",
|
||||
ArgsEscaped: "cplatpublic.azurecr.io/args-escaped-test-image-ns:1.0",
|
||||
DockerSchema1: "registry.k8s.io/busybox@sha256:4bdd623e848417d96127e16037743f0cd8b528c026e9175e22a84f639eca58ff",
|
||||
Nginx: "ghcr.io/containerd/nginx:1.27.0",
|
||||
}
|
||||
|
||||
if imageListFile != "" {
|
||||
@ -96,6 +98,8 @@ const (
|
||||
ArgsEscaped
|
||||
// DockerSchema1 image with docker schema 1
|
||||
DockerSchema1
|
||||
// Nginx image
|
||||
Nginx
|
||||
)
|
||||
|
||||
func initImageMap(imageList ImageList) map[int]string {
|
||||
@ -108,6 +112,7 @@ func initImageMap(imageList ImageList) map[int]string {
|
||||
images[VolumeOwnership] = imageList.VolumeOwnership
|
||||
images[ArgsEscaped] = imageList.ArgsEscaped
|
||||
images[DockerSchema1] = imageList.DockerSchema1
|
||||
images[Nginx] = imageList.Nginx
|
||||
return images
|
||||
}
|
||||
|
||||
|
@ -471,6 +471,24 @@ func WithDevice(containerPath, hostPath, permissions string) ContainerOpts {
|
||||
}
|
||||
}
|
||||
|
||||
// WithSELinuxOptions allows to set SELinux option for container.
|
||||
func WithSELinuxOptions(user, role, typ, level string) ContainerOpts {
|
||||
return func(c *runtime.ContainerConfig) {
|
||||
if c.Linux == nil {
|
||||
c.Linux = &runtime.LinuxContainerConfig{}
|
||||
}
|
||||
if c.Linux.SecurityContext == nil {
|
||||
c.Linux.SecurityContext = &runtime.LinuxContainerSecurityContext{}
|
||||
}
|
||||
c.Linux.SecurityContext.SelinuxOptions = &runtime.SELinuxOption{
|
||||
User: user,
|
||||
Role: role,
|
||||
Type: typ,
|
||||
Level: level,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ContainerConfig creates a container config given a name and image name
|
||||
// and additional container config options
|
||||
func ContainerConfig(name, image string, opts ...ContainerOpts) *runtime.ContainerConfig {
|
||||
|
@ -304,6 +304,125 @@ func TestPodUserNS(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestIssue10598 tests a case[1] that init processes in container should be able
|
||||
// to open /dev/stdout or /dev/stderr if init processes are running in their
|
||||
// user namespace instead of root user.
|
||||
//
|
||||
// The shim server creates pipe for init processes' standard output. By default,
|
||||
// the owner of pipe is the same to shim server (root user). Let's say, the init
|
||||
// process is running with uid=1000/gid=1000 user. Init processes inherits the
|
||||
// pipe created by shim server so that it can just write data into that pipe.
|
||||
// However, if that init process tries to open /dev/stderr, the kernel will
|
||||
// return no permission error.
|
||||
//
|
||||
// The following output is from retsnoop[2].
|
||||
//
|
||||
// → do_open
|
||||
// → inode_permission
|
||||
// → generic_permission
|
||||
// ↔ make_vfsuid [0] 0.500us
|
||||
// ↔ make_vfsuid [0] 6.501us
|
||||
// ↔ from_kuid [0xffffffff] 0.700us
|
||||
// ← generic_permission [-EACCES] 13.501us
|
||||
//
|
||||
// Since uid_map/gid_map doesn't cover uid=0/gid=0, the kernel can't convert
|
||||
// uid=0 into valid uid in that uid_map. So, `from_kuid` returns invalid uid
|
||||
// value and then `do_open` returns EACCES error.
|
||||
//
|
||||
// [1]: https://github.com/containerd/containerd/issues/10598
|
||||
// [2]: https://github.com/anakryiko/retsnoop
|
||||
func TestIssue10598(t *testing.T) {
|
||||
if !supportsUserNS() {
|
||||
t.Skip("User namespaces are not supported")
|
||||
}
|
||||
if !supportsIDMap(defaultRoot) {
|
||||
t.Skipf("ID mappings are not supported on: %v", defaultRoot)
|
||||
}
|
||||
if err := supportsRuncIDMap(); err != nil {
|
||||
t.Skipf("OCI runtime doesn't support idmap mounts: %v", err)
|
||||
}
|
||||
|
||||
testPodLogDir := t.TempDir()
|
||||
|
||||
containerID := uint32(0)
|
||||
hostID := uint32(65536)
|
||||
size := uint32(65536)
|
||||
|
||||
t.Log("Create a sandbox with userns")
|
||||
sandboxOpts := []PodSandboxOpts{
|
||||
WithPodUserNs(containerID, hostID, size),
|
||||
WithPodLogDirectory(testPodLogDir),
|
||||
}
|
||||
sbConfig := PodSandboxConfig("issue10598", "userns", sandboxOpts...)
|
||||
sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Make sure the sandbox is cleaned up.
|
||||
defer func() {
|
||||
assert.NoError(t, runtimeService.StopPodSandbox(sb))
|
||||
assert.NoError(t, runtimeService.RemovePodSandbox(sb))
|
||||
}()
|
||||
|
||||
t.Log("Create a container for userns")
|
||||
|
||||
containerName := "nginx-userns"
|
||||
testImage := images.Get(images.Nginx)
|
||||
|
||||
EnsureImageExists(t, testImage)
|
||||
|
||||
containerOpts := []ContainerOpts{
|
||||
WithUserNamespace(containerID, hostID, size),
|
||||
WithLogPath(containerName),
|
||||
// The SELinux policy enforced by container-selinux prevents
|
||||
// NGINX from opening the /proc/self/fd/2 pipe. This scenario
|
||||
// is not intended to verify SELinux behavior in the user namespace
|
||||
// but rather to confirm the ownership of the standard output
|
||||
// file descriptor. The following option demonstrates how to
|
||||
// disable the restrictive SELinux rule for the NGINX process.
|
||||
WithSELinuxOptions(
|
||||
"unconfined_u",
|
||||
"unconfined_r",
|
||||
"container_runtime_t",
|
||||
"s0",
|
||||
),
|
||||
}
|
||||
|
||||
cnConfig := ContainerConfig(
|
||||
containerName,
|
||||
testImage,
|
||||
containerOpts...,
|
||||
)
|
||||
cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
|
||||
require.NoError(t, err)
|
||||
|
||||
t.Log("Start the container")
|
||||
require.NoError(t, runtimeService.StartContainer(cn))
|
||||
|
||||
t.Log("Wait for container to start")
|
||||
require.NoError(t, Eventually(func() (bool, error) {
|
||||
content, err := os.ReadFile(filepath.Join(testPodLogDir, containerName))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
s, err := runtimeService.ContainerStatus(cn)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if state := s.GetState(); state != runtime.ContainerState_CONTAINER_RUNNING {
|
||||
return false, fmt.Errorf("%s is not running\nstate: %s\nlog: %s",
|
||||
containerName, state, string(content))
|
||||
}
|
||||
|
||||
started := strings.Contains(string(content), "start worker processes")
|
||||
if started {
|
||||
t.Log(string(content))
|
||||
}
|
||||
return started, nil
|
||||
}, time.Second, 30*time.Second))
|
||||
}
|
||||
|
||||
func supportsRuncIDMap() error {
|
||||
var r runc.Runc
|
||||
features, err := r.Features(context.Background())
|
||||
|
@ -129,6 +129,12 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain
|
||||
containerd.WithTaskAPIEndpoint(endpoint.Address, endpoint.Version))
|
||||
}
|
||||
|
||||
ioOwnerTaskOpts, err := updateContainerIOOwner(ctx, container, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to update container IO owner: %w", err)
|
||||
}
|
||||
taskOpts = append(taskOpts, ioOwnerTaskOpts...)
|
||||
|
||||
task, err := container.NewTask(ctx, ioCreation, taskOpts...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create containerd task: %w", err)
|
||||
|
66
internal/cri/server/container_start_linux.go
Normal file
66
internal/cri/server/container_start_linux.go
Normal file
@ -0,0 +1,66 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
containerd "github.com/containerd/containerd/v2/client"
|
||||
"github.com/containerd/containerd/v2/internal/userns"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
||||
// updateContainerIOOwner updates I/O files' owner to align with initial processe's UID/GID.
|
||||
func updateContainerIOOwner(ctx context.Context, cntr containerd.Container, config *runtime.ContainerConfig) ([]containerd.NewTaskOpts, error) {
|
||||
if config.GetLinux() == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// FIXME(fuweid): Ideally, the pipe owner should be aligned with process owner.
|
||||
// No matter what user namespace container uses, it should work well. However,
|
||||
// it breaks the sig-node conformance case - [when querying /stats/summary should report resource usage through the stats api].
|
||||
// In order to keep compatible, the change should apply to user namespace only.
|
||||
if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions() == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
spec, err := cntr.Spec(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get spec: %w", err)
|
||||
}
|
||||
|
||||
if spec.Linux == nil || spec.Process == nil {
|
||||
return nil, fmt.Errorf("invalid linux platform oci runtime spec")
|
||||
}
|
||||
|
||||
hostID, err := userns.IDMap{
|
||||
UidMap: spec.Linux.UIDMappings,
|
||||
GidMap: spec.Linux.GIDMappings,
|
||||
}.ToHost(userns.User{
|
||||
Uid: spec.Process.User.UID,
|
||||
Gid: spec.Process.User.GID,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to do idmap to get host ID: %w", err)
|
||||
}
|
||||
|
||||
return []containerd.NewTaskOpts{
|
||||
containerd.WithUIDOwner(hostID.Uid),
|
||||
containerd.WithGIDOwner(hostID.Gid),
|
||||
}, nil
|
||||
}
|
31
internal/cri/server/container_start_other.go
Normal file
31
internal/cri/server/container_start_other.go
Normal file
@ -0,0 +1,31 @@
|
||||
//go:build !linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
containerd "github.com/containerd/containerd/v2/client"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
||||
// updateContainerIOOwner updates I/O files' owner to align with initial processe's UID/GID.
|
||||
func updateContainerIOOwner(ctx context.Context, cntr containerd.Container, config *runtime.ContainerConfig) ([]containerd.NewTaskOpts, error) {
|
||||
return nil, nil
|
||||
}
|
Loading…
Reference in New Issue
Block a user