diff --git a/pkg/cri/sbserver/container_create.go b/pkg/cri/sbserver/container_create.go index f3a241369..ad523a520 100644 --- a/pkg/cri/sbserver/container_create.go +++ b/pkg/cri/sbserver/container_create.go @@ -157,7 +157,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta var volumeMounts []*runtime.Mount if !c.config.IgnoreImageDefinedVolumes { // Create container image volumes mounts. - volumeMounts = c.volumeMounts(platform, containerRootDir, config.GetMounts(), &image.ImageSpec.Config) + volumeMounts = c.volumeMounts(platform, containerRootDir, config, &image.ImageSpec.Config) } else if len(image.ImageSpec.Config.Volumes) != 0 { log.G(ctx).Debugf("Ignoring volumes defined in image %v because IgnoreImageDefinedVolumes is set", image.ID) } @@ -341,7 +341,17 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta // volumeMounts sets up image volumes for container. Rely on the removal of container // root directory to do cleanup. Note that image volume will be skipped, if there is criMounts // specified with the same destination. -func (c *criService) volumeMounts(platform platforms.Platform, containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount { +func (c *criService) volumeMounts(platform platforms.Platform, containerRootDir string, containerConfig *runtime.ContainerConfig, config *imagespec.ImageConfig) []*runtime.Mount { + var uidMappings, gidMappings []*runtime.IDMapping + if platform.OS == "linux" { + if usernsOpts := containerConfig.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions(); usernsOpts != nil { + uidMappings = usernsOpts.GetUids() + gidMappings = usernsOpts.GetGids() + } + } + + criMounts := containerConfig.GetMounts() + if len(config.Volumes) == 0 { return nil } @@ -371,6 +381,8 @@ func (c *criService) volumeMounts(platform platforms.Platform, containerRootDir ContainerPath: dst, HostPath: src, SelinuxRelabel: true, + UidMappings: uidMappings, + GidMappings: gidMappings, }) } return mounts @@ -966,11 +978,17 @@ func (c *criService) buildDarwinSpec( return specOpts, nil } -// containerMounts sets up necessary container system file mounts +// linuxContainerMounts sets up necessary container system file mounts // including /dev/shm, /etc/hosts and /etc/resolv.conf. func (c *criService) linuxContainerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount { var mounts []*runtime.Mount securityContext := config.GetLinux().GetSecurityContext() + var uidMappings, gidMappings []*runtime.IDMapping + if usernsOpts := securityContext.GetNamespaceOptions().GetUsernsOptions(); usernsOpts != nil { + uidMappings = usernsOpts.GetUids() + gidMappings = usernsOpts.GetGids() + } + if !isInCRIMounts(etcHostname, config.GetMounts()) { // /etc/hostname is added since 1.1.6, 1.2.4 and 1.3. // For in-place upgrade, the old sandbox doesn't have the hostname file, @@ -984,6 +1002,8 @@ func (c *criService) linuxContainerMounts(sandboxID string, config *runtime.Cont HostPath: hostpath, Readonly: securityContext.GetReadonlyRootfs(), SelinuxRelabel: true, + UidMappings: uidMappings, + GidMappings: gidMappings, }) } } @@ -994,6 +1014,8 @@ func (c *criService) linuxContainerMounts(sandboxID string, config *runtime.Cont HostPath: c.getSandboxHosts(sandboxID), Readonly: securityContext.GetReadonlyRootfs(), SelinuxRelabel: true, + UidMappings: uidMappings, + GidMappings: gidMappings, }) } @@ -1005,6 +1027,8 @@ func (c *criService) linuxContainerMounts(sandboxID string, config *runtime.Cont HostPath: c.getResolvPath(sandboxID), Readonly: securityContext.GetReadonlyRootfs(), SelinuxRelabel: true, + UidMappings: uidMappings, + GidMappings: gidMappings, }) } @@ -1018,6 +1042,17 @@ func (c *criService) linuxContainerMounts(sandboxID string, config *runtime.Cont HostPath: sandboxDevShm, Readonly: false, SelinuxRelabel: sandboxDevShm != devShm, + // XXX: tmpfs support for idmap mounts got merged in + // Linux 6.3. + // Our Ubuntu 22.04 CI runs with 5.15 kernels, so + // disabling idmap mounts for this case makes the CI + // happy (the other fs used support idmap mounts in 5.15 + // kernels). + // We can enable this at a later stage, but as this + // tmpfs mount is exposed empty to the container (no + // prepopulated files) and using the hostIPC with userns + // is blocked by k8s, we can just avoid using the + // mappings and it should work fine. }) } return mounts diff --git a/pkg/cri/sbserver/container_create_test.go b/pkg/cri/sbserver/container_create_test.go index 3f6cb067c..bbc21f209 100644 --- a/pkg/cri/sbserver/container_create_test.go +++ b/pkg/cri/sbserver/container_create_test.go @@ -231,12 +231,22 @@ func TestContainerSpecCommand(t *testing.T) { func TestVolumeMounts(t *testing.T) { testContainerRootDir := "test-container-root" + idmap := []*runtime.IDMapping{ + { + ContainerId: 0, + HostId: 100, + Length: 1, + }, + } + for _, test := range []struct { desc string platform platforms.Platform criMounts []*runtime.Mount + usernsEnabled bool imageVolumes map[string]struct{} expectedMountDest []string + expectedMappings []*runtime.IDMapping }{ { desc: "should setup rw mount for image volumes", @@ -297,25 +307,88 @@ func TestVolumeMounts(t *testing.T) { "/abs/test-volume-4", }, }, + { + desc: "should include mappings for image volumes on Linux", + platform: platforms.Platform{OS: "linux"}, + usernsEnabled: true, + imageVolumes: map[string]struct{}{ + "/test-volume-1/": {}, + "/test-volume-2/": {}, + }, + expectedMountDest: []string{ + "/test-volume-2/", + "/test-volume-2/", + }, + expectedMappings: idmap, + }, + { + desc: "should NOT include mappings for image volumes on Linux if !userns", + platform: platforms.Platform{OS: "linux"}, + usernsEnabled: false, + imageVolumes: map[string]struct{}{ + "/test-volume-1/": {}, + "/test-volume-2/": {}, + }, + expectedMountDest: []string{ + "/test-volume-2/", + "/test-volume-2/", + }, + }, + { + desc: "should convert rel imageVolume paths to abs paths and add userns mappings", + platform: platforms.Platform{OS: "linux"}, + usernsEnabled: true, + imageVolumes: map[string]struct{}{ + "test-volume-1/": {}, + "C:/test-volume-2/": {}, + "../../test-volume-3/": {}, + }, + expectedMountDest: []string{ + "/test-volume-1", + "/C:/test-volume-2", + "/test-volume-3", + }, + expectedMappings: idmap, + }, } { test := test t.Run(test.desc, func(t *testing.T) { config := &imagespec.ImageConfig{ Volumes: test.imageVolumes, } + containerConfig := &runtime.ContainerConfig{Mounts: test.criMounts} + if test.usernsEnabled { + containerConfig.Linux = &runtime.LinuxContainerConfig{ + SecurityContext: &runtime.LinuxContainerSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + UsernsOptions: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_POD, + Uids: idmap, + Gids: idmap, + }, + }, + }, + } + } + c := newTestCRIService() - got := c.volumeMounts(test.platform, testContainerRootDir, test.criMounts, config) + got := c.volumeMounts(test.platform, testContainerRootDir, containerConfig, config) assert.Len(t, got, len(test.expectedMountDest)) for _, dest := range test.expectedMountDest { found := false for _, m := range got { - if m.ContainerPath == dest { - found = true - assert.Equal(t, - filepath.Dir(m.HostPath), - filepath.Join(testContainerRootDir, "volumes")) - break + if m.ContainerPath != dest { + continue } + found = true + assert.Equal(t, + filepath.Dir(m.HostPath), + filepath.Join(testContainerRootDir, "volumes")) + if test.expectedMappings != nil { + assert.Equal(t, test.expectedMappings, m.UidMappings) + assert.Equal(t, test.expectedMappings, m.GidMappings) + } + break } assert.True(t, found) } @@ -481,6 +554,14 @@ func TestBaseRuntimeSpec(t *testing.T) { func TestLinuxContainerMounts(t *testing.T) { const testSandboxID = "test-id" + idmap := []*runtime.IDMapping{ + { + ContainerId: 0, + HostId: 100, + Length: 1, + }, + } + for _, test := range []struct { desc string statFn func(string) (os.FileInfo, error) @@ -550,6 +631,50 @@ func TestLinuxContainerMounts(t *testing.T) { }, }, }, + { + desc: "should setup uidMappings/gidMappings when userns is used", + securityContext: &runtime.LinuxContainerSecurityContext{ + NamespaceOptions: &runtime.NamespaceOption{ + UsernsOptions: &runtime.UserNamespace{ + Mode: runtime.NamespaceMode_POD, + Uids: idmap, + Gids: idmap, + }, + }, + }, + expectedMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hostname", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), + Readonly: false, + SelinuxRelabel: true, + UidMappings: idmap, + GidMappings: idmap, + }, + { + ContainerPath: "/etc/hosts", + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"), + Readonly: false, + SelinuxRelabel: true, + UidMappings: idmap, + GidMappings: idmap, + }, + { + ContainerPath: resolvConfPath, + HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"), + Readonly: false, + SelinuxRelabel: true, + UidMappings: idmap, + GidMappings: idmap, + }, + { + ContainerPath: "/dev/shm", + HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"), + Readonly: false, + SelinuxRelabel: true, + }, + }, + }, { desc: "should use host /dev/shm when host ipc is set", securityContext: &runtime.LinuxContainerSecurityContext{