cri/server: Pass down uidMappings to OCI runtime

When the kubelet sends the uid/gid mappings for a mount, just pass them
down to the OCI runtime.

OCI runtimes support this since runc 1.2 and crun 1.8.1.

And whenever we add mounts (container mounts or image spec volumes) and
userns are requested by the kubelet, we use those mappings in the mounts
so the mounts are idmapped correctly. If no userns is used, we don't
send any mappings which just keeps the current behavior.

Signed-off-by: Rodrigo Campos <rodrigoca@microsoft.com>
This commit is contained in:
Rodrigo Campos 2023-01-23 18:35:28 +01:00
parent c07cadda3e
commit 97dfa7f556
4 changed files with 60 additions and 3 deletions

View File

@ -166,11 +166,34 @@ func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*ru
return fmt.Errorf("idmap mounts not yet supported, but they were requested for: %q", src)
}
var uidMapping []runtimespec.LinuxIDMapping
if mount.UidMappings != nil {
for _, mapping := range mount.UidMappings {
uidMapping = append(uidMapping, runtimespec.LinuxIDMapping{
HostID: mapping.HostId,
ContainerID: mapping.ContainerId,
Size: mapping.Length,
})
}
}
var gidMapping []runtimespec.LinuxIDMapping
if mount.GidMappings != nil {
for _, mapping := range mount.GidMappings {
gidMapping = append(gidMapping, runtimespec.LinuxIDMapping{
HostID: mapping.HostId,
ContainerID: mapping.ContainerId,
Size: mapping.Length,
})
}
}
s.Mounts = append(s.Mounts, runtimespec.Mount{
Source: src,
Destination: dst,
Type: "bind",
Options: options,
UIDMappings: uidMapping,
GIDMappings: gidMapping,
})
}
return nil

View File

@ -143,7 +143,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
var volumeMounts []*runtime.Mount
if !c.config.IgnoreImageDefinedVolumes {
// Create container image volumes mounts.
volumeMounts = c.volumeMounts(containerRootDir, config.GetMounts(), &image.ImageSpec.Config)
volumeMounts = c.volumeMounts(containerRootDir, config, &image.ImageSpec.Config)
} else if len(image.ImageSpec.Config.Volumes) != 0 {
log.G(ctx).Debugf("Ignoring volumes defined in image %v because IgnoreImageDefinedVolumes is set", image.ID)
}
@ -318,10 +318,19 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
// volumeMounts sets up image volumes for container. Rely on the removal of container
// root directory to do cleanup. Note that image volume will be skipped, if there is criMounts
// specified with the same destination.
func (c *criService) volumeMounts(containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount {
func (c *criService) volumeMounts(containerRootDir string, containerConfig *runtime.ContainerConfig, config *imagespec.ImageConfig) []*runtime.Mount {
if len(config.Volumes) == 0 {
return nil
}
var uidMappings, gidMappings []*runtime.IDMapping
if goruntime.GOOS != "windows" {
if usernsOpts := containerConfig.GetLinux().GetSecurityContext().GetNamespaceOptions().GetUsernsOptions(); usernsOpts != nil {
uidMappings = usernsOpts.GetUids()
gidMappings = usernsOpts.GetGids()
}
}
criMounts := containerConfig.GetMounts()
var mounts []*runtime.Mount
for dst := range config.Volumes {
if isInCRIMounts(dst, criMounts) {
@ -343,6 +352,8 @@ func (c *criService) volumeMounts(containerRootDir string, criMounts []*runtime.
ContainerPath: dst,
HostPath: src,
SelinuxRelabel: true,
UidMappings: uidMappings,
GidMappings: gidMappings,
})
}
return mounts

View File

@ -62,6 +62,12 @@ const (
func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount {
var mounts []*runtime.Mount
securityContext := config.GetLinux().GetSecurityContext()
var uidMappings, gidMappings []*runtime.IDMapping
if usernsOpts := securityContext.GetNamespaceOptions().GetUsernsOptions(); usernsOpts != nil {
uidMappings = usernsOpts.GetUids()
gidMappings = usernsOpts.GetGids()
}
if !isInCRIMounts(etcHostname, config.GetMounts()) {
// /etc/hostname is added since 1.1.6, 1.2.4 and 1.3.
// For in-place upgrade, the old sandbox doesn't have the hostname file,
@ -75,6 +81,8 @@ func (c *criService) containerMounts(sandboxID string, config *runtime.Container
HostPath: hostpath,
Readonly: securityContext.GetReadonlyRootfs(),
SelinuxRelabel: true,
UidMappings: uidMappings,
GidMappings: gidMappings,
})
}
}
@ -85,6 +93,8 @@ func (c *criService) containerMounts(sandboxID string, config *runtime.Container
HostPath: c.getSandboxHosts(sandboxID),
Readonly: securityContext.GetReadonlyRootfs(),
SelinuxRelabel: true,
UidMappings: uidMappings,
GidMappings: gidMappings,
})
}
@ -96,6 +106,8 @@ func (c *criService) containerMounts(sandboxID string, config *runtime.Container
HostPath: c.getResolvPath(sandboxID),
Readonly: securityContext.GetReadonlyRootfs(),
SelinuxRelabel: true,
UidMappings: uidMappings,
GidMappings: gidMappings,
})
}
@ -109,6 +121,16 @@ func (c *criService) containerMounts(sandboxID string, config *runtime.Container
HostPath: sandboxDevShm,
Readonly: false,
SelinuxRelabel: sandboxDevShm != devShm,
// XXX: tmpfs support for idmap mounts got merged in
// Linux 6.3.
// Our CI runs with 5.15 kernels, so disabling idmap
// mounts for this case makes the CI happy (the other fs
// used support idmap mounts in 5.15 kernels).
// We can enable this at a later stage, but as this
// tmpfs mount is exposed empty to the container (no
// prepopulated files) and using the hostIPC with userns
// is blocked by k8s, we can just avoid using the
// mappings and it should work fine.
})
}
return mounts

View File

@ -279,8 +279,9 @@ func TestVolumeMounts(t *testing.T) {
config := &imagespec.ImageConfig{
Volumes: test.imageVolumes,
}
containerConfig := &runtime.ContainerConfig{Mounts: test.criMounts}
c := newTestCRIService()
got := c.volumeMounts(testContainerRootDir, test.criMounts, config)
got := c.volumeMounts(testContainerRootDir, containerConfig, config)
assert.Len(t, got, len(test.expectedMountDest))
for _, dest := range test.expectedMountDest {
found := false