cri: Devices ownership from SecurityContext

CRI container runtimes mount devices (set via kubernetes device plugins)
to containers by taking the host user/group IDs (uid/gid) to the
corresponding container device.

This triggers a problem when trying to run those containers with
non-zero (root uid/gid = 0) uid/gid set via runAsUser/runAsGroup:
the container process has no permission to use the device even when
its gid is permissive to non-root users because the container user
does not belong to that group.

It is possible to workaround the problem by manually adding the device
gid(s) to supplementalGroups. However, this is also problematic because
the device gid(s) may have different values depending on the workers'
distro/version in the cluster.

This patch suggests to take RunAsUser/RunAsGroup set via SecurityContext
as the device UID/GID, respectively. The feature must be enabled by
setting device_ownership_from_security_context runtime config value to
true (valid on Linux only).

Signed-off-by: Mikko Ylinen <mikko.ylinen@intel.com>
This commit is contained in:
Mikko Ylinen
2021-03-09 09:33:03 +02:00
parent af1a0908d0
commit e0f8c04dad
5 changed files with 115 additions and 4 deletions

View File

@@ -285,8 +285,30 @@ func ensureSharedOrSlave(path string, lookupMount func(string) (mount.Info, erro
return errors.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountInfo.Mountpoint)
}
// getDeviceUserGroupID() is used to find the right uid/gid
// value for the device node created in the container namespace.
// The runtime executes mknod() and chmod()s the created
// device with the values returned here.
//
// On Linux, uid and gid are sufficient and the user/groupname do not
// need to be resolved.
//
// TODO(mythi): In case of user namespaces, the runtime simply bind
// mounts the devices from the host. Additional logic is needed
// to check that the runtimes effective UID/GID on the host has the
// permissions to access the device node and/or the right user namespace
// mappings are created.
//
// Ref: https://github.com/kubernetes/kubernetes/issues/92211
func getDeviceUserGroupID(runAsVal *runtime.Int64Value) uint32 {
if runAsVal != nil {
return uint32(runAsVal.GetValue())
}
return 0
}
// WithDevices sets the provided devices onto the container spec
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOpts {
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig, enableDeviceOwnershipFromSecurityContext bool) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
@@ -295,6 +317,8 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp
s.Linux.Resources = &runtimespec.LinuxResources{}
}
oldDevices := len(s.Linux.Devices)
for _, device := range config.GetDevices() {
path, err := osi.ResolveSymbolicLink(device.HostPath)
if err != nil {
@@ -306,6 +330,24 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp
return err
}
}
if enableDeviceOwnershipFromSecurityContext {
UID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsUser())
GID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsGroup())
// Loop all new devices added by oci.WithDevices() to update their
// dev.UID/dev.GID.
//
// non-zero UID/GID from SecurityContext is used to override host's
// device UID/GID for the container.
for idx := oldDevices; idx < len(s.Linux.Devices); idx++ {
if UID != 0 {
*s.Linux.Devices[idx].UID = UID
}
if GID != 0 {
*s.Linux.Devices[idx].GID = GID
}
}
}
return nil
}
}