cri: Devices ownership from SecurityContext
CRI container runtimes mount devices (set via kubernetes device plugins) to containers by taking the host user/group IDs (uid/gid) to the corresponding container device. This triggers a problem when trying to run those containers with non-zero (root uid/gid = 0) uid/gid set via runAsUser/runAsGroup: the container process has no permission to use the device even when its gid is permissive to non-root users because the container user does not belong to that group. It is possible to workaround the problem by manually adding the device gid(s) to supplementalGroups. However, this is also problematic because the device gid(s) may have different values depending on the workers' distro/version in the cluster. This patch suggests to take RunAsUser/RunAsGroup set via SecurityContext as the device UID/GID, respectively. The feature must be enabled by setting device_ownership_from_security_context runtime config value to true (valid on Linux only). Signed-off-by: Mikko Ylinen <mikko.ylinen@intel.com>
This commit is contained in:
parent
af1a0908d0
commit
e0f8c04dad
@ -37,7 +37,7 @@ func WithHostDevices(_ context.Context, _ Client, _ *containers.Container, s *Sp
|
||||
return nil
|
||||
}
|
||||
|
||||
// WithDevices recursively adds devices from the passed in path and associated cgroup rules for that device.
|
||||
// WithDevices recursively adds devices from the passed in path.
|
||||
// If devicePath is a dir it traverses the dir to add all devices in that dir.
|
||||
// If devicePath is not a dir, it attempts to add the single device.
|
||||
func WithDevices(devicePath, containerPath, permissions string) SpecOpts {
|
||||
|
@ -258,6 +258,9 @@ type PluginConfig struct {
|
||||
// present in /sys/fs/cgroup/cgroup.controllers.
|
||||
// This helps with running rootless mode + cgroup v2 + systemd but without hugetlb delegation.
|
||||
DisableHugetlbController bool `toml:"disable_hugetlb_controller" json:"disableHugetlbController"`
|
||||
// DeviceOwnershipFromSecurityContext changes the default behavior of setting container devices uid/gid
|
||||
// from CRI's SecurityContext (RunAsUser/RunAsGroup) instead of taking host's uid/gid. Defaults to false.
|
||||
DeviceOwnershipFromSecurityContext bool `toml:"device_ownership_from_security_context" json:"device_ownership_from_security_context"`
|
||||
// IgnoreImageDefinedVolumes ignores volumes defined by the image. Useful for better resource
|
||||
// isolation, security and early detection of issues in the mount configuration when using
|
||||
// ReadOnlyRootFilesystem since containers won't silently mount a temporary volume.
|
||||
|
@ -285,8 +285,30 @@ func ensureSharedOrSlave(path string, lookupMount func(string) (mount.Info, erro
|
||||
return errors.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountInfo.Mountpoint)
|
||||
}
|
||||
|
||||
// getDeviceUserGroupID() is used to find the right uid/gid
|
||||
// value for the device node created in the container namespace.
|
||||
// The runtime executes mknod() and chmod()s the created
|
||||
// device with the values returned here.
|
||||
//
|
||||
// On Linux, uid and gid are sufficient and the user/groupname do not
|
||||
// need to be resolved.
|
||||
//
|
||||
// TODO(mythi): In case of user namespaces, the runtime simply bind
|
||||
// mounts the devices from the host. Additional logic is needed
|
||||
// to check that the runtimes effective UID/GID on the host has the
|
||||
// permissions to access the device node and/or the right user namespace
|
||||
// mappings are created.
|
||||
//
|
||||
// Ref: https://github.com/kubernetes/kubernetes/issues/92211
|
||||
func getDeviceUserGroupID(runAsVal *runtime.Int64Value) uint32 {
|
||||
if runAsVal != nil {
|
||||
return uint32(runAsVal.GetValue())
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// WithDevices sets the provided devices onto the container spec
|
||||
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOpts {
|
||||
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig, enableDeviceOwnershipFromSecurityContext bool) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
if s.Linux == nil {
|
||||
s.Linux = &runtimespec.Linux{}
|
||||
@ -295,6 +317,8 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp
|
||||
s.Linux.Resources = &runtimespec.LinuxResources{}
|
||||
}
|
||||
|
||||
oldDevices := len(s.Linux.Devices)
|
||||
|
||||
for _, device := range config.GetDevices() {
|
||||
path, err := osi.ResolveSymbolicLink(device.HostPath)
|
||||
if err != nil {
|
||||
@ -306,6 +330,24 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if enableDeviceOwnershipFromSecurityContext {
|
||||
UID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsUser())
|
||||
GID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsGroup())
|
||||
// Loop all new devices added by oci.WithDevices() to update their
|
||||
// dev.UID/dev.GID.
|
||||
//
|
||||
// non-zero UID/GID from SecurityContext is used to override host's
|
||||
// device UID/GID for the container.
|
||||
for idx := oldDevices; idx < len(s.Linux.Devices); idx++ {
|
||||
if UID != 0 {
|
||||
*s.Linux.Devices[idx].UID = UID
|
||||
}
|
||||
if GID != 0 {
|
||||
*s.Linux.Devices[idx].GID = GID
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
@ -222,11 +222,11 @@ func (c *criService) containerSpec(
|
||||
specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed)
|
||||
} else {
|
||||
// add requested devices by the config as host devices are not automatically added
|
||||
specOpts = append(specOpts, customopts.WithDevices(c.os, config),
|
||||
specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext),
|
||||
customopts.WithCapabilities(securityContext, c.allCaps))
|
||||
}
|
||||
} else { // not privileged
|
||||
specOpts = append(specOpts, customopts.WithDevices(c.os, config),
|
||||
specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext),
|
||||
customopts.WithCapabilities(securityContext, c.allCaps))
|
||||
}
|
||||
|
||||
|
@ -1304,6 +1304,72 @@ func TestGenerateUserString(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestNonRootUserAndDevices(t *testing.T) {
|
||||
testPid := uint32(1234)
|
||||
c := newTestCRIService()
|
||||
testSandboxID := "sandbox-id"
|
||||
testContainerName := "container-name"
|
||||
containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
|
||||
|
||||
hostDevicesRaw, err := oci.HostDevices()
|
||||
assert.NoError(t, err)
|
||||
|
||||
testDevice := hostDevicesRaw[0]
|
||||
|
||||
for desc, test := range map[string]struct {
|
||||
uid, gid *runtime.Int64Value
|
||||
deviceOwnershipFromSecurityContext bool
|
||||
expectedDeviceUID uint32
|
||||
expectedDeviceGID uint32
|
||||
}{
|
||||
"expect non-root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled": {
|
||||
uid: &runtime.Int64Value{Value: 1},
|
||||
gid: &runtime.Int64Value{Value: 10},
|
||||
expectedDeviceUID: *testDevice.UID,
|
||||
expectedDeviceGID: *testDevice.GID,
|
||||
},
|
||||
"expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled": {
|
||||
uid: &runtime.Int64Value{Value: 0},
|
||||
gid: &runtime.Int64Value{Value: 0},
|
||||
expectedDeviceUID: *testDevice.UID,
|
||||
expectedDeviceGID: *testDevice.GID,
|
||||
},
|
||||
"expect non-root container's Devices Uid/Gid to be the same as RunAsUser/RunAsGroup when deviceOwnershipFromSecurityContext is enabled": {
|
||||
uid: &runtime.Int64Value{Value: 1},
|
||||
gid: &runtime.Int64Value{Value: 10},
|
||||
deviceOwnershipFromSecurityContext: true,
|
||||
expectedDeviceUID: 1,
|
||||
expectedDeviceGID: 10,
|
||||
},
|
||||
"expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is enabled": {
|
||||
uid: &runtime.Int64Value{Value: 0},
|
||||
gid: &runtime.Int64Value{Value: 0},
|
||||
deviceOwnershipFromSecurityContext: true,
|
||||
expectedDeviceUID: *testDevice.UID,
|
||||
expectedDeviceGID: *testDevice.GID,
|
||||
},
|
||||
} {
|
||||
t.Logf("TestCase %q", desc)
|
||||
|
||||
c.config.DeviceOwnershipFromSecurityContext = test.deviceOwnershipFromSecurityContext
|
||||
containerConfig.Linux.SecurityContext.RunAsUser = test.uid
|
||||
containerConfig.Linux.SecurityContext.RunAsGroup = test.gid
|
||||
containerConfig.Devices = []*runtime.Device{
|
||||
{
|
||||
ContainerPath: testDevice.Path,
|
||||
HostPath: testDevice.Path,
|
||||
Permissions: "r",
|
||||
},
|
||||
}
|
||||
|
||||
spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.Equal(t, test.expectedDeviceUID, *spec.Linux.Devices[0].UID)
|
||||
assert.Equal(t, test.expectedDeviceGID, *spec.Linux.Devices[0].GID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrivilegedDevices(t *testing.T) {
|
||||
testPid := uint32(1234)
|
||||
c := newTestCRIService()
|
||||
|
Loading…
Reference in New Issue
Block a user