Merge pull request #5122 from mythi/non-root-containers-and-devices-v2

cri: Devices ownership from SecurityContext
This commit is contained in:
Mike Brown 2021-08-31 16:11:18 -05:00 committed by GitHub
commit 4bc5ca76e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 115 additions and 4 deletions

View File

@ -37,7 +37,7 @@ func WithHostDevices(_ context.Context, _ Client, _ *containers.Container, s *Sp
return nil
}
// WithDevices recursively adds devices from the passed in path and associated cgroup rules for that device.
// WithDevices recursively adds devices from the passed in path.
// If devicePath is a dir it traverses the dir to add all devices in that dir.
// If devicePath is not a dir, it attempts to add the single device.
func WithDevices(devicePath, containerPath, permissions string) SpecOpts {

View File

@ -258,6 +258,9 @@ type PluginConfig struct {
// present in /sys/fs/cgroup/cgroup.controllers.
// This helps with running rootless mode + cgroup v2 + systemd but without hugetlb delegation.
DisableHugetlbController bool `toml:"disable_hugetlb_controller" json:"disableHugetlbController"`
// DeviceOwnershipFromSecurityContext changes the default behavior of setting container devices uid/gid
// from CRI's SecurityContext (RunAsUser/RunAsGroup) instead of taking host's uid/gid. Defaults to false.
DeviceOwnershipFromSecurityContext bool `toml:"device_ownership_from_security_context" json:"device_ownership_from_security_context"`
// IgnoreImageDefinedVolumes ignores volumes defined by the image. Useful for better resource
// isolation, security and early detection of issues in the mount configuration when using
// ReadOnlyRootFilesystem since containers won't silently mount a temporary volume.

View File

@ -285,8 +285,30 @@ func ensureSharedOrSlave(path string, lookupMount func(string) (mount.Info, erro
return errors.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountInfo.Mountpoint)
}
// getDeviceUserGroupID() is used to find the right uid/gid
// value for the device node created in the container namespace.
// The runtime executes mknod() and chmod()s the created
// device with the values returned here.
//
// On Linux, uid and gid are sufficient and the user/groupname do not
// need to be resolved.
//
// TODO(mythi): In case of user namespaces, the runtime simply bind
// mounts the devices from the host. Additional logic is needed
// to check that the runtimes effective UID/GID on the host has the
// permissions to access the device node and/or the right user namespace
// mappings are created.
//
// Ref: https://github.com/kubernetes/kubernetes/issues/92211
func getDeviceUserGroupID(runAsVal *runtime.Int64Value) uint32 {
if runAsVal != nil {
return uint32(runAsVal.GetValue())
}
return 0
}
// WithDevices sets the provided devices onto the container spec
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOpts {
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig, enableDeviceOwnershipFromSecurityContext bool) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
@ -295,6 +317,8 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp
s.Linux.Resources = &runtimespec.LinuxResources{}
}
oldDevices := len(s.Linux.Devices)
for _, device := range config.GetDevices() {
path, err := osi.ResolveSymbolicLink(device.HostPath)
if err != nil {
@ -306,6 +330,24 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp
return err
}
}
if enableDeviceOwnershipFromSecurityContext {
UID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsUser())
GID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsGroup())
// Loop all new devices added by oci.WithDevices() to update their
// dev.UID/dev.GID.
//
// non-zero UID/GID from SecurityContext is used to override host's
// device UID/GID for the container.
for idx := oldDevices; idx < len(s.Linux.Devices); idx++ {
if UID != 0 {
*s.Linux.Devices[idx].UID = UID
}
if GID != 0 {
*s.Linux.Devices[idx].GID = GID
}
}
}
return nil
}
}

View File

@ -222,11 +222,11 @@ func (c *criService) containerSpec(
specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed)
} else {
// add requested devices by the config as host devices are not automatically added
specOpts = append(specOpts, customopts.WithDevices(c.os, config),
specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext),
customopts.WithCapabilities(securityContext, c.allCaps))
}
} else { // not privileged
specOpts = append(specOpts, customopts.WithDevices(c.os, config),
specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext),
customopts.WithCapabilities(securityContext, c.allCaps))
}

View File

@ -1304,6 +1304,72 @@ func TestGenerateUserString(t *testing.T) {
}
}
func TestNonRootUserAndDevices(t *testing.T) {
testPid := uint32(1234)
c := newTestCRIService()
testSandboxID := "sandbox-id"
testContainerName := "container-name"
containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
hostDevicesRaw, err := oci.HostDevices()
assert.NoError(t, err)
testDevice := hostDevicesRaw[0]
for desc, test := range map[string]struct {
uid, gid *runtime.Int64Value
deviceOwnershipFromSecurityContext bool
expectedDeviceUID uint32
expectedDeviceGID uint32
}{
"expect non-root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled": {
uid: &runtime.Int64Value{Value: 1},
gid: &runtime.Int64Value{Value: 10},
expectedDeviceUID: *testDevice.UID,
expectedDeviceGID: *testDevice.GID,
},
"expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled": {
uid: &runtime.Int64Value{Value: 0},
gid: &runtime.Int64Value{Value: 0},
expectedDeviceUID: *testDevice.UID,
expectedDeviceGID: *testDevice.GID,
},
"expect non-root container's Devices Uid/Gid to be the same as RunAsUser/RunAsGroup when deviceOwnershipFromSecurityContext is enabled": {
uid: &runtime.Int64Value{Value: 1},
gid: &runtime.Int64Value{Value: 10},
deviceOwnershipFromSecurityContext: true,
expectedDeviceUID: 1,
expectedDeviceGID: 10,
},
"expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is enabled": {
uid: &runtime.Int64Value{Value: 0},
gid: &runtime.Int64Value{Value: 0},
deviceOwnershipFromSecurityContext: true,
expectedDeviceUID: *testDevice.UID,
expectedDeviceGID: *testDevice.GID,
},
} {
t.Logf("TestCase %q", desc)
c.config.DeviceOwnershipFromSecurityContext = test.deviceOwnershipFromSecurityContext
containerConfig.Linux.SecurityContext.RunAsUser = test.uid
containerConfig.Linux.SecurityContext.RunAsGroup = test.gid
containerConfig.Devices = []*runtime.Device{
{
ContainerPath: testDevice.Path,
HostPath: testDevice.Path,
Permissions: "r",
},
}
spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
assert.NoError(t, err)
assert.Equal(t, test.expectedDeviceUID, *spec.Linux.Devices[0].UID)
assert.Equal(t, test.expectedDeviceGID, *spec.Linux.Devices[0].GID)
}
}
func TestPrivilegedDevices(t *testing.T) {
testPid := uint32(1234)
c := newTestCRIService()