Merge pull request #5122 from mythi/non-root-containers-and-devices-v2
cri: Devices ownership from SecurityContext
This commit is contained in:
commit
4bc5ca76e7
@ -37,7 +37,7 @@ func WithHostDevices(_ context.Context, _ Client, _ *containers.Container, s *Sp
|
||||
return nil
|
||||
}
|
||||
|
||||
// WithDevices recursively adds devices from the passed in path and associated cgroup rules for that device.
|
||||
// WithDevices recursively adds devices from the passed in path.
|
||||
// If devicePath is a dir it traverses the dir to add all devices in that dir.
|
||||
// If devicePath is not a dir, it attempts to add the single device.
|
||||
func WithDevices(devicePath, containerPath, permissions string) SpecOpts {
|
||||
|
@ -258,6 +258,9 @@ type PluginConfig struct {
|
||||
// present in /sys/fs/cgroup/cgroup.controllers.
|
||||
// This helps with running rootless mode + cgroup v2 + systemd but without hugetlb delegation.
|
||||
DisableHugetlbController bool `toml:"disable_hugetlb_controller" json:"disableHugetlbController"`
|
||||
// DeviceOwnershipFromSecurityContext changes the default behavior of setting container devices uid/gid
|
||||
// from CRI's SecurityContext (RunAsUser/RunAsGroup) instead of taking host's uid/gid. Defaults to false.
|
||||
DeviceOwnershipFromSecurityContext bool `toml:"device_ownership_from_security_context" json:"device_ownership_from_security_context"`
|
||||
// IgnoreImageDefinedVolumes ignores volumes defined by the image. Useful for better resource
|
||||
// isolation, security and early detection of issues in the mount configuration when using
|
||||
// ReadOnlyRootFilesystem since containers won't silently mount a temporary volume.
|
||||
|
@ -285,8 +285,30 @@ func ensureSharedOrSlave(path string, lookupMount func(string) (mount.Info, erro
|
||||
return errors.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountInfo.Mountpoint)
|
||||
}
|
||||
|
||||
// getDeviceUserGroupID() is used to find the right uid/gid
|
||||
// value for the device node created in the container namespace.
|
||||
// The runtime executes mknod() and chmod()s the created
|
||||
// device with the values returned here.
|
||||
//
|
||||
// On Linux, uid and gid are sufficient and the user/groupname do not
|
||||
// need to be resolved.
|
||||
//
|
||||
// TODO(mythi): In case of user namespaces, the runtime simply bind
|
||||
// mounts the devices from the host. Additional logic is needed
|
||||
// to check that the runtimes effective UID/GID on the host has the
|
||||
// permissions to access the device node and/or the right user namespace
|
||||
// mappings are created.
|
||||
//
|
||||
// Ref: https://github.com/kubernetes/kubernetes/issues/92211
|
||||
func getDeviceUserGroupID(runAsVal *runtime.Int64Value) uint32 {
|
||||
if runAsVal != nil {
|
||||
return uint32(runAsVal.GetValue())
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// WithDevices sets the provided devices onto the container spec
|
||||
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOpts {
|
||||
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig, enableDeviceOwnershipFromSecurityContext bool) oci.SpecOpts {
|
||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
||||
if s.Linux == nil {
|
||||
s.Linux = &runtimespec.Linux{}
|
||||
@ -295,6 +317,8 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp
|
||||
s.Linux.Resources = &runtimespec.LinuxResources{}
|
||||
}
|
||||
|
||||
oldDevices := len(s.Linux.Devices)
|
||||
|
||||
for _, device := range config.GetDevices() {
|
||||
path, err := osi.ResolveSymbolicLink(device.HostPath)
|
||||
if err != nil {
|
||||
@ -306,6 +330,24 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if enableDeviceOwnershipFromSecurityContext {
|
||||
UID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsUser())
|
||||
GID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsGroup())
|
||||
// Loop all new devices added by oci.WithDevices() to update their
|
||||
// dev.UID/dev.GID.
|
||||
//
|
||||
// non-zero UID/GID from SecurityContext is used to override host's
|
||||
// device UID/GID for the container.
|
||||
for idx := oldDevices; idx < len(s.Linux.Devices); idx++ {
|
||||
if UID != 0 {
|
||||
*s.Linux.Devices[idx].UID = UID
|
||||
}
|
||||
if GID != 0 {
|
||||
*s.Linux.Devices[idx].GID = GID
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
@ -222,11 +222,11 @@ func (c *criService) containerSpec(
|
||||
specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed)
|
||||
} else {
|
||||
// add requested devices by the config as host devices are not automatically added
|
||||
specOpts = append(specOpts, customopts.WithDevices(c.os, config),
|
||||
specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext),
|
||||
customopts.WithCapabilities(securityContext, c.allCaps))
|
||||
}
|
||||
} else { // not privileged
|
||||
specOpts = append(specOpts, customopts.WithDevices(c.os, config),
|
||||
specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext),
|
||||
customopts.WithCapabilities(securityContext, c.allCaps))
|
||||
}
|
||||
|
||||
|
@ -1304,6 +1304,72 @@ func TestGenerateUserString(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestNonRootUserAndDevices(t *testing.T) {
|
||||
testPid := uint32(1234)
|
||||
c := newTestCRIService()
|
||||
testSandboxID := "sandbox-id"
|
||||
testContainerName := "container-name"
|
||||
containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
|
||||
|
||||
hostDevicesRaw, err := oci.HostDevices()
|
||||
assert.NoError(t, err)
|
||||
|
||||
testDevice := hostDevicesRaw[0]
|
||||
|
||||
for desc, test := range map[string]struct {
|
||||
uid, gid *runtime.Int64Value
|
||||
deviceOwnershipFromSecurityContext bool
|
||||
expectedDeviceUID uint32
|
||||
expectedDeviceGID uint32
|
||||
}{
|
||||
"expect non-root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled": {
|
||||
uid: &runtime.Int64Value{Value: 1},
|
||||
gid: &runtime.Int64Value{Value: 10},
|
||||
expectedDeviceUID: *testDevice.UID,
|
||||
expectedDeviceGID: *testDevice.GID,
|
||||
},
|
||||
"expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled": {
|
||||
uid: &runtime.Int64Value{Value: 0},
|
||||
gid: &runtime.Int64Value{Value: 0},
|
||||
expectedDeviceUID: *testDevice.UID,
|
||||
expectedDeviceGID: *testDevice.GID,
|
||||
},
|
||||
"expect non-root container's Devices Uid/Gid to be the same as RunAsUser/RunAsGroup when deviceOwnershipFromSecurityContext is enabled": {
|
||||
uid: &runtime.Int64Value{Value: 1},
|
||||
gid: &runtime.Int64Value{Value: 10},
|
||||
deviceOwnershipFromSecurityContext: true,
|
||||
expectedDeviceUID: 1,
|
||||
expectedDeviceGID: 10,
|
||||
},
|
||||
"expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is enabled": {
|
||||
uid: &runtime.Int64Value{Value: 0},
|
||||
gid: &runtime.Int64Value{Value: 0},
|
||||
deviceOwnershipFromSecurityContext: true,
|
||||
expectedDeviceUID: *testDevice.UID,
|
||||
expectedDeviceGID: *testDevice.GID,
|
||||
},
|
||||
} {
|
||||
t.Logf("TestCase %q", desc)
|
||||
|
||||
c.config.DeviceOwnershipFromSecurityContext = test.deviceOwnershipFromSecurityContext
|
||||
containerConfig.Linux.SecurityContext.RunAsUser = test.uid
|
||||
containerConfig.Linux.SecurityContext.RunAsGroup = test.gid
|
||||
containerConfig.Devices = []*runtime.Device{
|
||||
{
|
||||
ContainerPath: testDevice.Path,
|
||||
HostPath: testDevice.Path,
|
||||
Permissions: "r",
|
||||
},
|
||||
}
|
||||
|
||||
spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.Equal(t, test.expectedDeviceUID, *spec.Linux.Devices[0].UID)
|
||||
assert.Equal(t, test.expectedDeviceGID, *spec.Linux.Devices[0].GID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrivilegedDevices(t *testing.T) {
|
||||
testPid := uint32(1234)
|
||||
c := newTestCRIService()
|
||||
|
Loading…
Reference in New Issue
Block a user