Merge pull request #5122 from mythi/non-root-containers-and-devices-v2
cri: Devices ownership from SecurityContext
This commit is contained in:
commit
4bc5ca76e7
@ -37,7 +37,7 @@ func WithHostDevices(_ context.Context, _ Client, _ *containers.Container, s *Sp
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// WithDevices recursively adds devices from the passed in path and associated cgroup rules for that device.
|
// WithDevices recursively adds devices from the passed in path.
|
||||||
// If devicePath is a dir it traverses the dir to add all devices in that dir.
|
// If devicePath is a dir it traverses the dir to add all devices in that dir.
|
||||||
// If devicePath is not a dir, it attempts to add the single device.
|
// If devicePath is not a dir, it attempts to add the single device.
|
||||||
func WithDevices(devicePath, containerPath, permissions string) SpecOpts {
|
func WithDevices(devicePath, containerPath, permissions string) SpecOpts {
|
||||||
|
@ -258,6 +258,9 @@ type PluginConfig struct {
|
|||||||
// present in /sys/fs/cgroup/cgroup.controllers.
|
// present in /sys/fs/cgroup/cgroup.controllers.
|
||||||
// This helps with running rootless mode + cgroup v2 + systemd but without hugetlb delegation.
|
// This helps with running rootless mode + cgroup v2 + systemd but without hugetlb delegation.
|
||||||
DisableHugetlbController bool `toml:"disable_hugetlb_controller" json:"disableHugetlbController"`
|
DisableHugetlbController bool `toml:"disable_hugetlb_controller" json:"disableHugetlbController"`
|
||||||
|
// DeviceOwnershipFromSecurityContext changes the default behavior of setting container devices uid/gid
|
||||||
|
// from CRI's SecurityContext (RunAsUser/RunAsGroup) instead of taking host's uid/gid. Defaults to false.
|
||||||
|
DeviceOwnershipFromSecurityContext bool `toml:"device_ownership_from_security_context" json:"device_ownership_from_security_context"`
|
||||||
// IgnoreImageDefinedVolumes ignores volumes defined by the image. Useful for better resource
|
// IgnoreImageDefinedVolumes ignores volumes defined by the image. Useful for better resource
|
||||||
// isolation, security and early detection of issues in the mount configuration when using
|
// isolation, security and early detection of issues in the mount configuration when using
|
||||||
// ReadOnlyRootFilesystem since containers won't silently mount a temporary volume.
|
// ReadOnlyRootFilesystem since containers won't silently mount a temporary volume.
|
||||||
|
@ -285,8 +285,30 @@ func ensureSharedOrSlave(path string, lookupMount func(string) (mount.Info, erro
|
|||||||
return errors.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountInfo.Mountpoint)
|
return errors.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountInfo.Mountpoint)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getDeviceUserGroupID() is used to find the right uid/gid
|
||||||
|
// value for the device node created in the container namespace.
|
||||||
|
// The runtime executes mknod() and chmod()s the created
|
||||||
|
// device with the values returned here.
|
||||||
|
//
|
||||||
|
// On Linux, uid and gid are sufficient and the user/groupname do not
|
||||||
|
// need to be resolved.
|
||||||
|
//
|
||||||
|
// TODO(mythi): In case of user namespaces, the runtime simply bind
|
||||||
|
// mounts the devices from the host. Additional logic is needed
|
||||||
|
// to check that the runtimes effective UID/GID on the host has the
|
||||||
|
// permissions to access the device node and/or the right user namespace
|
||||||
|
// mappings are created.
|
||||||
|
//
|
||||||
|
// Ref: https://github.com/kubernetes/kubernetes/issues/92211
|
||||||
|
func getDeviceUserGroupID(runAsVal *runtime.Int64Value) uint32 {
|
||||||
|
if runAsVal != nil {
|
||||||
|
return uint32(runAsVal.GetValue())
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
// WithDevices sets the provided devices onto the container spec
|
// WithDevices sets the provided devices onto the container spec
|
||||||
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOpts {
|
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig, enableDeviceOwnershipFromSecurityContext bool) oci.SpecOpts {
|
||||||
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
|
||||||
if s.Linux == nil {
|
if s.Linux == nil {
|
||||||
s.Linux = &runtimespec.Linux{}
|
s.Linux = &runtimespec.Linux{}
|
||||||
@ -295,6 +317,8 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp
|
|||||||
s.Linux.Resources = &runtimespec.LinuxResources{}
|
s.Linux.Resources = &runtimespec.LinuxResources{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
oldDevices := len(s.Linux.Devices)
|
||||||
|
|
||||||
for _, device := range config.GetDevices() {
|
for _, device := range config.GetDevices() {
|
||||||
path, err := osi.ResolveSymbolicLink(device.HostPath)
|
path, err := osi.ResolveSymbolicLink(device.HostPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -306,6 +330,24 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if enableDeviceOwnershipFromSecurityContext {
|
||||||
|
UID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsUser())
|
||||||
|
GID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsGroup())
|
||||||
|
// Loop all new devices added by oci.WithDevices() to update their
|
||||||
|
// dev.UID/dev.GID.
|
||||||
|
//
|
||||||
|
// non-zero UID/GID from SecurityContext is used to override host's
|
||||||
|
// device UID/GID for the container.
|
||||||
|
for idx := oldDevices; idx < len(s.Linux.Devices); idx++ {
|
||||||
|
if UID != 0 {
|
||||||
|
*s.Linux.Devices[idx].UID = UID
|
||||||
|
}
|
||||||
|
if GID != 0 {
|
||||||
|
*s.Linux.Devices[idx].GID = GID
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -222,11 +222,11 @@ func (c *criService) containerSpec(
|
|||||||
specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed)
|
specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed)
|
||||||
} else {
|
} else {
|
||||||
// add requested devices by the config as host devices are not automatically added
|
// add requested devices by the config as host devices are not automatically added
|
||||||
specOpts = append(specOpts, customopts.WithDevices(c.os, config),
|
specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext),
|
||||||
customopts.WithCapabilities(securityContext, c.allCaps))
|
customopts.WithCapabilities(securityContext, c.allCaps))
|
||||||
}
|
}
|
||||||
} else { // not privileged
|
} else { // not privileged
|
||||||
specOpts = append(specOpts, customopts.WithDevices(c.os, config),
|
specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext),
|
||||||
customopts.WithCapabilities(securityContext, c.allCaps))
|
customopts.WithCapabilities(securityContext, c.allCaps))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1304,6 +1304,72 @@ func TestGenerateUserString(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNonRootUserAndDevices(t *testing.T) {
|
||||||
|
testPid := uint32(1234)
|
||||||
|
c := newTestCRIService()
|
||||||
|
testSandboxID := "sandbox-id"
|
||||||
|
testContainerName := "container-name"
|
||||||
|
containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
|
||||||
|
|
||||||
|
hostDevicesRaw, err := oci.HostDevices()
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
testDevice := hostDevicesRaw[0]
|
||||||
|
|
||||||
|
for desc, test := range map[string]struct {
|
||||||
|
uid, gid *runtime.Int64Value
|
||||||
|
deviceOwnershipFromSecurityContext bool
|
||||||
|
expectedDeviceUID uint32
|
||||||
|
expectedDeviceGID uint32
|
||||||
|
}{
|
||||||
|
"expect non-root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled": {
|
||||||
|
uid: &runtime.Int64Value{Value: 1},
|
||||||
|
gid: &runtime.Int64Value{Value: 10},
|
||||||
|
expectedDeviceUID: *testDevice.UID,
|
||||||
|
expectedDeviceGID: *testDevice.GID,
|
||||||
|
},
|
||||||
|
"expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled": {
|
||||||
|
uid: &runtime.Int64Value{Value: 0},
|
||||||
|
gid: &runtime.Int64Value{Value: 0},
|
||||||
|
expectedDeviceUID: *testDevice.UID,
|
||||||
|
expectedDeviceGID: *testDevice.GID,
|
||||||
|
},
|
||||||
|
"expect non-root container's Devices Uid/Gid to be the same as RunAsUser/RunAsGroup when deviceOwnershipFromSecurityContext is enabled": {
|
||||||
|
uid: &runtime.Int64Value{Value: 1},
|
||||||
|
gid: &runtime.Int64Value{Value: 10},
|
||||||
|
deviceOwnershipFromSecurityContext: true,
|
||||||
|
expectedDeviceUID: 1,
|
||||||
|
expectedDeviceGID: 10,
|
||||||
|
},
|
||||||
|
"expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is enabled": {
|
||||||
|
uid: &runtime.Int64Value{Value: 0},
|
||||||
|
gid: &runtime.Int64Value{Value: 0},
|
||||||
|
deviceOwnershipFromSecurityContext: true,
|
||||||
|
expectedDeviceUID: *testDevice.UID,
|
||||||
|
expectedDeviceGID: *testDevice.GID,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Logf("TestCase %q", desc)
|
||||||
|
|
||||||
|
c.config.DeviceOwnershipFromSecurityContext = test.deviceOwnershipFromSecurityContext
|
||||||
|
containerConfig.Linux.SecurityContext.RunAsUser = test.uid
|
||||||
|
containerConfig.Linux.SecurityContext.RunAsGroup = test.gid
|
||||||
|
containerConfig.Devices = []*runtime.Device{
|
||||||
|
{
|
||||||
|
ContainerPath: testDevice.Path,
|
||||||
|
HostPath: testDevice.Path,
|
||||||
|
Permissions: "r",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, test.expectedDeviceUID, *spec.Linux.Devices[0].UID)
|
||||||
|
assert.Equal(t, test.expectedDeviceGID, *spec.Linux.Devices[0].GID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestPrivilegedDevices(t *testing.T) {
|
func TestPrivilegedDevices(t *testing.T) {
|
||||||
testPid := uint32(1234)
|
testPid := uint32(1234)
|
||||||
c := newTestCRIService()
|
c := newTestCRIService()
|
||||||
|
Loading…
Reference in New Issue
Block a user