containerd/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
Markus Lehtonen ed47d6ba76 cri: implement RuntimeConfig rpc
The rpc only reports one field, i.e. the cgroup driver, to kubelet.
Containerd determines the effective cgroup driver by looking at all
runtime handlers, starting from the default runtime handler (the rest in
alphabetical order), and returning the cgroup driver setting of the
first runtime handler that supports one. If no runtime handler supports
cgroup driver (i.e. has a config option for it) containerd falls back to
auto-detection, returning systemd if systemd is running and cgroupfs
otherwise.

This patch implements the CRI server side of Kubernetes KEP-4033:
https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/4033-group-driver-detection-over-cri

Signed-off-by: Markus Lehtonen <markus.lehtonen@intel.com>
2023-07-28 13:50:43 +03:00

110 lines
2.8 KiB
Go

package fs
import (
"bytes"
"errors"
"reflect"
"github.com/opencontainers/runc/libcontainer/cgroups"
cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runc/libcontainer/userns"
)
type DevicesGroup struct {
TestingSkipFinalCheck bool
}
func (s *DevicesGroup) Name() string {
return "devices"
}
func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error {
if r.SkipDevices {
return nil
}
if path == "" {
// Return error here, since devices cgroup
// is a hard requirement for container's security.
return errSubsystemDoesNotExist
}
return apply(path, pid)
}
func loadEmulator(path string) (*cgroupdevices.Emulator, error) {
list, err := cgroups.ReadFile(path, "devices.list")
if err != nil {
return nil, err
}
return cgroupdevices.EmulatorFromList(bytes.NewBufferString(list))
}
func buildEmulator(rules []*devices.Rule) (*cgroupdevices.Emulator, error) {
// This defaults to a white-list -- which is what we want!
emu := &cgroupdevices.Emulator{}
for _, rule := range rules {
if err := emu.Apply(*rule); err != nil {
return nil, err
}
}
return emu, nil
}
func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
if userns.RunningInUserNS() || r.SkipDevices {
return nil
}
// Generate two emulators, one for the current state of the cgroup and one
// for the requested state by the user.
current, err := loadEmulator(path)
if err != nil {
return err
}
target, err := buildEmulator(r.Devices)
if err != nil {
return err
}
// Compute the minimal set of transition rules needed to achieve the
// requested state.
transitionRules, err := current.Transition(target)
if err != nil {
return err
}
for _, rule := range transitionRules {
file := "devices.deny"
if rule.Allow {
file = "devices.allow"
}
if err := cgroups.WriteFile(path, file, rule.CgroupString()); err != nil {
return err
}
}
// Final safety check -- ensure that the resulting state is what was
// requested. This is only really correct for white-lists, but for
// black-lists we can at least check that the cgroup is in the right mode.
//
// This safety-check is skipped for the unit tests because we cannot
// currently mock devices.list correctly.
if !s.TestingSkipFinalCheck {
currentAfter, err := loadEmulator(path)
if err != nil {
return err
}
if !target.IsBlacklist() && !reflect.DeepEqual(currentAfter, target) {
return errors.New("resulting devices cgroup doesn't precisely match target")
} else if target.IsBlacklist() != currentAfter.IsBlacklist() {
return errors.New("resulting devices cgroup doesn't match target mode")
}
}
return nil
}
func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}