Revert "Update runc to 1.0.0"

This commit is contained in:
Odin Ugedal
2021-07-05 14:03:04 +02:00
committed by GitHub
parent 5e3bed6399
commit 61d88af9e4
146 changed files with 1196 additions and 2702 deletions

View File

@@ -258,9 +258,9 @@ func (e *Emulator) Apply(rule devices.Rule) error {
if rule.Allow {
return e.allow(innerRule)
} else {
return e.deny(innerRule)
}
return e.deny(innerRule)
}
// EmulatorFromList takes a reader to a "devices.list"-like source, and returns
@@ -371,12 +371,3 @@ func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) {
}
return transitionRules, nil
}
// Rules computes the smallest rule list needed to turn a *deny-all* cgroup
// into the filter state held by this emulator. Keep in mind that deny-all is
// not what a fresh cgroupv1 cgroup looks like -- that one is allow-all. The
// work is delegated to Transition(), using an empty deny-by-default emulator
// as the starting point and e as the target.
func (e *Emulator) Rules() ([]*devices.Rule, error) {
	denyAll := Emulator{defaultAllow: false}
	return denyAll.Transition(e)
}

View File

@@ -11,7 +11,6 @@ import (
"strconv"
"github.com/cilium/ebpf/asm"
devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
@@ -23,44 +22,11 @@ const (
)
// DeviceFilter returns eBPF device filter program and its license string
func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
// Generate the minimum ruleset for the device rules we are given. While we
// don't care about minimum transitions in cgroupv2, using the emulator
// gives us a guarantee that the behaviour of devices filtering is the same
// as cgroupv1, including security hardenings to avoid misconfiguration
// (such as punching holes in wildcard rules).
emu := new(devicesemulator.Emulator)
for _, rule := range rules {
if err := emu.Apply(*rule); err != nil {
return nil, "", err
}
}
cleanRules, err := emu.Rules()
if err != nil {
return nil, "", err
}
p := &program{
defaultAllow: emu.IsBlacklist(),
}
func DeviceFilter(devices []*devices.Rule) (asm.Instructions, string, error) {
p := &program{}
p.init()
for idx, rule := range cleanRules {
if rule.Type == devices.WildcardDevice {
// We can safely skip over wildcard entries because there should
// only be one (at most) at the very start to instruct cgroupv1 to
// go into allow-list mode. However we do double-check this here.
if idx != 0 || rule.Allow != emu.IsBlacklist() {
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
}
continue
}
if rule.Allow == p.defaultAllow {
// There should be no rules which have an action equal to the
// default action, the emulator removes those.
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
}
if err := p.appendRule(rule); err != nil {
for i := len(devices) - 1; i >= 0; i-- {
if err := p.appendDevice(devices[i]); err != nil {
return nil, "", err
}
}
@@ -69,9 +35,9 @@ func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
}
type program struct {
insts asm.Instructions
defaultAllow bool
blockID int
insts asm.Instructions
hasWildCard bool
blockID int
}
func (p *program) init() {
@@ -101,35 +67,39 @@ func (p *program) init() {
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
}
// appendRule rule converts an OCI rule to the relevant eBPF block and adds it
// to the in-progress filter program. In order to operate properly, it must be
// called with a "clean" rule list (generated by devices.Emulator.Rules() --
// with any "a" rules removed).
func (p *program) appendRule(rule *devices.Rule) error {
// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
func (p *program) appendDevice(dev *devices.Rule) error {
if p.blockID < 0 {
return errors.New("the program is finalized")
}
if p.hasWildCard {
// All entries after wildcard entry are ignored
return nil
}
var bpfType int32
switch rule.Type {
case devices.CharDevice:
bpfType := int32(-1)
hasType := true
switch dev.Type {
case 'c':
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
case devices.BlockDevice:
case 'b':
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
case 'a':
hasType = false
default:
// We do not permit 'a', nor any other types we don't know about.
return errors.Errorf("invalid type %q", string(rule.Type))
// if not specified in OCI json, typ is set to DeviceTypeAll
return errors.Errorf("invalid Type %q", string(dev.Type))
}
if rule.Major > math.MaxUint32 {
return errors.Errorf("invalid major %d", rule.Major)
if dev.Major > math.MaxUint32 {
return errors.Errorf("invalid major %d", dev.Major)
}
if rule.Minor > math.MaxUint32 {
return errors.Errorf("invalid minor %d", rule.Major)
if dev.Minor > math.MaxUint32 {
return errors.Errorf("invalid minor %d", dev.Major)
}
hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
hasMinor := rule.Minor >= 0
hasMajor := dev.Major >= 0 // if not specified in OCI json, major is set to -1
hasMinor := dev.Minor >= 0
bpfAccess := int32(0)
for _, r := range rule.Permissions {
for _, r := range dev.Permissions {
switch r {
case 'r':
bpfAccess |= unix.BPF_DEVCG_ACC_READ
@@ -149,10 +119,12 @@ func (p *program) appendRule(rule *devices.Rule) error {
nextBlockSym = "block-" + strconv.Itoa(p.blockID+1)
prevBlockLastIdx = len(p.insts) - 1
)
p.insts = append(p.insts,
// if (R2 != bpfType) goto next
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
)
if hasType {
p.insts = append(p.insts,
// if (R2 != bpfType) goto next
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
)
}
if hasAccess {
p.insts = append(p.insts,
// if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next
@@ -164,16 +136,19 @@ func (p *program) appendRule(rule *devices.Rule) error {
if hasMajor {
p.insts = append(p.insts,
// if (R4 != major) goto next
asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym),
asm.JNE.Imm(asm.R4, int32(dev.Major), nextBlockSym),
)
}
if hasMinor {
p.insts = append(p.insts,
// if (R5 != minor) goto next
asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym),
asm.JNE.Imm(asm.R5, int32(dev.Minor), nextBlockSym),
)
}
p.insts = append(p.insts, acceptBlock(rule.Allow)...)
if !hasType && !hasAccess && !hasMajor && !hasMinor {
p.hasWildCard = true
}
p.insts = append(p.insts, acceptBlock(dev.Allow)...)
// set blockSym to the first instruction we added in this iteration
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
p.blockID++
@@ -181,14 +156,14 @@ func (p *program) appendRule(rule *devices.Rule) error {
}
func (p *program) finalize() (asm.Instructions, error) {
var v int32
if p.defaultAllow {
v = 1
if p.hasWildCard {
// acceptBlock with asm.Return() is already inserted
return p.insts, nil
}
blockSym := "block-" + strconv.Itoa(p.blockID)
p.insts = append(p.insts,
// R0 <- v
asm.Mov.Imm32(asm.R0, v).Sym(blockSym),
// R0 <- 0
asm.Mov.Imm32(asm.R0, 0).Sym(blockSym),
asm.Return(),
)
p.blockID = -1
@@ -196,7 +171,7 @@ func (p *program) finalize() (asm.Instructions, error) {
}
func acceptBlock(accept bool) asm.Instructions {
var v int32
v := int32(0)
if accept {
v = 1
}

View File

@@ -0,0 +1,57 @@
package ebpf
import (
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/link"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
//
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
//
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
//
// insts and license describe the program to load, and dirFD is the file
// descriptor used as the attach target. On success the returned function
// detaches the program again. On failure a no-op closer is returned together
// with the error, so the returned function is always safe to call.
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
	nilCloser := func() error {
		return nil
	}

	// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
	// This limit is not inherited into the container.
	// The result is deliberately ignored (best effort).
	memlockLimit := &unix.Rlimit{
		Cur: unix.RLIM_INFINITY,
		Max: unix.RLIM_INFINITY,
	}
	_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)

	spec := &ebpf.ProgramSpec{
		Type:         ebpf.CGroupDevice,
		Instructions: insts,
		License:      license,
	}
	prog, err := ebpf.NewProgram(spec)
	if err != nil {
		return nilCloser, err
	}

	err = link.RawAttachProgram(link.RawAttachProgramOptions{
		Target:  dirFD,
		Program: prog,
		Attach:  ebpf.AttachCGroupDevice,
		Flags:   unix.BPF_F_ALLOW_MULTI,
	})
	if err != nil {
		// The program was loaded but never attached, and no closer that
		// could release it is handed out, so close the handle here instead
		// of leaking it. The attach error is the one worth reporting; a
		// failure to close is dropped.
		_ = prog.Close()
		return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
	}

	closer := func() error {
		// Keep the error local to the closure so that calling the closer
		// does not write to a variable shared with the enclosing function.
		detachErr := link.RawDetachProgram(link.RawDetachProgramOptions{
			Target:  dirFD,
			Program: prog,
			Attach:  ebpf.AttachCGroupDevice,
		})
		if detachErr != nil {
			return errors.Wrap(detachErr, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE)")
		}
		return nil
	}
	return closer, nil
}

View File

@@ -1,240 +0,0 @@
package ebpf
import (
"fmt"
"os"
"runtime"
"sync"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/link"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// nilCloser is a closer that does nothing and always reports success.
func nilCloser() error { return nil }
// findAttachedCgroupDeviceFilters returns handles for the BPF_CGROUP_DEVICE
// programs reported by BPF_PROG_QUERY for the target dirFd.
//
// The query is issued with a buffer of program ids. When the kernel answers
// ENOSPC, the buffer is resized to the count it reported and the query is
// retried, up to 10 times in total. Each id is then opened with
// ebpf.NewProgramFromID. If opening one of them fails, the handles opened so
// far are closed before the error is returned so they are not leaked.
//
// On success the caller owns the returned programs.
func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) {
	// Mirrors the argument layout passed to the bpf(2) syscall below.
	type bpfAttrQuery struct {
		TargetFd    uint32
		AttachType  uint32
		QueryType   uint32
		AttachFlags uint32
		ProgIds     uint64 // __aligned_u64
		ProgCnt     uint32
	}

	// Currently you can only have 64 eBPF programs attached to a cgroup.
	size := 64
	retries := 0
	for retries < 10 {
		progIds := make([]uint32, size)
		query := bpfAttrQuery{
			TargetFd:   uint32(dirFd),
			AttachType: uint32(unix.BPF_CGROUP_DEVICE),
			ProgIds:    uint64(uintptr(unsafe.Pointer(&progIds[0]))),
			ProgCnt:    uint32(len(progIds)),
		}

		// Fetch the list of program ids.
		_, _, errno := unix.Syscall(unix.SYS_BPF,
			uintptr(unix.BPF_PROG_QUERY),
			uintptr(unsafe.Pointer(&query)),
			unsafe.Sizeof(query))
		// ProgCnt is read back after the call: it is the number of ids to
		// use on success and the buffer size for the retry on ENOSPC.
		size = int(query.ProgCnt)
		runtime.KeepAlive(query)
		if errno != 0 {
			// On ENOSPC we get the correct number of programs.
			if errno == unix.ENOSPC {
				retries++
				continue
			}
			return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno)
		}

		// Convert the ids to program handles.
		progIds = progIds[:size]
		programs := make([]*ebpf.Program, 0, len(progIds))
		for _, progId := range progIds {
			program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId))
			if err != nil {
				// Release what was opened before the failure; the lookup
				// error is the one reported, close errors are dropped.
				for _, opened := range programs {
					_ = opened.Close()
				}
				return nil, fmt.Errorf("cannot fetch program from id: %w", err)
			}
			programs = append(programs, program)
		}
		// progIds is referenced by the kernel through a raw address only, so
		// keep it reachable until we are done with it.
		runtime.KeepAlive(progIds)
		return programs, nil
	}

	return nil, errors.New("could not get complete list of CGROUP_DEVICE programs")
}
// Cached result of the BPF_F_REPLACE probe performed by haveBpfProgReplace.
var (
// haveBpfProgReplaceBool is set to true by the probe when BPF_F_REPLACE
// appears to be supported; it keeps its zero value (false) otherwise.
haveBpfProgReplaceBool bool
// haveBpfProgReplaceOnce ensures the probe body runs only once.
haveBpfProgReplaceOnce sync.Once
)
// haveBpfProgReplace reports whether attaching with BPF_F_REPLACE appears to
// be supported. The probe runs once (guarded by haveBpfProgReplaceOnce) and
// its result is cached in haveBpfProgReplaceBool, so a probe that bails out
// early (program load or /dev/null open failure) leaves the answer at false.
//
// Loosely based on the BPF_F_REPLACE support check in
// <https://github.com/cilium/ebpf/blob/v0.6.0/link/syscalls.go>.
//
// TODO: move this logic to cilium/ebpf
func haveBpfProgReplace() bool {
haveBpfProgReplaceOnce.Do(func() {
// Load a trivial program (R0 <- 0; return) to use for the probe attach.
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
Type: ebpf.CGroupDevice,
License: "MIT",
Instructions: asm.Instructions{
asm.Mov.Imm(asm.R0, 0),
asm.Return(),
},
})
if err != nil {
logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err)
return
}
defer prog.Close()
// /dev/null serves as a dummy attach target: the attach is expected to
// fail, only the kind of failure matters.
devnull, err := os.Open("/dev/null")
if err != nil {
logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err)
return
}
defer devnull.Close()
// We know that we have BPF_PROG_ATTACH since we can load
// BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL
// we know that the feature isn't present.
err = link.RawAttachProgram(link.RawAttachProgramOptions{
// We rely on this fd being checked after attachFlags.
Target: int(devnull.Fd()),
// Attempt to "replace" bad fds with this program.
Program: prog,
Attach: ebpf.AttachCGroupDevice,
Flags: unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE,
})
if errors.Is(err, unix.EINVAL) {
// not supported
return
}
// attach_flags test succeeded.
// Anything other than EBADF is unexpected here; it is logged, but the
// feature is still treated as present.
if !errors.Is(err, unix.EBADF) {
logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err)
}
haveBpfProgReplaceBool = true
})
return haveBpfProgReplaceBool
}
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
//
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
//
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
//
// insts and license describe the program to load, and dirFd is the file
// descriptor used as the attach target. Filters that were already attached
// are handled in one of two ways: if exactly one is present and
// BPF_F_REPLACE is supported, it is replaced in place; otherwise the new
// program is attached first and every old one is detached afterwards.
//
// The returned function detaches the new program. If attaching fails,
// nilCloser is returned with the error. If detaching an old filter fails, the
// real closer is returned together with the error, because the new program is
// already attached at that point.
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) {
	// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
	// This limit is not inherited into the container.
	// The result is deliberately ignored (best effort).
	memlockLimit := &unix.Rlimit{
		Cur: unix.RLIM_INFINITY,
		Max: unix.RLIM_INFINITY,
	}
	_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)

	// Get the list of existing programs.
	oldProgs, err := findAttachedCgroupDeviceFilters(dirFd)
	if err != nil {
		return nilCloser, err
	}
	// The handles to the old programs are only needed inside this call (for
	// replacing or detaching), so release them on the way out rather than
	// leaving them open. Close errors are not actionable here.
	defer func() {
		for _, oldProg := range oldProgs {
			_ = oldProg.Close()
		}
	}()
	useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1

	// Generate new program.
	spec := &ebpf.ProgramSpec{
		Type:         ebpf.CGroupDevice,
		Instructions: insts,
		License:      license,
	}
	prog, err := ebpf.NewProgram(spec)
	if err != nil {
		return nilCloser, err
	}

	// If there is only one old program, we can just replace it directly.
	var (
		replaceProg *ebpf.Program
		attachFlags uint32 = unix.BPF_F_ALLOW_MULTI
	)
	if useReplaceProg {
		replaceProg = oldProgs[0]
		attachFlags |= unix.BPF_F_REPLACE
	}
	err = link.RawAttachProgram(link.RawAttachProgramOptions{
		Target:  dirFd,
		Program: prog,
		Replace: replaceProg,
		Attach:  ebpf.AttachCGroupDevice,
		Flags:   attachFlags,
	})
	if err != nil {
		// The new program never got attached and no closer referencing it
		// is handed out, so close its handle instead of leaking it.
		_ = prog.Close()
		return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err)
	}

	closer := func() error {
		// Keep the error local to the closure so that calling the closer
		// does not write to a variable shared with the enclosing function.
		detachErr := link.RawDetachProgram(link.RawDetachProgramOptions{
			Target:  dirFd,
			Program: prog,
			Attach:  ebpf.AttachCGroupDevice,
		})
		if detachErr != nil {
			return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", detachErr)
		}
		// TODO: Should we attach the old filters back in this case? Otherwise
		// we fail-open on a security feature, which is a bit scary.
		return nil
	}

	if !useReplaceProg {
		logLevel := logrus.DebugLevel
		// If there was more than one old program, give a warning (since this
		// really shouldn't happen with runc-managed cgroups) and then detach
		// all the old programs.
		if len(oldProgs) > 1 {
			// NOTE: Ideally this should be a warning but it turns out that
			//       systemd-managed cgroups trigger this warning (apparently
			//       systemd doesn't delete old non-systemd programs when
			//       setting properties).
			logrus.Infof("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs))
			logLevel = logrus.InfoLevel
		}
		for idx, oldProg := range oldProgs {
			// Output some extra debug info.
			if info, err := oldProg.Info(); err == nil {
				fields := logrus.Fields{
					"type": info.Type.String(),
					"tag":  info.Tag,
					"name": info.Name,
				}
				if id, ok := info.ID(); ok {
					fields["id"] = id
				}
				if runCount, ok := info.RunCount(); ok {
					fields["run_count"] = runCount
				}
				// Named progRuntime so the local does not shadow the
				// standard "runtime" package.
				if progRuntime, ok := info.Runtime(); ok {
					fields["runtime"] = progRuntime.String()
				}
				logrus.WithFields(fields).Logf(logLevel, "removing old filter %d from cgroup", idx)
			}
			err = link.RawDetachProgram(link.RawDetachProgramOptions{
				Target:  dirFd,
				Program: oldProg,
				Attach:  ebpf.AttachCGroupDevice,
			})
			if err != nil {
				return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err)
			}
		}
	}
	return closer, nil
}

View File

@@ -6,17 +6,15 @@ import (
"bufio"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)
type BlkioGroup struct {
weightFilename string
weightDeviceFilename string
}
func (s *BlkioGroup) Name() string {
@@ -28,47 +26,42 @@ func (s *BlkioGroup) Apply(path string, d *cgroupData) error {
}
func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
s.detectWeightFilenames(path)
if r.BlkioWeight != 0 {
if err := cgroups.WriteFile(path, s.weightFilename, strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
if err := fscommon.WriteFile(path, "blkio.weight", strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
return err
}
}
if r.BlkioLeafWeight != 0 {
if err := cgroups.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil {
if err := fscommon.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil {
return err
}
}
for _, wd := range r.BlkioWeightDevice {
if wd.Weight != 0 {
if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil {
return err
}
if err := fscommon.WriteFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
return err
}
if wd.LeafWeight != 0 {
if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
return err
}
if err := fscommon.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleReadBpsDevice {
if err := cgroups.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
if err := fscommon.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleWriteBpsDevice {
if err := cgroups.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
if err := fscommon.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleReadIOPSDevice {
if err := cgroups.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
if err := fscommon.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleWriteIOPSDevice {
if err := cgroups.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
if err := fscommon.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
return err
}
}
@@ -113,7 +106,7 @@ func splitBlkioStatLine(r rune) bool {
func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
var blkioStats []cgroups.BlkioStatEntry
f, err := cgroups.OpenFile(dir, file, os.O_RDONLY)
f, err := fscommon.OpenFile(dir, file, os.O_RDONLY)
if err != nil {
if os.IsNotExist(err) {
return blkioStats, nil
@@ -168,7 +161,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
filename string
blkioStatEntriesPtr *[]cgroups.BlkioStatEntry
}
bfqDebugStats := []blkioStatInfo{
var bfqDebugStats = []blkioStatInfo{
{
filename: "blkio.bfq.sectors_recursive",
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
@@ -202,7 +195,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
},
}
bfqStats := []blkioStatInfo{
var bfqStats = []blkioStatInfo{
{
filename: "blkio.bfq.io_serviced_recursive",
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
@@ -212,7 +205,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
},
}
cfqStats := []blkioStatInfo{
var cfqStats = []blkioStatInfo{
{
filename: "blkio.sectors_recursive",
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
@@ -246,7 +239,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
},
}
throttleRecursiveStats := []blkioStatInfo{
var throttleRecursiveStats = []blkioStatInfo{
{
filename: "blkio.throttle.io_serviced_recursive",
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
@@ -256,7 +249,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
},
}
baseStats := []blkioStatInfo{
var baseStats = []blkioStatInfo{
{
filename: "blkio.throttle.io_serviced",
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
@@ -266,7 +259,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
},
}
orderedStats := [][]blkioStatInfo{
var orderedStats = [][]blkioStatInfo{
bfqDebugStats,
bfqStats,
cfqStats,
@@ -287,7 +280,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
return err
}
*statInfo.blkioStatEntriesPtr = blkioStats
// finish if all stats are gathered
//finish if all stats are gathered
if i == len(statGroup)-1 {
return nil
}
@@ -295,17 +288,3 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
}
return nil
}
// detectWeightFilenames fills in s.weightFilename and s.weightDeviceFilename
// the first time it is called; once weightFilename is set, later calls leave
// both fields alone. The plain "blkio.weight" names are chosen when that file
// exists under path, otherwise the "blkio.bfq." variants are used.
func (s *BlkioGroup) detectWeightFilenames(path string) {
	if s.weightFilename == "" {
		// Start from the BFQ names and switch to the plain ones only when
		// blkio.weight is present.
		weight, weightDevice := "blkio.bfq.weight", "blkio.bfq.weight_device"
		if cgroups.PathExists(filepath.Join(path, "blkio.weight")) {
			weight, weightDevice = "blkio.weight", "blkio.weight_device"
		}
		s.weightFilename, s.weightDeviceFilename = weight, weightDevice
	}
}

View File

@@ -13,7 +13,8 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
)
type CpuGroup struct{}
type CpuGroup struct {
}
func (s *CpuGroup) Name() string {
return "cpu"
@@ -25,7 +26,7 @@ func (s *CpuGroup) Apply(path string, d *cgroupData) error {
if path == "" {
return nil
}
if err := os.MkdirAll(path, 0o755); err != nil {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
// We should set the real-Time group scheduling settings before moving
@@ -41,12 +42,12 @@ func (s *CpuGroup) Apply(path string, d *cgroupData) error {
func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
if r.CpuRtPeriod != 0 {
if err := cgroups.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil {
if err := fscommon.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil {
return err
}
}
if r.CpuRtRuntime != 0 {
if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
if err := fscommon.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
return err
}
}
@@ -56,7 +57,7 @@ func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
func (s *CpuGroup) Set(path string, r *configs.Resources) error {
if r.CpuShares != 0 {
shares := r.CpuShares
if err := cgroups.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
if err := fscommon.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
return err
}
// read it back
@@ -72,12 +73,12 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error {
}
}
if r.CpuPeriod != 0 {
if err := cgroups.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(r.CpuPeriod, 10)); err != nil {
if err := fscommon.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(r.CpuPeriod, 10)); err != nil {
return err
}
}
if r.CpuQuota != 0 {
if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
if err := fscommon.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
return err
}
}
@@ -85,7 +86,7 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error {
}
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
f, err := cgroups.OpenFile(path, "cpu.stat", os.O_RDONLY)
f, err := fscommon.OpenFile(path, "cpu.stat", os.O_RDONLY)
if err != nil {
if os.IsNotExist(err) {
return nil

View File

@@ -32,7 +32,8 @@ const (
clockTicks uint64 = 100
)
type CpuacctGroup struct{}
type CpuacctGroup struct {
}
func (s *CpuacctGroup) Name() string {
return "cpuacct"
@@ -90,7 +91,7 @@ func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
// Expected format:
// user <usage in ticks>
// system <usage in ticks>
data, err := cgroups.ReadFile(path, cgroupCpuacctStat)
data, err := fscommon.ReadFile(path, cgroupCpuacctStat)
if err != nil {
return 0, 0, err
}
@@ -116,7 +117,7 @@ func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
func getPercpuUsage(path string) ([]uint64, error) {
percpuUsage := []uint64{}
data, err := cgroups.ReadFile(path, "cpuacct.usage_percpu")
data, err := fscommon.ReadFile(path, "cpuacct.usage_percpu")
if err != nil {
return percpuUsage, err
}
@@ -134,7 +135,7 @@ func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
usageKernelMode := []uint64{}
usageUserMode := []uint64{}
file, err := cgroups.OpenFile(path, cgroupCpuacctUsageAll, os.O_RDONLY)
file, err := fscommon.OpenFile(path, cgroupCpuacctUsageAll, os.O_RDONLY)
if os.IsNotExist(err) {
return usageKernelMode, usageUserMode, nil
} else if err != nil {
@@ -143,7 +144,7 @@ func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
defer file.Close()
scanner := bufio.NewScanner(file)
scanner.Scan() // skipping header line
scanner.Scan() //skipping header line
for scanner.Scan() {
lineFields := strings.SplitN(scanner.Text(), " ", cuacctUsageAllColumnsNumber+1)

View File

@@ -16,7 +16,8 @@ import (
"golang.org/x/sys/unix"
)
type CpusetGroup struct{}
type CpusetGroup struct {
}
func (s *CpusetGroup) Name() string {
return "cpuset"
@@ -28,12 +29,12 @@ func (s *CpusetGroup) Apply(path string, d *cgroupData) error {
func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
if r.CpusetCpus != "" {
if err := cgroups.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil {
if err := fscommon.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil {
return err
}
}
if r.CpusetMems != "" {
if err := cgroups.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil {
if err := fscommon.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil {
return err
}
}
@@ -155,7 +156,7 @@ func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error
if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil {
return err
}
if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) {
if err := os.Mkdir(dir, 0755); err != nil && !os.IsExist(err) {
return err
}
// We didn't inherit cpuset configs from parent, but we have
@@ -175,10 +176,10 @@ func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error
}
func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) {
if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil {
if cpus, err = fscommon.ReadFile(parent, "cpuset.cpus"); err != nil {
return
}
if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil {
if mems, err = fscommon.ReadFile(parent, "cpuset.mems"); err != nil {
return
}
return cpus, mems, nil
@@ -205,7 +206,7 @@ func cpusetEnsureParent(current string) error {
if err := cpusetEnsureParent(parent); err != nil {
return err
}
if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) {
if err := os.Mkdir(current, 0755); err != nil && !os.IsExist(err) {
return err
}
return cpusetCopyIfNeeded(current, parent)
@@ -224,12 +225,12 @@ func cpusetCopyIfNeeded(current, parent string) error {
}
if isEmptyCpuset(currentCpus) {
if err := cgroups.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
if err := fscommon.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
return err
}
}
if isEmptyCpuset(currentMems) {
if err := cgroups.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
if err := fscommon.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
return err
}
}

View File

@@ -9,6 +9,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runc/libcontainer/userns"
@@ -35,7 +36,7 @@ func (s *DevicesGroup) Apply(path string, d *cgroupData) error {
}
func loadEmulator(path string) (*cgroupdevices.Emulator, error) {
list, err := cgroups.ReadFile(path, "devices.list")
list, err := fscommon.ReadFile(path, "devices.list")
if err != nil {
return nil, err
}
@@ -80,7 +81,7 @@ func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
if rule.Allow {
file = "devices.allow"
}
if err := cgroups.WriteFile(path, file, rule.CgroupString()); err != nil {
if err := fscommon.WriteFile(path, file, rule.CgroupString()); err != nil {
return err
}
}

View File

@@ -10,12 +10,14 @@ import (
"time"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
type FreezerGroup struct{}
type FreezerGroup struct {
}
func (s *FreezerGroup) Name() string {
return "freezer"
@@ -33,7 +35,7 @@ func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
// Freezing failed, and it is bad and dangerous
// to leave the cgroup in FROZEN or FREEZING
// state, so (try to) thaw it back.
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
_ = fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
}
}()
@@ -66,11 +68,11 @@ func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
// the chances to succeed in freezing
// in case new processes keep appearing
// in the cgroup.
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
_ = fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
time.Sleep(10 * time.Millisecond)
}
if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
if err := fscommon.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
return err
}
@@ -81,7 +83,7 @@ func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
// system.
time.Sleep(10 * time.Microsecond)
}
state, err := cgroups.ReadFile(path, "freezer.state")
state, err := fscommon.ReadFile(path, "freezer.state")
if err != nil {
return err
}
@@ -102,7 +104,7 @@ func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
// Despite our best efforts, it got stuck in FREEZING.
return errors.New("unable to freeze")
case configs.Thawed:
return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
return fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
case configs.Undefined:
return nil
default:
@@ -116,7 +118,7 @@ func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
for {
state, err := cgroups.ReadFile(path, "freezer.state")
state, err := fscommon.ReadFile(path, "freezer.state")
if err != nil {
// If the kernel is too old, then we just treat the freezer as
// being in an "undefined" state.

View File

@@ -64,10 +64,8 @@ func NewManager(cg *configs.Cgroup, paths map[string]string, rootless bool) cgro
}
// The absolute path to the root of the cgroup hierarchies.
var (
cgroupRootLock sync.Mutex
cgroupRoot string
)
var cgroupRootLock sync.Mutex
var cgroupRoot string
const defaultCgroupRoot = "/sys/fs/cgroup"
@@ -395,7 +393,7 @@ func join(path string, pid int) error {
if path == "" {
return nil
}
if err := os.MkdirAll(path, 0o755); err != nil {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
return cgroups.WriteCgroupProc(path, pid)

View File

@@ -11,7 +11,8 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
)
type HugetlbGroup struct{}
type HugetlbGroup struct {
}
func (s *HugetlbGroup) Name() string {
return "hugetlb"
@@ -23,7 +24,7 @@ func (s *HugetlbGroup) Apply(path string, d *cgroupData) error {
func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
for _, hugetlb := range r.HugetlbLimit {
if err := cgroups.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
if err := fscommon.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
return err
}
}

View File

@@ -25,7 +25,8 @@ const (
cgroupMemoryMaxUsage = "memory.max_usage_in_bytes"
)
type MemoryGroup struct{}
type MemoryGroup struct {
}
func (s *MemoryGroup) Name() string {
return "memory"
@@ -40,7 +41,7 @@ func setMemory(path string, val int64) error {
return nil
}
err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
if !errors.Is(err, unix.EBUSY) {
return err
}
@@ -64,7 +65,7 @@ func setSwap(path string, val int64) error {
return nil
}
return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
return fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
}
func setMemoryAndSwap(path string, r *configs.Resources) error {
@@ -117,20 +118,20 @@ func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
// ignore KernelMemory and KernelMemoryTCP
if r.MemoryReservation != 0 {
if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil {
if err := fscommon.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil {
return err
}
}
if r.OomKillDisable {
if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil {
if err := fscommon.WriteFile(path, "memory.oom_control", "1"); err != nil {
return err
}
}
if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 {
return nil
} else if *r.MemorySwappiness <= 100 {
if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil {
if err := fscommon.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil {
return err
}
} else {
@@ -142,7 +143,7 @@ func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
// Set stats from memory.stat.
statsFile, err := cgroups.OpenFile(path, "memory.stat", os.O_RDONLY)
statsFile, err := fscommon.OpenFile(path, "memory.stat", os.O_RDONLY)
if err != nil {
if os.IsNotExist(err) {
return nil
@@ -199,6 +200,14 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}
// memoryAssigned reports whether the cgroup configuration requests any
// memory-related setting: a memory limit, a memory reservation, a positive
// swap limit, disabling of the OOM killer, or a swappiness value other
// than -1.
func memoryAssigned(cgroup *configs.Cgroup) bool {
	res := cgroup.Resources
	switch {
	case res.Memory != 0,
		res.MemoryReservation != 0,
		res.MemorySwap > 0,
		res.OomKillDisable:
		return true
	}
	// A nil swappiness pointer, or a value of -1, counts as "not set".
	swappiness := res.MemorySwappiness
	return swappiness != nil && int64(*swappiness) != -1
}
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
memoryData := cgroups.MemoryData{}
@@ -249,13 +258,12 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
)
stats := cgroups.PageUsageByNUMA{}
file, err := cgroups.OpenFile(cgroupPath, filename, os.O_RDONLY)
file, err := fscommon.OpenFile(cgroupPath, filename, os.O_RDONLY)
if os.IsNotExist(err) {
return stats, nil
} else if err != nil {
return stats, err
}
defer file.Close()
// File format is documented in linux/Documentation/cgroup-v1/memory.txt
// and it looks like this:

View File

@@ -6,10 +6,12 @@ import (
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)
type NetClsGroup struct{}
type NetClsGroup struct {
}
func (s *NetClsGroup) Name() string {
return "net_cls"
@@ -21,7 +23,7 @@ func (s *NetClsGroup) Apply(path string, d *cgroupData) error {
func (s *NetClsGroup) Set(path string, r *configs.Resources) error {
if r.NetClsClassid != 0 {
if err := cgroups.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil {
if err := fscommon.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil {
return err
}
}

View File

@@ -4,10 +4,12 @@ package fs
import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)
type NetPrioGroup struct{}
type NetPrioGroup struct {
}
func (s *NetPrioGroup) Name() string {
return "net_prio"
@@ -19,7 +21,7 @@ func (s *NetPrioGroup) Apply(path string, d *cgroupData) error {
func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {
for _, prioMap := range r.NetPrioIfpriomap {
if err := cgroups.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
if err := fscommon.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
return err
}
}

View File

@@ -7,7 +7,8 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
)
type PerfEventGroup struct{}
type PerfEventGroup struct {
}
func (s *PerfEventGroup) Name() string {
return "perf_event"

View File

@@ -12,7 +12,8 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
)
type PidsGroup struct{}
type PidsGroup struct {
}
func (s *PidsGroup) Name() string {
return "pids"
@@ -31,7 +32,7 @@ func (s *PidsGroup) Set(path string, r *configs.Resources) error {
limit = strconv.FormatInt(r.PidsLimit, 10)
}
if err := cgroups.WriteFile(path, "pids.max", limit); err != nil {
if err := fscommon.WriteFile(path, "pids.max", limit); err != nil {
return err
}
}

View File

@@ -23,7 +23,7 @@ func setCpu(dirPath string, r *configs.Resources) error {
// NOTE: .CpuShares is not used here. Conversion is the caller's responsibility.
if r.CpuWeight != 0 {
if err := cgroups.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil {
if err := fscommon.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil {
return err
}
}
@@ -40,16 +40,15 @@ func setCpu(dirPath string, r *configs.Resources) error {
period = 100000
}
str += " " + strconv.FormatUint(period, 10)
if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil {
if err := fscommon.WriteFile(dirPath, "cpu.max", str); err != nil {
return err
}
}
return nil
}
func statCpu(dirPath string, stats *cgroups.Stats) error {
f, err := cgroups.OpenFile(dirPath, "cpu.stat", os.O_RDONLY)
f, err := fscommon.OpenFile(dirPath, "cpu.stat", os.O_RDONLY)
if err != nil {
return err
}

View File

@@ -3,7 +3,7 @@
package fs2
import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)
@@ -17,12 +17,12 @@ func setCpuset(dirPath string, r *configs.Resources) error {
}
if r.CpusetCpus != "" {
if err := cgroups.WriteFile(dirPath, "cpuset.cpus", r.CpusetCpus); err != nil {
if err := fscommon.WriteFile(dirPath, "cpuset.cpus", r.CpusetCpus); err != nil {
return err
}
}
if r.CpusetMems != "" {
if err := cgroups.WriteFile(dirPath, "cpuset.mems", r.CpusetMems); err != nil {
if err := fscommon.WriteFile(dirPath, "cpuset.mems", r.CpusetMems); err != nil {
return err
}
}

View File

@@ -6,12 +6,12 @@ import (
"path/filepath"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)
func supportedControllers() (string, error) {
return cgroups.ReadFile(UnifiedMountpoint, "/cgroup.controllers")
return fscommon.ReadFile(UnifiedMountpoint, "/cgroup.controllers")
}
// needAnyControllers returns whether we enable some supported controllers or not,
@@ -92,7 +92,7 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
for i, e := range elements {
current = filepath.Join(current, e)
if i > 0 {
if err := os.Mkdir(current, 0o755); err != nil {
if err := os.Mkdir(current, 0755); err != nil {
if !os.IsExist(err) {
return err
}
@@ -105,7 +105,7 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
}
}()
}
cgType, _ := cgroups.ReadFile(current, cgTypeFile)
cgType, _ := fscommon.ReadFile(current, cgTypeFile)
cgType = strings.TrimSpace(cgType)
switch cgType {
// If the cgroup is in an invalid mode (usually this means there's an internal
@@ -122,7 +122,7 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
// since that means we're a properly delegated cgroup subtree) but in
// this case there's not much we can do and it's better than giving an
// error.
_ = cgroups.WriteFile(current, cgTypeFile, "threaded")
_ = fscommon.WriteFile(current, cgTypeFile, "threaded")
}
// If the cgroup is in (threaded) or (domain threaded) mode, we can only use thread-aware controllers
// (and you cannot usually take a cgroup out of threaded mode).
@@ -136,11 +136,11 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
}
// enable all supported controllers
if i < len(elements)-1 {
if err := cgroups.WriteFile(current, cgStCtlFile, res); err != nil {
if err := fscommon.WriteFile(current, cgStCtlFile, res); err != nil {
// try write one by one
allCtrs := strings.Split(res, " ")
for _, ctr := range allCtrs {
_ = cgroups.WriteFile(current, cgStCtlFile, ctr)
_ = fscommon.WriteFile(current, cgStCtlFile, ctr)
}
}
// Some controllers might not be enabled when rootless or containerized,

View File

@@ -82,7 +82,9 @@ func parseCgroupFile(path string) (string, error) {
}
func parseCgroupFromReader(r io.Reader) (string, error) {
s := bufio.NewScanner(r)
var (
s = bufio.NewScanner(r)
)
for s.Scan() {
var (
text = s.Text()

View File

@@ -58,15 +58,29 @@ func setDevices(dirPath string, r *configs.Resources) error {
if r.SkipDevices {
return nil
}
// XXX: This is currently a white-list (but all callers pass a blacklist of
// devices). This is bad for a whole variety of reasons, but will need
// to be fixed with co-ordinated effort with downstreams.
insts, license, err := devicefilter.DeviceFilter(r.Devices)
if err != nil {
return err
}
dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0o600)
dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0600)
if err != nil {
return errors.Errorf("cannot get dir FD for %s", dirPath)
}
defer unix.Close(dirFD)
// XXX: This code is currently incorrect when it comes to updating an
// existing cgroup with new rules (new rulesets are just appended to
// the program list because this uses BPF_F_ALLOW_MULTI). If we didn't
// use BPF_F_ALLOW_MULTI we could actually atomically swap the
// programs.
//
// The real issue is that BPF_F_ALLOW_MULTI makes it hard to have a
// race-free blacklist because it acts as a whitelist by default, and
// having a deny-everything program cannot be overridden by other
// programs. You could temporarily insert a deny-everything program
// but that would result in spurious failures during updates.
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
if !canSkipEBPFError(r) {
return err

View File

@@ -3,20 +3,27 @@
package fs2
import (
"bufio"
stdErrors "errors"
"fmt"
"os"
"strings"
"time"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
func setFreezer(dirPath string, state configs.FreezerState) error {
if err := supportsFreezer(dirPath); err != nil {
// We can ignore this request as long as the user didn't ask us to
// freeze the container (since without the freezer cgroup, that's a
// no-op).
if state == configs.Undefined || state == configs.Thawed {
return nil
}
return errors.Wrap(err, "freezer not supported")
}
var stateStr string
switch state {
case configs.Undefined:
@@ -29,23 +36,11 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
return errors.Errorf("invalid freezer state %q requested", state)
}
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR)
if err != nil {
// We can ignore this request as long as the user didn't ask us to
// freeze the container (since without the freezer cgroup, that's a
// no-op).
if state != configs.Frozen {
return nil
}
return errors.Wrap(err, "freezer not supported")
}
defer fd.Close()
if _, err := fd.WriteString(stateStr); err != nil {
if err := fscommon.WriteFile(dirPath, "cgroup.freeze", stateStr); err != nil {
return err
}
// Confirm that the cgroup did actually change states.
if actualState, err := readFreezer(dirPath, fd); err != nil {
if actualState, err := getFreezer(dirPath); err != nil {
return err
} else if actualState != state {
return errors.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
@@ -53,8 +48,13 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
return nil
}
// supportsFreezer probes for freezer support by attempting to read
// "cgroup.freeze" in dirPath. It returns nil when the read succeeds and the
// read error otherwise.
func supportsFreezer(dirPath string) error {
	if _, err := fscommon.ReadFile(dirPath, "cgroup.freeze"); err != nil {
		return err
	}
	return nil
}
func getFreezer(dirPath string) (configs.FreezerState, error) {
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDONLY)
state, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
if err != nil {
// If the kernel is too old, then we just treat the freezer as being in
// an "undefined" state.
@@ -63,67 +63,12 @@ func getFreezer(dirPath string) (configs.FreezerState, error) {
}
return configs.Undefined, err
}
defer fd.Close()
return readFreezer(dirPath, fd)
}
func readFreezer(dirPath string, fd *os.File) (configs.FreezerState, error) {
if _, err := fd.Seek(0, 0); err != nil {
return configs.Undefined, err
}
state := make([]byte, 2)
if _, err := fd.Read(state); err != nil {
return configs.Undefined, err
}
switch string(state) {
case "0\n":
switch strings.TrimSpace(state) {
case "0":
return configs.Thawed, nil
case "1\n":
return waitFrozen(dirPath)
case "1":
return configs.Frozen, nil
default:
return configs.Undefined, errors.Errorf(`unknown "cgroup.freeze" state: %q`, state)
}
}
// waitFrozen polls cgroup.events until it sees "frozen 1" in it.
//
// It returns configs.Frozen once a line starting with "frozen 1" is read.
// In every other case it returns configs.Undefined: together with the error
// if cgroup.events cannot be opened or rewound, with a timeout error after
// maxIter unsuccessful checks, or with scanner.Err() (which may be nil) if
// scanning stops without a "frozen " line being found.
func waitFrozen(dirPath string) (configs.FreezerState, error) {
fd, err := cgroups.OpenFile(dirPath, "cgroup.events", unix.O_RDONLY)
if err != nil {
return configs.Undefined, err
}
defer fd.Close()
// XXX: Simple wait/read/retry is used here. An implementation
// based on poll(2) or inotify(7) is possible, but it makes the code
// much more complicated. Maybe address this later.
const (
// Perform maxIter with waitTime in between iterations.
// (10ms * 1000 gives a total budget of about 10 seconds.)
waitTime = 10 * time.Millisecond
maxIter = 1000
)
scanner := bufio.NewScanner(fd)
// i counts how many times a "frozen " line was seen without the value 1;
// it is only advanced inside the loop body, not in the for clause.
for i := 0; scanner.Scan(); {
if i == maxIter {
return configs.Undefined, fmt.Errorf("timeout of %s reached waiting for the cgroup to freeze", waitTime*maxIter)
}
line := scanner.Text()
// TrimPrefix returns its input unchanged when the prefix is absent, so
// val differing from line means this is the "frozen " line.
val := strings.TrimPrefix(line, "frozen ")
if val != line { // got prefix
// NOTE(review): val[0] would panic on a line that is exactly "frozen "
// with nothing after it -- presumably the kernel never emits that; confirm.
if val[0] == '1' {
return configs.Frozen, nil
}
i++
// wait, then re-read
time.Sleep(waitTime)
// Rewind the file so that subsequent reads by the scanner start again
// from the beginning of cgroup.events.
_, err := fd.Seek(0, 0)
if err != nil {
return configs.Undefined, err
}
}
}
// Should only reach here either on read error,
// or if the file does not contain "frozen " line.
return configs.Undefined, scanner.Err()
}

View File

@@ -51,7 +51,7 @@ func (m *manager) getControllers() error {
return nil
}
data, err := cgroups.ReadFile(m.dirPath, "cgroup.controllers")
data, err := fscommon.ReadFile(m.dirPath, "cgroup.controllers")
if err != nil {
if m.rootless && m.config.Path == "" {
return nil
@@ -98,7 +98,9 @@ func (m *manager) GetAllPids() ([]int, error) {
}
func (m *manager) GetStats() (*cgroups.Stats, error) {
var errs []error
var (
errs []error
)
st := cgroups.NewStats()
@@ -197,7 +199,7 @@ func (m *manager) setUnified(res map[string]string) error {
if strings.Contains(k, "/") {
return fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
}
if err := cgroups.WriteFile(m.dirPath, k, v); err != nil {
if err := fscommon.WriteFile(m.dirPath, k, v); err != nil {
errC := errors.Cause(err)
// Check for both EPERM and ENOENT since O_CREAT is used by WriteFile.
if errors.Is(errC, os.ErrPermission) || errors.Is(errC, os.ErrNotExist) {

View File

@@ -21,7 +21,7 @@ func setHugeTlb(dirPath string, r *configs.Resources) error {
return nil
}
for _, hugetlb := range r.HugetlbLimit {
if err := cgroups.WriteFile(dirPath, "hugetlb."+hugetlb.Pagesize+".max", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
if err := fscommon.WriteFile(dirPath, "hugetlb."+hugetlb.Pagesize+".max", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
return err
}
}

View File

@@ -4,95 +4,60 @@ package fs2
import (
"bufio"
"bytes"
"fmt"
"os"
"strconv"
"strings"
"github.com/sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)
// isIoSet reports whether any block I/O setting is present in r: a non-zero
// weight, per-device weights, or any of the read/write bps/IOPS throttle
// device lists.
func isIoSet(r *configs.Resources) bool {
	if r.BlkioWeight != 0 {
		return true
	}
	// Slice lengths are never negative, so the sum is non-zero exactly when
	// at least one of the per-device lists is non-empty.
	perDevice := len(r.BlkioWeightDevice) +
		len(r.BlkioThrottleReadBpsDevice) +
		len(r.BlkioThrottleWriteBpsDevice) +
		len(r.BlkioThrottleReadIOPSDevice) +
		len(r.BlkioThrottleWriteIOPSDevice)
	return perDevice != 0
}
// bfqDeviceWeightSupported checks for per-device BFQ weight support (added
// in kernel v5.4, commit 795fe54c2a8) by reading from "io.bfq.weight".
//
// bfq is an already opened "io.bfq.weight" file; a nil bfq yields false.
// The file is rewound and up to 32 bytes are read from it. If the data is
// just a single number (the default weight), the kernel is older and false
// is returned. False is also returned when nothing could be read, since
// support cannot be confirmed in that case. Any other content yields true.
func bfqDeviceWeightSupported(bfq *os.File) bool {
	if bfq == nil {
		return false
	}
	// Best effort: a failed seek is ignored and we read from wherever the
	// file offset currently is.
	_, _ = bfq.Seek(0, 0)
	buf := make([]byte, 32)
	n, _ := bfq.Read(buf)
	if n <= 0 {
		return false
	}
	// Only parse the bytes actually read. The unused tail of buf is filled
	// with NUL bytes, which bytes.TrimSpace does not strip; parsing the whole
	// buffer would make ParseInt fail even for a plain number.
	//
	// If only a single number (default weight) is read back, we have an
	// older kernel.
	_, err := strconv.ParseInt(string(bytes.TrimSpace(buf[:n])), 10, 64)
	return err != nil
}
func setIo(dirPath string, r *configs.Resources) error {
if !isIoSet(r) {
return nil
}
// If BFQ IO scheduler is available, use it.
var bfq *os.File
if r.BlkioWeight != 0 || len(r.BlkioWeightDevice) > 0 {
var err error
bfq, err = cgroups.OpenFile(dirPath, "io.bfq.weight", os.O_RDWR)
if err == nil {
defer bfq.Close()
} else if !os.IsNotExist(err) {
return err
}
}
if r.BlkioWeight != 0 {
if bfq != nil { // Use BFQ.
if _, err := bfq.WriteString(strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
filename := "io.bfq.weight"
if err := fscommon.WriteFile(dirPath, filename,
strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
// if io.bfq.weight does not exist, then bfq module is not loaded.
// Fallback to use io.weight with a conversion scheme
if !os.IsNotExist(err) {
return err
}
} else {
// Fallback to io.weight with a conversion scheme.
v := cgroups.ConvertBlkIOToIOWeightValue(r.BlkioWeight)
if err := cgroups.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil {
if err := fscommon.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil {
return err
}
}
}
if bfqDeviceWeightSupported(bfq) {
for _, wd := range r.BlkioWeightDevice {
if _, err := bfq.WriteString(wd.WeightString() + "\n"); err != nil {
return fmt.Errorf("setting device weight %q: %w", wd.WeightString(), err)
}
}
}
for _, td := range r.BlkioThrottleReadBpsDevice {
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleWriteBpsDevice {
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleReadIOPSDevice {
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleWriteIOPSDevice {
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
return err
}
}
@@ -102,7 +67,7 @@ func setIo(dirPath string, r *configs.Resources) error {
func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) {
ret := map[string][]string{}
f, err := cgroups.OpenFile(dirPath, name, os.O_RDONLY)
f, err := fscommon.OpenFile(dirPath, name, os.O_RDONLY)
if err != nil {
return nil, err
}
@@ -123,22 +88,22 @@ func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error
}
func statIo(dirPath string, stats *cgroups.Stats) error {
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
var ioServiceBytesRecursive []cgroups.BlkioStatEntry
values, err := readCgroup2MapFile(dirPath, "io.stat")
if err != nil {
return err
}
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
var parsedStats cgroups.BlkioStats
for k, v := range values {
d := strings.Split(k, ":")
if len(d) != 2 {
continue
}
major, err := strconv.ParseUint(d[0], 10, 64)
major, err := strconv.ParseUint(d[0], 10, 0)
if err != nil {
return err
}
minor, err := strconv.ParseUint(d[1], 10, 64)
minor, err := strconv.ParseUint(d[1], 10, 0)
if err != nil {
return err
}
@@ -150,32 +115,15 @@ func statIo(dirPath string, stats *cgroups.Stats) error {
}
op := d[0]
// Map to the cgroupv1 naming and layout (in separate tables).
var targetTable *[]cgroups.BlkioStatEntry
// Accommodate the cgroup v1 naming
switch op {
// Equivalent to cgroupv1's blkio.io_service_bytes.
case "rbytes":
op = "Read"
targetTable = &parsedStats.IoServiceBytesRecursive
op = "read"
case "wbytes":
op = "Write"
targetTable = &parsedStats.IoServiceBytesRecursive
// Equivalent to cgroupv1's blkio.io_serviced.
case "rios":
op = "Read"
targetTable = &parsedStats.IoServicedRecursive
case "wios":
op = "Write"
targetTable = &parsedStats.IoServicedRecursive
default:
// Skip over entries we cannot map to cgroupv1 stats for now.
// In the future we should expand the stats struct to include
// them.
logrus.Debugf("cgroupv2 io stats: skipping over unmappable %s entry", item)
continue
op = "write"
}
value, err := strconv.ParseUint(d[1], 10, 64)
value, err := strconv.ParseUint(d[1], 10, 0)
if err != nil {
return err
}
@@ -186,9 +134,9 @@ func statIo(dirPath string, stats *cgroups.Stats) error {
Minor: minor,
Value: value,
}
*targetTable = append(*targetTable, entry)
ioServiceBytesRecursive = append(ioServiceBytesRecursive, entry)
}
}
stats.BlkioStats = parsedStats
stats.BlkioStats = cgroups.BlkioStats{IoServiceBytesRecursive: ioServiceBytesRecursive}
return nil
}

View File

@@ -52,13 +52,13 @@ func setMemory(dirPath string, r *configs.Resources) error {
}
// never write empty string to `memory.swap.max`, it means set to 0.
if swapStr != "" {
if err := cgroups.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil {
if err := fscommon.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil {
return err
}
}
if val := numToStr(r.Memory); val != "" {
if err := cgroups.WriteFile(dirPath, "memory.max", val); err != nil {
if err := fscommon.WriteFile(dirPath, "memory.max", val); err != nil {
return err
}
}
@@ -66,7 +66,7 @@ func setMemory(dirPath string, r *configs.Resources) error {
// cgroup.Resources.KernelMemory is ignored
if val := numToStr(r.MemoryReservation); val != "" {
if err := cgroups.WriteFile(dirPath, "memory.low", val); err != nil {
if err := fscommon.WriteFile(dirPath, "memory.low", val); err != nil {
return err
}
}
@@ -76,7 +76,7 @@ func setMemory(dirPath string, r *configs.Resources) error {
func statMemory(dirPath string, stats *cgroups.Stats) error {
// Set stats from memory.stat.
statsFile, err := cgroups.OpenFile(dirPath, "memory.stat", os.O_RDONLY)
statsFile, err := fscommon.OpenFile(dirPath, "memory.stat", os.O_RDONLY)
if err != nil {
return err
}

View File

@@ -23,7 +23,7 @@ func setPids(dirPath string, r *configs.Resources) error {
return nil
}
if val := numToStr(r.PidsLimit); val != "" {
if err := cgroups.WriteFile(dirPath, "pids.max", val); err != nil {
if err := fscommon.WriteFile(dirPath, "pids.max", val); err != nil {
return err
}
}
@@ -34,9 +34,9 @@ func setPids(dirPath string, r *configs.Resources) error {
func statPidsFromCgroupProcs(dirPath string, stats *cgroups.Stats) error {
// if the controller is not enabled, let's read the PIDs from cgroup.procs
// (or from cgroup.threads if reading cgroup.procs fails with ENOTSUP)
contents, err := cgroups.ReadFile(dirPath, "cgroup.procs")
contents, err := fscommon.ReadFile(dirPath, "cgroup.procs")
if errors.Is(err, unix.ENOTSUP) {
contents, err = cgroups.ReadFile(dirPath, "cgroup.threads")
contents, err = fscommon.ReadFile(dirPath, "cgroup.threads")
}
if err != nil {
return err

View File

@@ -0,0 +1,51 @@
// +build linux
package fscommon
import (
"bytes"
"os"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// WriteFile opens the given cgroup file in dir write-only and writes data
// to it. An open error is returned as is; a write error is wrapped with the
// data that failed to be written.
// It is supposed to be used for cgroup files only.
func WriteFile(dir, file, data string) error {
	fd, openErr := OpenFile(dir, file, unix.O_WRONLY)
	if openErr != nil {
		return openErr
	}
	defer fd.Close()

	writeErr := retryingWriteFile(fd, data)
	if writeErr == nil {
		return nil
	}
	return errors.Wrapf(writeErr, "failed to write %q", data)
}
// ReadFile opens the given cgroup file in dir read-only and returns its
// whole content as a string. If reading fails part-way, whatever was read so
// far is returned together with the error.
// It is supposed to be used for cgroup files only.
func ReadFile(dir, file string) (string, error) {
	fd, openErr := OpenFile(dir, file, unix.O_RDONLY)
	if openErr != nil {
		return "", openErr
	}
	defer fd.Close()

	content := new(bytes.Buffer)
	_, readErr := content.ReadFrom(fd)
	return content.String(), readErr
}
// retryingWriteFile writes data to fd, repeating the write for as long as it
// fails with EINTR (each interruption is logged). The result of the first
// write that is not interrupted is returned.
func retryingWriteFile(fd *os.File, data string) error {
	payload := []byte(data)
	for {
		_, err := fd.Write(payload)
		if !errors.Is(err, unix.EINTR) {
			return err
		}
		logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
	}
}

View File

@@ -1,7 +1,6 @@
package cgroups
package fscommon
import (
"bytes"
"os"
"strings"
"sync"
@@ -11,54 +10,6 @@ import (
"golang.org/x/sys/unix"
)
// OpenFile opens a cgroup file in a given dir with given flags.
// It is supposed to be used for cgroup files only.
func OpenFile(dir, file string, flags int) (*os.File, error) {
if dir == "" {
return nil, errors.Errorf("no directory specified for %s", file)
}
return openFile(dir, file, flags)
}
// ReadFile opens the given cgroup file in dir read-only and returns its
// whole content as a string. If reading fails part-way, whatever was read so
// far is returned together with the error.
// It is supposed to be used for cgroup files only.
func ReadFile(dir, file string) (string, error) {
	fd, openErr := OpenFile(dir, file, unix.O_RDONLY)
	if openErr != nil {
		return "", openErr
	}
	defer fd.Close()

	content := new(bytes.Buffer)
	_, readErr := content.ReadFrom(fd)
	return content.String(), readErr
}
// WriteFile opens the given cgroup file in dir write-only and writes data
// to it. An open error is returned as is; a write error is wrapped with the
// data that failed to be written.
// It is supposed to be used for cgroup files only.
func WriteFile(dir, file, data string) error {
	fd, openErr := OpenFile(dir, file, unix.O_WRONLY)
	if openErr != nil {
		return openErr
	}
	defer fd.Close()

	writeErr := retryingWriteFile(fd, data)
	if writeErr == nil {
		return nil
	}
	return errors.Wrapf(writeErr, "failed to write %q", data)
}
// retryingWriteFile writes data to fd, repeating the write for as long as it
// fails with EINTR (each interruption is logged). The result of the first
// write that is not interrupted is returned.
func retryingWriteFile(fd *os.File, data string) error {
	payload := []byte(data)
	for {
		_, err := fd.Write(payload)
		if !errors.Is(err, unix.EINTR) {
			return err
		}
		logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
	}
}
const (
cgroupfsDir = "/sys/fs/cgroup"
cgroupfsPrefix = cgroupfsDir + "/"
@@ -77,8 +28,7 @@ var (
func prepareOpenat2() error {
prepOnce.Do(func() {
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
Flags: unix.O_DIRECTORY | unix.O_PATH,
})
Flags: unix.O_DIRECTORY | unix.O_PATH})
if err != nil {
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
if err != unix.ENOSYS {
@@ -102,6 +52,7 @@ func prepareOpenat2() error {
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS
}
})
return prepErr
@@ -109,7 +60,10 @@ func prepareOpenat2() error {
// OpenFile opens a cgroup file in a given dir with given flags.
// It is supposed to be used for cgroup files only.
func openFile(dir, file string, flags int) (*os.File, error) {
func OpenFile(dir, file string, flags int) (*os.File, error) {
if dir == "" {
return nil, errors.Errorf("no directory specified for %s", file)
}
mode := os.FileMode(0)
if TestMode && flags&os.O_WRONLY != 0 {
// "emulate" cgroup fs for unit tests

View File

@@ -8,19 +8,10 @@ import (
"math"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
var (
ErrNotValidFormat = errors.New("line is not a valid key value format")
// Deprecated: use cgroups.OpenFile instead.
OpenFile = cgroups.OpenFile
// Deprecated: use cgroups.ReadFile instead.
ReadFile = cgroups.ReadFile
// Deprecated: use cgroups.WriteFile instead.
WriteFile = cgroups.WriteFile
)
// ParseUint converts a string to an uint64 integer.
@@ -66,7 +57,7 @@ func ParseKeyValue(t string) (string, uint64, error) {
// and returns a value of the specified key. ParseUint is used for value
// conversion.
func GetValueByKey(path, file, key string) (uint64, error) {
content, err := cgroups.ReadFile(path, file)
content, err := ReadFile(path, file)
if err != nil {
return 0, err
}
@@ -104,7 +95,7 @@ func GetCgroupParamUint(path, file string) (uint64, error) {
// GetCgroupParamInt reads a single int64 value from specified cgroup file.
// If the value read is "max", the math.MaxInt64 is returned.
func GetCgroupParamInt(path, file string) (int64, error) {
contents, err := cgroups.ReadFile(path, file)
contents, err := ReadFile(path, file)
if err != nil {
return 0, err
}
@@ -122,7 +113,7 @@ func GetCgroupParamInt(path, file string) (int64, error) {
// GetCgroupParamString reads a string from the specified cgroup file.
func GetCgroupParamString(path, file string) (string, error) {
contents, err := cgroups.ReadFile(path, file)
contents, err := ReadFile(path, file)
if err != nil {
return "", err
}

View File

@@ -158,27 +158,14 @@ func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) {
return "", nil
}
// DeviceAllow is the dbus type "a(ss)" which means we need a struct
// to represent it in Go.
type deviceAllowEntry struct {
Path string
Perms string
}
func allowAllDevices() []systemdDbus.Property {
// Setting mode to auto and removing all DeviceAllow rules
// results in allowing access to all devices.
return []systemdDbus.Property{
newProp("DevicePolicy", "auto"),
newProp("DeviceAllow", []deviceAllowEntry{}),
}
}
// generateDeviceProperties takes the configured device rules and generates a
// corresponding set of systemd properties to configure the devices correctly.
func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, error) {
if r.SkipDevices {
return nil, nil
func generateDeviceProperties(rules []*devices.Rule) ([]systemdDbus.Property, error) {
// DeviceAllow is the type "a(ss)" which means we need a temporary struct
// to represent it in Go.
type deviceAllowEntry struct {
Path string
Perms string
}
properties := []systemdDbus.Property{
@@ -190,7 +177,7 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
// Figure out the set of rules.
configEmu := &cgroupdevices.Emulator{}
for _, rule := range r.Devices {
for _, rule := range rules {
if err := configEmu.Apply(*rule); err != nil {
return nil, errors.Wrap(err, "apply rule for systemd")
}
@@ -202,7 +189,12 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
if configEmu.IsBlacklist() {
// However, if we're dealing with an allow-all rule then we can do it.
if configEmu.IsAllowAll() {
return allowAllDevices(), nil
return []systemdDbus.Property{
// Run in white-list mode by setting to "auto" and removing all
// DeviceAllow rules.
newProp("DevicePolicy", "auto"),
newProp("DeviceAllow", []deviceAllowEntry{}),
}, nil
}
logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule")
return properties, nil
@@ -211,7 +203,8 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
// Now generate the set of rules we actually need to apply. Unlike the
// normal devices cgroup, in "strict" mode systemd defaults to a deny-all
// whitelist which is the default for devices.Emulator.
finalRules, err := configEmu.Rules()
baseEmu := &cgroupdevices.Emulator{}
finalRules, err := baseEmu.Transition(configEmu)
if err != nil {
return nil, errors.Wrap(err, "get simplified rules for systemd")
}
@@ -313,7 +306,7 @@ func getUnitName(c *configs.Cgroup) string {
// isDbusError returns true if the error is a specific dbus error.
func isDbusError(err error, name string) bool {
if err != nil {
var derr dbus.Error
var derr *dbus.Error
if errors.As(err, &derr) {
return strings.Contains(derr.Name, name)
}
@@ -362,9 +355,6 @@ func stopUnit(cm *dbusConnManager, unitName string) error {
return err
})
if err == nil {
timeout := time.NewTimer(30 * time.Second)
defer timeout.Stop()
select {
case s := <-statusChan:
close(statusChan)
@@ -372,8 +362,8 @@ func stopUnit(cm *dbusConnManager, unitName string) error {
if s != "done" {
logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s)
}
case <-timeout.C:
return errors.New("Timed out while waiting for systemd to remove " + unitName)
case <-time.After(time.Second):
logrus.Warnf("Timed out while waiting for StopUnit(%s) completion signal from dbus. Continuing...", unitName)
}
}
return nil
@@ -486,7 +476,7 @@ func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems st
}
if cpus != "" {
bits, err := RangeToBits(cpus)
bits, err := rangeToBits(cpus)
if err != nil {
return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w",
cpus, err)
@@ -495,7 +485,7 @@ func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems st
newProp("AllowedCPUs", bits))
}
if mems != "" {
bits, err := RangeToBits(mems)
bits, err := rangeToBits(mems)
if err != nil {
return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w",
mems, err)

View File

@@ -5,15 +5,15 @@ import (
"strconv"
"strings"
"github.com/bits-and-blooms/bitset"
"github.com/pkg/errors"
"github.com/willf/bitset"
)
// RangeToBits converts a text representation of a CPU mask (as written to
// rangeToBits converts a text representation of a CPU mask (as written to
// or read from cgroups' cpuset.* files, e.g. "1,3-5") to a slice of bytes
// with the corresponding bits set (as consumed by systemd over dbus as
// AllowedCPUs/AllowedMemoryNodes unit property value).
func RangeToBits(str string) ([]byte, error) {
func rangeToBits(str string) ([]byte, error) {
bits := &bitset.BitSet{}
for _, r := range strings.Split(str, ",") {

View File

@@ -17,16 +17,14 @@ var (
dbusRootless bool
)
type dbusConnManager struct{}
type dbusConnManager struct {
}
// newDbusConnManager initializes systemd dbus connection manager.
//
// The rootless mode is recorded in package-level state while dbusMu is held.
// It panics when a previous call already initialized that state with the
// opposite rootless setting.
func newDbusConnManager(rootless bool) *dbusConnManager {
	dbusMu.Lock()
	defer dbusMu.Unlock()

	// Root and rootless dbus cannot be mixed once the state is initialized.
	modeConflict := dbusInited && rootless != dbusRootless
	if modeConflict {
		panic("can't have both root and rootless dbus")
	}

	dbusInited, dbusRootless = true, rootless
	return new(dbusConnManager)
}

View File

@@ -61,7 +61,7 @@ var legacySubsystems = []subsystem{
func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
var properties []systemdDbus.Property
deviceProperties, err := generateDeviceProperties(r)
deviceProperties, err := generateDeviceProperties(r.Devices)
if err != nil {
return nil, err
}
@@ -207,10 +207,9 @@ func (m *legacyManager) Destroy() error {
stopErr := stopUnit(m.dbus, getUnitName(m.cgroups))
// Both on success and on error, cleanup all the cgroups
// we are aware of, as some of them were created directly
// by Apply() and are not managed by systemd.
if err := cgroups.RemovePaths(m.paths); err != nil && stopErr == nil {
// Both on success and on error, cleanup all the cgroups we are aware of.
// Some of them were created directly by Apply() and are not managed by systemd.
if err := cgroups.RemovePaths(m.paths); err != nil {
return err
}
@@ -238,7 +237,7 @@ func (m *legacyManager) joinCgroups(pid int) error {
}
default:
if path, ok := m.paths[name]; ok {
if err := os.MkdirAll(path, 0o755); err != nil {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
@@ -339,24 +338,27 @@ func (m *legacyManager) Set(r *configs.Resources) error {
return err
}
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err := m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
// We have to freeze the container while systemd sets the cgroup settings.
// The reason for this is that systemd's application of DeviceAllow rules
// is done disruptively, resulting in spurious errors to common devices
// (unlike our fs driver, they will happily write deny-all rules to running
// containers). So we freeze the container to avoid them hitting the cgroup
// error. But if the freezer cgroup isn't supported, we just warn about it.
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
targetFreezerState := configs.Undefined
if !m.cgroups.SkipDevices {
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err = m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
}
}
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {

View File

@@ -96,7 +96,7 @@ func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props
newProp("CPUWeight", num))
case "cpuset.cpus", "cpuset.mems":
bits, err := RangeToBits(v)
bits, err := rangeToBits(v)
if err != nil {
return nil, fmt.Errorf("unified resource %q=%q conversion error: %w", k, v, err)
}
@@ -172,7 +172,7 @@ func genV2ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]syst
// aren't the end of the world, but it is a bit concerning. However
// it's unclear if systemd removes all eBPF programs attached when
// doing SetUnitProperties...
deviceProperties, err := generateDeviceProperties(r)
deviceProperties, err := generateDeviceProperties(r.Devices)
if err != nil {
return nil, err
}
@@ -418,24 +418,27 @@ func (m *unifiedManager) Set(r *configs.Resources) error {
return err
}
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err := m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
// We have to freeze the container while systemd sets the cgroup settings.
// The reason for this is that systemd's application of DeviceAllow rules
// is done disruptively, resulting in spurious errors to common devices
// (unlike our fs driver, they will happily write deny-all rules to running
// containers). So we freeze the container to avoid them hitting the cgroup
// error. But if the freezer cgroup isn't supported, we just warn about it.
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
targetFreezerState := configs.Undefined
if !m.cgroups.SkipDevices {
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err = m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
}
}
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {

View File

@@ -15,6 +15,7 @@ import (
"sync"
"time"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/userns"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
@@ -87,7 +88,7 @@ func GetAllSubsystems() ([]string, error) {
// - freezer: implemented in kernel 5.2
// We assume these are always available, as it is hard to detect availability.
pseudo := []string{"devices", "freezer"}
data, err := ReadFile("/sys/fs/cgroup", "cgroup.controllers")
data, err := fscommon.ReadFile("/sys/fs/cgroup", "cgroup.controllers")
if err != nil {
return nil, err
}
@@ -266,6 +267,7 @@ func RemovePaths(paths map[string]string) (err error) {
case retries - 1:
logrus.WithError(err).Error("Failed to remove cgroup")
}
}
_, err := os.Stat(p)
// We need this strange way of checking cgroups existence because
@@ -374,7 +376,7 @@ func WriteCgroupProc(dir string, pid int) error {
return nil
}
file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY)
file, err := fscommon.OpenFile(dir, CgroupProcesses, os.O_WRONLY)
if err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
}