vendor: update google/cadvisor and opencontainers/runc
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
41
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/BUILD
generated
vendored
41
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/BUILD
generated
vendored
@@ -3,73 +3,66 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"apply_nosystemd.go",
|
||||
"apply_systemd.go",
|
||||
"unified_hierarchy.go",
|
||||
"common.go",
|
||||
"unsupported.go",
|
||||
"user.go",
|
||||
"v1.go",
|
||||
"v2.go",
|
||||
],
|
||||
importmap = "k8s.io/kubernetes/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd",
|
||||
importpath = "github.com/opencontainers/runc/libcontainer/cgroups/systemd",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = select({
|
||||
deps = [
|
||||
"//vendor/github.com/coreos/go-systemd/v22/dbus:go_default_library",
|
||||
"//vendor/github.com/godbus/dbus/v5:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/github.com/sirupsen/logrus:go_default_library",
|
||||
] + select({
|
||||
"@io_bazel_rules_go//go/platform:android": [
|
||||
"//vendor/github.com/coreos/go-systemd/dbus:go_default_library",
|
||||
"//vendor/github.com/godbus/dbus:go_default_library",
|
||||
"//vendor/github.com/cyphar/filepath-securejoin:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/github.com/sirupsen/logrus:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:darwin": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:dragonfly": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:freebsd": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:ios": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:linux": [
|
||||
"//vendor/github.com/coreos/go-systemd/dbus:go_default_library",
|
||||
"//vendor/github.com/godbus/dbus:go_default_library",
|
||||
"//vendor/github.com/cyphar/filepath-securejoin:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/github.com/sirupsen/logrus:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:nacl": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:netbsd": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:openbsd": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:plan9": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:solaris": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:windows": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
|
424
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
Normal file
424
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
Normal file
@@ -0,0 +1,424 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var (
|
||||
connOnce sync.Once
|
||||
connDbus *systemdDbus.Conn
|
||||
connErr error
|
||||
|
||||
versionOnce sync.Once
|
||||
version int
|
||||
versionErr error
|
||||
)
|
||||
|
||||
// NOTE: This function comes from package github.com/coreos/go-systemd/util
|
||||
// It was borrowed here to avoid a dependency on cgo.
|
||||
//
|
||||
// IsRunningSystemd checks whether the host was booted with systemd as its init
|
||||
// system. This functions similarly to systemd's `sd_booted(3)`: internally, it
|
||||
// checks whether /run/systemd/system/ exists and is a directory.
|
||||
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
|
||||
func IsRunningSystemd() bool {
|
||||
fi, err := os.Lstat("/run/systemd/system")
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return fi.IsDir()
|
||||
}
|
||||
|
||||
// systemd represents slice hierarchy using `-`, so we need to follow suit when
|
||||
// generating the path of slice. Essentially, test-a-b.slice becomes
|
||||
// /test.slice/test-a.slice/test-a-b.slice.
|
||||
func ExpandSlice(slice string) (string, error) {
|
||||
suffix := ".slice"
|
||||
// Name has to end with ".slice", but can't be just ".slice".
|
||||
if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Path-separators are not allowed.
|
||||
if strings.Contains(slice, "/") {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
var path, prefix string
|
||||
sliceName := strings.TrimSuffix(slice, suffix)
|
||||
// if input was -.slice, we should just return root now
|
||||
if sliceName == "-" {
|
||||
return "/", nil
|
||||
}
|
||||
for _, component := range strings.Split(sliceName, "-") {
|
||||
// test--a.slice isn't permitted, nor is -test.slice.
|
||||
if component == "" {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Append the component to the path and to the prefix.
|
||||
path += "/" + prefix + component + suffix
|
||||
prefix += component + "-"
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func groupPrefix(ruleType configs.DeviceType) (string, error) {
|
||||
switch ruleType {
|
||||
case configs.BlockDevice:
|
||||
return "block-", nil
|
||||
case configs.CharDevice:
|
||||
return "char-", nil
|
||||
default:
|
||||
return "", errors.Errorf("device type %v has no group prefix", ruleType)
|
||||
}
|
||||
}
|
||||
|
||||
// findDeviceGroup tries to find the device group name (as listed in
|
||||
// /proc/devices) with the type prefixed as requried for DeviceAllow, for a
|
||||
// given (type, major) combination. If more than one device group exists, an
|
||||
// arbitrary one is chosen.
|
||||
func findDeviceGroup(ruleType configs.DeviceType, ruleMajor int64) (string, error) {
|
||||
fh, err := os.Open("/proc/devices")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
prefix, err := groupPrefix(ruleType)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(fh)
|
||||
var currentType configs.DeviceType
|
||||
for scanner.Scan() {
|
||||
// We need to strip spaces because the first number is column-aligned.
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
|
||||
// Handle the "header" lines.
|
||||
switch line {
|
||||
case "Block devices:":
|
||||
currentType = configs.BlockDevice
|
||||
continue
|
||||
case "Character devices:":
|
||||
currentType = configs.CharDevice
|
||||
continue
|
||||
case "":
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip lines unrelated to our type.
|
||||
if currentType != ruleType {
|
||||
continue
|
||||
}
|
||||
|
||||
// Parse out the (major, name).
|
||||
var (
|
||||
currMajor int64
|
||||
currName string
|
||||
)
|
||||
if n, err := fmt.Sscanf(line, "%d %s", &currMajor, &currName); err != nil || n != 2 {
|
||||
if err == nil {
|
||||
err = errors.Errorf("wrong number of fields")
|
||||
}
|
||||
return "", errors.Wrapf(err, "scan /proc/devices line %q", line)
|
||||
}
|
||||
|
||||
if currMajor == ruleMajor {
|
||||
return prefix + currName, nil
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return "", errors.Wrap(err, "reading /proc/devices")
|
||||
}
|
||||
// Couldn't find the device group.
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// generateDeviceProperties takes the configured device rules and generates a
|
||||
// corresponding set of systemd properties to configure the devices correctly.
|
||||
func generateDeviceProperties(rules []*configs.DeviceRule) ([]systemdDbus.Property, error) {
|
||||
// DeviceAllow is the type "a(ss)" which means we need a temporary struct
|
||||
// to represent it in Go.
|
||||
type deviceAllowEntry struct {
|
||||
Path string
|
||||
Perms string
|
||||
}
|
||||
|
||||
properties := []systemdDbus.Property{
|
||||
// Always run in the strictest white-list mode.
|
||||
newProp("DevicePolicy", "strict"),
|
||||
// Empty the DeviceAllow array before filling it.
|
||||
newProp("DeviceAllow", []deviceAllowEntry{}),
|
||||
}
|
||||
|
||||
// Figure out the set of rules.
|
||||
configEmu := &devices.Emulator{}
|
||||
for _, rule := range rules {
|
||||
if err := configEmu.Apply(*rule); err != nil {
|
||||
return nil, errors.Wrap(err, "apply rule for systemd")
|
||||
}
|
||||
}
|
||||
// systemd doesn't support blacklists. So we log a warning, and tell
|
||||
// systemd to act as a deny-all whitelist. This ruleset will be replaced
|
||||
// with our normal fallback code. This may result in spurrious errors, but
|
||||
// the only other option is to error out here.
|
||||
if configEmu.IsBlacklist() {
|
||||
// However, if we're dealing with an allow-all rule then we can do it.
|
||||
if configEmu.IsAllowAll() {
|
||||
return []systemdDbus.Property{
|
||||
// Run in white-list mode by setting to "auto" and removing all
|
||||
// DeviceAllow rules.
|
||||
newProp("DevicePolicy", "auto"),
|
||||
newProp("DeviceAllow", []deviceAllowEntry{}),
|
||||
}, nil
|
||||
}
|
||||
logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule")
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
// Now generate the set of rules we actually need to apply. Unlike the
|
||||
// normal devices cgroup, in "strict" mode systemd defaults to a deny-all
|
||||
// whitelist which is the default for devices.Emulator.
|
||||
baseEmu := &devices.Emulator{}
|
||||
finalRules, err := baseEmu.Transition(configEmu)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "get simplified rules for systemd")
|
||||
}
|
||||
var deviceAllowList []deviceAllowEntry
|
||||
for _, rule := range finalRules {
|
||||
if !rule.Allow {
|
||||
// Should never happen.
|
||||
return nil, errors.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule)
|
||||
}
|
||||
switch rule.Type {
|
||||
case configs.BlockDevice, configs.CharDevice:
|
||||
default:
|
||||
// Should never happen.
|
||||
return nil, errors.Errorf("invalid device type for DeviceAllow: %v", rule.Type)
|
||||
}
|
||||
|
||||
entry := deviceAllowEntry{
|
||||
Perms: string(rule.Permissions),
|
||||
}
|
||||
|
||||
// systemd has a fairly odd (though understandable) syntax here, and
|
||||
// because of the OCI configuration format we have to do quite a bit of
|
||||
// trickery to convert things:
|
||||
//
|
||||
// * Concrete rules with non-wildcard major/minor numbers have to use
|
||||
// /dev/{block,char} paths. This is slightly odd because it means
|
||||
// that we cannot add whitelist rules for devices that don't exist,
|
||||
// but there's not too much we can do about that.
|
||||
//
|
||||
// However, path globbing is not support for path-based rules so we
|
||||
// need to handle wildcards in some other manner.
|
||||
//
|
||||
// * Wildcard-minor rules have to specify a "device group name" (the
|
||||
// second column in /proc/devices).
|
||||
//
|
||||
// * Wildcard (major and minor) rules can just specify a glob with the
|
||||
// type ("char-*" or "block-*").
|
||||
//
|
||||
// The only type of rule we can't handle is wildcard-major rules, and
|
||||
// so we'll give a warning in that case (note that the fallback code
|
||||
// will insert any rules systemd couldn't handle). What amazing fun.
|
||||
|
||||
if rule.Major == configs.Wildcard {
|
||||
// "_ *:n _" rules aren't supported by systemd.
|
||||
if rule.Minor != configs.Wildcard {
|
||||
logrus.Warnf("systemd doesn't support '*:n' device rules -- temporarily ignoring rule: %v", *rule)
|
||||
continue
|
||||
}
|
||||
|
||||
// "_ *:* _" rules just wildcard everything.
|
||||
prefix, err := groupPrefix(rule.Type)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
entry.Path = prefix + "*"
|
||||
} else if rule.Minor == configs.Wildcard {
|
||||
// "_ n:* _" rules require a device group from /proc/devices.
|
||||
group, err := findDeviceGroup(rule.Type, rule.Major)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "find device '%v/%d'", rule.Type, rule.Major)
|
||||
}
|
||||
if group == "" {
|
||||
// Couldn't find a group.
|
||||
logrus.Warnf("could not find device group for '%v/%d' in /proc/devices -- temporarily ignoring rule: %v", rule.Type, rule.Major, *rule)
|
||||
continue
|
||||
}
|
||||
entry.Path = group
|
||||
} else {
|
||||
// "_ n:m _" rules are just a path in /dev/{block,char}/.
|
||||
switch rule.Type {
|
||||
case configs.BlockDevice:
|
||||
entry.Path = fmt.Sprintf("/dev/block/%d:%d", rule.Major, rule.Minor)
|
||||
case configs.CharDevice:
|
||||
entry.Path = fmt.Sprintf("/dev/char/%d:%d", rule.Major, rule.Minor)
|
||||
}
|
||||
}
|
||||
deviceAllowList = append(deviceAllowList, entry)
|
||||
}
|
||||
|
||||
properties = append(properties, newProp("DeviceAllow", deviceAllowList))
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
// getDbusConnection lazy initializes systemd dbus connection
|
||||
// and returns it
|
||||
func getDbusConnection(rootless bool) (*systemdDbus.Conn, error) {
|
||||
connOnce.Do(func() {
|
||||
if rootless {
|
||||
connDbus, connErr = NewUserSystemdDbus()
|
||||
} else {
|
||||
connDbus, connErr = systemdDbus.New()
|
||||
}
|
||||
})
|
||||
return connDbus, connErr
|
||||
}
|
||||
|
||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
Value: dbus.MakeVariant(units),
|
||||
}
|
||||
}
|
||||
|
||||
func getUnitName(c *configs.Cgroup) string {
|
||||
// by default, we create a scope unless the user explicitly asks for a slice.
|
||||
if !strings.HasSuffix(c.Name, ".slice") {
|
||||
return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
|
||||
}
|
||||
return c.Name
|
||||
}
|
||||
|
||||
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||
func isUnitExists(err error) bool {
|
||||
if err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func startUnit(dbusConnection *systemdDbus.Conn, unitName string, properties []systemdDbus.Property) error {
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := dbusConnection.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
|
||||
select {
|
||||
case s := <-statusChan:
|
||||
close(statusChan)
|
||||
// Please refer to https://godoc.org/github.com/coreos/go-systemd/dbus#Conn.StartUnit
|
||||
if s != "done" {
|
||||
dbusConnection.ResetFailedUnit(unitName)
|
||||
return errors.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
|
||||
}
|
||||
case <-time.After(time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", unitName)
|
||||
}
|
||||
} else if !isUnitExists(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func stopUnit(dbusConnection *systemdDbus.Conn, unitName string) error {
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := dbusConnection.StopUnit(unitName, "replace", statusChan); err == nil {
|
||||
select {
|
||||
case s := <-statusChan:
|
||||
close(statusChan)
|
||||
// Please refer to https://godoc.org/github.com/coreos/go-systemd/dbus#Conn.StartUnit
|
||||
if s != "done" {
|
||||
logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s)
|
||||
}
|
||||
case <-time.After(time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StopUnit(%s) completion signal from dbus. Continuing...", unitName)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func systemdVersion(conn *systemdDbus.Conn) (int, error) {
|
||||
versionOnce.Do(func() {
|
||||
version = -1
|
||||
verStr, err := conn.GetManagerProperty("Version")
|
||||
if err != nil {
|
||||
versionErr = err
|
||||
return
|
||||
}
|
||||
|
||||
version, versionErr = systemdVersionAtoi(verStr)
|
||||
return
|
||||
})
|
||||
|
||||
return version, versionErr
|
||||
}
|
||||
|
||||
func systemdVersionAtoi(verStr string) (int, error) {
|
||||
// verStr should be of the form:
|
||||
// "v245.4-1.fc32", "245", "v245-1.fc32", "245-1.fc32"
|
||||
// all the input strings include quotes, and the output int should be 245
|
||||
// thus, we unconditionally remove the `"v`
|
||||
// and then match on the first integer we can grab
|
||||
re := regexp.MustCompile(`"?v?([0-9]+)`)
|
||||
matches := re.FindStringSubmatch(verStr)
|
||||
if len(matches) < 2 {
|
||||
return 0, errors.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches)
|
||||
}
|
||||
ver, err := strconv.Atoi(matches[1])
|
||||
return ver, errors.Wrapf(err, "can't parse version %s", verStr)
|
||||
}
|
||||
|
||||
func addCpuQuota(conn *systemdDbus.Conn, properties *[]systemdDbus.Property, quota int64, period uint64) {
|
||||
if period != 0 {
|
||||
// systemd only supports CPUQuotaPeriodUSec since v242
|
||||
sdVer, err := systemdVersion(conn)
|
||||
if err != nil {
|
||||
logrus.Warnf("systemdVersion: %s", err)
|
||||
} else if sdVer >= 242 {
|
||||
*properties = append(*properties,
|
||||
newProp("CPUQuotaPeriodUSec", period))
|
||||
}
|
||||
}
|
||||
if quota != 0 || period != 0 {
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if quota > 0 {
|
||||
if period == 0 {
|
||||
// assume the default kernel value of 100000 us (100 ms), same for v1 and v2.
|
||||
// v1: https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html and
|
||||
// v2: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
|
||||
period = 100000
|
||||
}
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(quota*1000000) / period
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
*properties = append(*properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
}
|
312
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unified_hierarchy.go
generated
vendored
312
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unified_hierarchy.go
generated
vendored
@@ -1,312 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/dbus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type UnifiedManager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.Cgroups
|
||||
unitName = getUnitName(c)
|
||||
slice = "system.slice"
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
if c.Paths != nil {
|
||||
paths := make(map[string]string)
|
||||
for name, path := range c.Paths {
|
||||
_, err := getSubsystemPath(m.Cgroups, name)
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
m.Paths = paths
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
}
|
||||
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Check if we can delegate. This is only supported on systemd versions 218 and above.
|
||||
if !strings.HasSuffix(unitName, ".slice") {
|
||||
// Assume scopes always support delegation.
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("BlockIOAccounting", true))
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
if c.Resources.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(c.Resources.Memory)))
|
||||
}
|
||||
|
||||
if c.Resources.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", c.Resources.CpuShares))
|
||||
}
|
||||
|
||||
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
|
||||
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
|
||||
// always setting a property value ensures we can apply a quota and remove it later
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if c.Resources.CpuQuota > 0 {
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
properties = append(properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
|
||||
if c.Resources.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
|
||||
}
|
||||
|
||||
if c.Resources.PidsLimit > 0 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(c.Resources.PidsLimit)))
|
||||
}
|
||||
|
||||
// We have to set kernel memory here, as we can't change it once
|
||||
// processes have been attached to the cgroup.
|
||||
if c.Resources.KernelMemory != 0 {
|
||||
if err := setKernelMemory(c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
|
||||
select {
|
||||
case <-statusChan:
|
||||
case <-time.After(time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", unitName)
|
||||
}
|
||||
} else if !isUnitExists(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinCgroupsV2(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
path, err := getSubsystemPath(m.Cgroups, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = map[string]string{
|
||||
"pids": path,
|
||||
"memory": path,
|
||||
"io": path,
|
||||
"cpu": path,
|
||||
"devices": path,
|
||||
"cpuset": path,
|
||||
"freezer": path,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Destroy() error {
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil)
|
||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = make(map[string]string)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
paths := m.Paths
|
||||
m.mu.Unlock()
|
||||
return paths
|
||||
}
|
||||
func (m *UnifiedManager) GetUnifiedPath() (string, error) {
|
||||
unifiedPath := ""
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
for k, v := range m.Paths {
|
||||
if unifiedPath == "" {
|
||||
unifiedPath = v
|
||||
} else if v != unifiedPath {
|
||||
return unifiedPath,
|
||||
errors.Errorf("expected %q path to be unified path %q, got %q", k, unifiedPath, v)
|
||||
}
|
||||
}
|
||||
if unifiedPath == "" {
|
||||
// FIXME: unified path could be detected even when no controller is available
|
||||
return unifiedPath, errors.New("cannot detect unified path")
|
||||
}
|
||||
return unifiedPath, nil
|
||||
}
|
||||
func createCgroupsv2Path(path string) (Err error) {
|
||||
content, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !filepath.HasPrefix(path, "/sys/fs/cgroup") {
|
||||
return fmt.Errorf("invalid cgroup path %s", path)
|
||||
}
|
||||
|
||||
res := ""
|
||||
for i, c := range strings.Split(strings.TrimSpace(string(content)), " ") {
|
||||
if i == 0 {
|
||||
res = fmt.Sprintf("+%s", c)
|
||||
} else {
|
||||
res = res + fmt.Sprintf(" +%s", c)
|
||||
}
|
||||
}
|
||||
resByte := []byte(res)
|
||||
|
||||
current := "/sys/fs"
|
||||
elements := strings.Split(path, "/")
|
||||
for i, e := range elements[3:] {
|
||||
current = filepath.Join(current, e)
|
||||
if i > 0 {
|
||||
if err := os.Mkdir(current, 0755); err != nil {
|
||||
if !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// If the directory was created, be sure it is not left around on errors.
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
os.Remove(current)
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
if i < len(elements[3:])-1 {
|
||||
if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), resByte, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinCgroupsV2(c *configs.Cgroup, pid int) error {
|
||||
path, err := getSubsystemPath(c, "memory")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return createCgroupsv2Path(path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) fsManager() (cgroups.Manager, error) {
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fs2.NewManager(m.Cgroups, path, false)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Freeze(state configs.FreezerState) error {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Freeze(state)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetPids() ([]int, error) {
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetAllPids() ([]int, error) {
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fsMgr.GetStats()
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Set(container *configs.Config) error {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Set(container)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
}
|
@@ -3,7 +3,7 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"errors"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
@@ -14,54 +14,58 @@ type Manager struct {
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
func UseSystemd() bool {
|
||||
func IsRunningSystemd() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetUnifiedPath() (string, error) {
|
||||
return "", fmt.Errorf("Systemd not supported")
|
||||
func (m *Manager) Path(_ string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func Freeze(c *configs.Cgroup, state configs.FreezerState) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Exists() bool {
|
||||
return false
|
||||
}
|
106
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
Normal file
106
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// NewUserSystemdDbus creates a connection for systemd user-instance.
|
||||
func NewUserSystemdDbus() (*systemdDbus.Conn, error) {
|
||||
addr, err := DetectUserDbusSessionBusAddress()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
uid, err := DetectUID()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return systemdDbus.NewConnection(func() (*dbus.Conn, error) {
|
||||
conn, err := dbus.Dial(addr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "error while dialing %q", addr)
|
||||
}
|
||||
methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(uid))}
|
||||
err = conn.Auth(methods)
|
||||
if err != nil {
|
||||
conn.Close()
|
||||
return nil, errors.Wrapf(err, "error while authenticating connection, address=%q, UID=%d", addr, uid)
|
||||
}
|
||||
if err = conn.Hello(); err != nil {
|
||||
conn.Close()
|
||||
return nil, errors.Wrapf(err, "error while sending Hello message, address=%q, UID=%d", addr, uid)
|
||||
}
|
||||
return conn, nil
|
||||
})
|
||||
}
|
||||
|
||||
// DetectUID detects UID from the OwnerUID field of `busctl --user status`
|
||||
// if running in userNS. The value corresponds to sd_bus_creds_get_owner_uid(3) .
|
||||
//
|
||||
// Otherwise returns os.Getuid() .
|
||||
func DetectUID() (int, error) {
|
||||
if !system.RunningInUserNS() {
|
||||
return os.Getuid(), nil
|
||||
}
|
||||
b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
|
||||
if err != nil {
|
||||
return -1, errors.Wrap(err, "could not execute `busctl --user --no-pager status`")
|
||||
}
|
||||
scanner := bufio.NewScanner(bytes.NewReader(b))
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(s, "OwnerUID=") {
|
||||
uidStr := strings.TrimPrefix(s, "OwnerUID=")
|
||||
i, err := strconv.Atoi(uidStr)
|
||||
if err != nil {
|
||||
return -1, errors.Wrapf(err, "could not detect the OwnerUID: %s", s)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return -1, errors.New("could not detect the OwnerUID")
|
||||
}
|
||||
|
||||
// DetectUserDbusSessionBusAddress returns $DBUS_SESSION_BUS_ADDRESS if set.
|
||||
// Otherwise returns "unix:path=$XDG_RUNTIME_DIR/bus" if $XDG_RUNTIME_DIR/bus exists.
|
||||
// Otherwise parses the value from `systemctl --user show-environment` .
|
||||
func DetectUserDbusSessionBusAddress() (string, error) {
|
||||
if env := os.Getenv("DBUS_SESSION_BUS_ADDRESS"); env != "" {
|
||||
return env, nil
|
||||
}
|
||||
if xdr := os.Getenv("XDG_RUNTIME_DIR"); xdr != "" {
|
||||
busPath := filepath.Join(xdr, "bus")
|
||||
if _, err := os.Stat(busPath); err == nil {
|
||||
busAddress := "unix:path=" + busPath
|
||||
return busAddress, nil
|
||||
}
|
||||
}
|
||||
b, err := exec.Command("systemctl", "--user", "--no-pager", "show-environment").CombinedOutput()
|
||||
if err != nil {
|
||||
return "", errors.Wrapf(err, "could not execute `systemctl --user --no-pager show-environment`, output=%q", string(b))
|
||||
}
|
||||
scanner := bufio.NewScanner(bytes.NewReader(b))
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(s, "DBUS_SESSION_BUS_ADDRESS=") {
|
||||
return strings.TrimPrefix(s, "DBUS_SESSION_BUS_ADDRESS="), nil
|
||||
}
|
||||
}
|
||||
return "", errors.New("could not detect DBUS_SESSION_BUS_ADDRESS from `systemctl --user --no-pager show-environment`")
|
||||
}
|
@@ -4,27 +4,30 @@ package systemd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/dbus"
|
||||
"github.com/godbus/dbus"
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type LegacyManager struct {
|
||||
type legacyManager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
cgroups *configs.Cgroup
|
||||
paths map[string]string
|
||||
}
|
||||
|
||||
func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &legacyManager{
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
}
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
@@ -65,88 +68,56 @@ var legacySubsystems = subsystemSet{
|
||||
&fs.NameGroup{GroupName: "name=systemd"},
|
||||
}
|
||||
|
||||
const (
|
||||
testScopeWait = 4
|
||||
testSliceWait = 4
|
||||
)
|
||||
func genV1ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]systemdDbus.Property, error) {
|
||||
var properties []systemdDbus.Property
|
||||
r := c.Resources
|
||||
|
||||
var (
|
||||
connLock sync.Mutex
|
||||
theConn *systemdDbus.Conn
|
||||
)
|
||||
|
||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
Value: dbus.MakeVariant(units),
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: This function comes from package github.com/coreos/go-systemd/util
|
||||
// It was borrowed here to avoid a dependency on cgo.
|
||||
//
|
||||
// IsRunningSystemd checks whether the host was booted with systemd as its init
|
||||
// system. This functions similarly to systemd's `sd_booted(3)`: internally, it
|
||||
// checks whether /run/systemd/system/ exists and is a directory.
|
||||
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
|
||||
func isRunningSystemd() bool {
|
||||
fi, err := os.Lstat("/run/systemd/system")
|
||||
deviceProperties, err := generateDeviceProperties(r.Devices)
|
||||
if err != nil {
|
||||
return false
|
||||
return nil, err
|
||||
}
|
||||
return fi.IsDir()
|
||||
properties = append(properties, deviceProperties...)
|
||||
|
||||
if r.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(r.Memory)))
|
||||
}
|
||||
|
||||
if r.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", r.CpuShares))
|
||||
}
|
||||
|
||||
addCpuQuota(conn, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
|
||||
if r.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("BlockIOWeight", uint64(r.BlkioWeight)))
|
||||
}
|
||||
|
||||
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||
}
|
||||
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
func UseSystemd() bool {
|
||||
if !isRunningSystemd() {
|
||||
return false
|
||||
}
|
||||
|
||||
connLock.Lock()
|
||||
defer connLock.Unlock()
|
||||
|
||||
if theConn == nil {
|
||||
var err error
|
||||
theConn, err = systemdDbus.New()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
|
||||
if !isRunningSystemd() {
|
||||
return nil, fmt.Errorf("systemd not running on this host, can't use systemd as a cgroups.Manager")
|
||||
}
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &UnifiedManager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &LegacyManager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Apply(pid int) error {
|
||||
func (m *legacyManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.Cgroups
|
||||
c = m.cgroups
|
||||
unitName = getUnitName(c)
|
||||
slice = "system.slice"
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if c.Paths != nil {
|
||||
paths := make(map[string]string)
|
||||
for name, path := range c.Paths {
|
||||
_, err := getSubsystemPath(m.Cgroups, name)
|
||||
_, err := getSubsystemPath(m.cgroups, name)
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
@@ -156,8 +127,8 @@ func (m *LegacyManager) Apply(pid int) error {
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
m.Paths = paths
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
m.paths = paths
|
||||
return cgroups.EnterPid(m.paths, pid)
|
||||
}
|
||||
|
||||
if c.Parent != "" {
|
||||
@@ -196,63 +167,26 @@ func (m *LegacyManager) Apply(pid int) error {
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
if c.Resources.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(c.Resources.Memory)))
|
||||
dbusConnection, err := getDbusConnection(false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if c.Resources.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", c.Resources.CpuShares))
|
||||
}
|
||||
|
||||
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
|
||||
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
|
||||
// always setting a property value ensures we can apply a quota and remove it later
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if c.Resources.CpuQuota > 0 {
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
properties = append(properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
|
||||
if c.Resources.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
|
||||
}
|
||||
|
||||
if c.Resources.PidsLimit > 0 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(c.Resources.PidsLimit)))
|
||||
resourcesProperties, err := genV1ResourcesProperties(c, dbusConnection)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
properties = append(properties, resourcesProperties...)
|
||||
properties = append(properties, c.SystemdProps...)
|
||||
|
||||
// We have to set kernel memory here, as we can't change it once
|
||||
// processes have been attached to the cgroup.
|
||||
if c.Resources.KernelMemory != 0 {
|
||||
if err := setKernelMemory(c); err != nil {
|
||||
if err := enableKmem(c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
|
||||
select {
|
||||
case <-statusChan:
|
||||
case <-time.After(time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", unitName)
|
||||
}
|
||||
} else if !isUnitExists(err) {
|
||||
if err := startUnit(dbusConnection, unitName, properties); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -262,7 +196,7 @@ func (m *LegacyManager) Apply(pid int) error {
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, s := range legacySubsystems {
|
||||
subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name())
|
||||
subsystemPath, err := getSubsystemPath(m.cgroups, s.Name())
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
@@ -272,33 +206,33 @@ func (m *LegacyManager) Apply(pid int) error {
|
||||
}
|
||||
paths[s.Name()] = subsystemPath
|
||||
}
|
||||
m.Paths = paths
|
||||
m.paths = paths
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Destroy() error {
|
||||
if m.Cgroups.Paths != nil {
|
||||
func (m *legacyManager) Destroy() error {
|
||||
if m.cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil)
|
||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
||||
|
||||
dbusConnection, err := getDbusConnection(false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = make(map[string]string)
|
||||
unitName := getUnitName(m.cgroups)
|
||||
if err := stopUnit(dbusConnection, unitName); err != nil {
|
||||
return err
|
||||
}
|
||||
m.paths = make(map[string]string)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetPaths() map[string]string {
|
||||
func (m *legacyManager) Path(subsys string) string {
|
||||
m.mu.Lock()
|
||||
paths := m.Paths
|
||||
m.mu.Unlock()
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetUnifiedPath() (string, error) {
|
||||
return "", errors.New("unified path is only supported when running in unified mode")
|
||||
defer m.mu.Unlock()
|
||||
return m.paths[subsys]
|
||||
}
|
||||
|
||||
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
|
||||
@@ -352,40 +286,6 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// systemd represents slice hierarchy using `-`, so we need to follow suit when
|
||||
// generating the path of slice. Essentially, test-a-b.slice becomes
|
||||
// /test.slice/test-a.slice/test-a-b.slice.
|
||||
func ExpandSlice(slice string) (string, error) {
|
||||
suffix := ".slice"
|
||||
// Name has to end with ".slice", but can't be just ".slice".
|
||||
if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Path-separators are not allowed.
|
||||
if strings.Contains(slice, "/") {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
var path, prefix string
|
||||
sliceName := strings.TrimSuffix(slice, suffix)
|
||||
// if input was -.slice, we should just return root now
|
||||
if sliceName == "-" {
|
||||
return "/", nil
|
||||
}
|
||||
for _, component := range strings.Split(sliceName, "-") {
|
||||
// test--a.slice isn't permitted, nor is -test.slice.
|
||||
if component == "" {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Append the component to the path and to the prefix.
|
||||
path += "/" + prefix + component + suffix
|
||||
prefix += component + "-"
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
||||
mountpoint, err := cgroups.FindCgroupMountpoint(c.Path, subsystem)
|
||||
if err != nil {
|
||||
@@ -412,46 +312,46 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
||||
return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Freeze(state configs.FreezerState) error {
|
||||
path, err := getSubsystemPath(m.Cgroups, "freezer")
|
||||
func (m *legacyManager) Freeze(state configs.FreezerState) error {
|
||||
path, err := getSubsystemPath(m.cgroups, "freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
prevState := m.Cgroups.Resources.Freezer
|
||||
m.Cgroups.Resources.Freezer = state
|
||||
prevState := m.cgroups.Resources.Freezer
|
||||
m.cgroups.Resources.Freezer = state
|
||||
freezer, err := legacySubsystems.Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = freezer.Set(path, m.Cgroups)
|
||||
err = freezer.Set(path, m.cgroups)
|
||||
if err != nil {
|
||||
m.Cgroups.Resources.Freezer = prevState
|
||||
m.cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.Cgroups, "devices")
|
||||
func (m *legacyManager) GetPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.cgroups, "devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(path)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetAllPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.Cgroups, "devices")
|
||||
func (m *legacyManager) GetAllPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.cgroups, "devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(path)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetStats() (*cgroups.Stats, error) {
|
||||
func (m *legacyManager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for name, path := range m.Paths {
|
||||
for name, path := range m.paths {
|
||||
sys, err := legacySubsystems.Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
continue
|
||||
@@ -464,41 +364,65 @@ func (m *LegacyManager) GetStats() (*cgroups.Stats, error) {
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Set(container *configs.Config) error {
|
||||
func (m *legacyManager) Set(container *configs.Config) error {
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.Cgroups.Paths != nil {
|
||||
if m.cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
dbusConnection, err := getDbusConnection(false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
properties, err := genV1ResourcesProperties(container.Cgroups, dbusConnection)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Figure out the current freezer state, so we can revert to it after we
|
||||
// temporarily freeze the container.
|
||||
targetFreezerState, err := m.GetFreezerState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if targetFreezerState == configs.Undefined {
|
||||
targetFreezerState = configs.Thawed
|
||||
}
|
||||
|
||||
// We have to freeze the container while systemd sets the cgroup settings.
|
||||
// The reason for this is that systemd's application of DeviceAllow rules
|
||||
// is done disruptively, resulting in spurrious errors to common devices
|
||||
// (unlike our fs driver, they will happily write deny-all rules to running
|
||||
// containers). So we freeze the container to avoid them hitting the cgroup
|
||||
// error. But if the freezer cgroup isn't supported, we just warn about it.
|
||||
if err := m.Freeze(configs.Frozen); err != nil {
|
||||
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
|
||||
}
|
||||
|
||||
if err := dbusConnection.SetUnitProperties(getUnitName(container.Cgroups), true, properties...); err != nil {
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
return err
|
||||
}
|
||||
|
||||
// Reset freezer state before we apply the configuration, to avoid clashing
|
||||
// with the freezer setting in the configuration.
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
|
||||
for _, sys := range legacySubsystems {
|
||||
// Get the subsystem path, but don't error out for not found cgroups.
|
||||
path, err := getSubsystemPath(container.Cgroups, sys.Name())
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Paths["cpu"] != "" {
|
||||
if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getUnitName(c *configs.Cgroup) string {
|
||||
// by default, we create a scope unless the user explicitly asks for a slice.
|
||||
if !strings.HasSuffix(c.Name, ".slice") {
|
||||
return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
|
||||
}
|
||||
return c.Name
|
||||
}
|
||||
|
||||
func setKernelMemory(c *configs.Cgroup) error {
|
||||
func enableKmem(c *configs.Cgroup) error {
|
||||
path, err := getSubsystemPath(c, "memory")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
@@ -519,16 +443,28 @@ func setKernelMemory(c *configs.Cgroup) error {
|
||||
return fs.EnableKernelMemoryAccounting(path)
|
||||
}
|
||||
|
||||
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||
func isUnitExists(err error) bool {
|
||||
if err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
|
||||
}
|
||||
}
|
||||
return false
|
||||
func (m *legacyManager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
func (m *legacyManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *legacyManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
path, err := getSubsystemPath(m.cgroups, "freezer")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
freezer, err := legacySubsystems.Get("freezer")
|
||||
if err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
return freezer.(*fs.FreezerGroup).GetState(path)
|
||||
}
|
||||
|
||||
func (m *legacyManager) Exists() bool {
|
||||
return cgroups.PathExists(m.Path("devices"))
|
||||
}
|
357
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
Normal file
357
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
Normal file
@@ -0,0 +1,357 @@
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type unifiedManager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
// path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
path string
|
||||
rootless bool
|
||||
}
|
||||
|
||||
func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgroups.Manager {
|
||||
return &unifiedManager{
|
||||
cgroups: config,
|
||||
path: path,
|
||||
rootless: rootless,
|
||||
}
|
||||
}
|
||||
|
||||
func genV2ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]systemdDbus.Property, error) {
|
||||
var properties []systemdDbus.Property
|
||||
r := c.Resources
|
||||
|
||||
// NOTE: This is of questionable correctness because we insert our own
|
||||
// devices eBPF program later. Two programs with identical rules
|
||||
// aren't the end of the world, but it is a bit concerning. However
|
||||
// it's unclear if systemd removes all eBPF programs attached when
|
||||
// doing SetUnitProperties...
|
||||
deviceProperties, err := generateDeviceProperties(r.Devices)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, deviceProperties...)
|
||||
|
||||
if r.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryMax", uint64(r.Memory)))
|
||||
}
|
||||
if r.MemoryReservation != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLow", uint64(r.MemoryReservation)))
|
||||
}
|
||||
|
||||
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if swap != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemorySwapMax", uint64(swap)))
|
||||
}
|
||||
|
||||
if r.CpuWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUWeight", r.CpuWeight))
|
||||
}
|
||||
|
||||
addCpuQuota(conn, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
|
||||
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||
}
|
||||
|
||||
// ignore r.KernelMemory
|
||||
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.cgroups
|
||||
unitName = getUnitName(c)
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
if c.Paths != nil {
|
||||
return cgroups.WriteCgroupProc(m.path, pid)
|
||||
}
|
||||
|
||||
slice := "system.slice"
|
||||
if m.rootless {
|
||||
slice = "user.slice"
|
||||
}
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Check if we can delegate. This is only supported on systemd versions 218 and above.
|
||||
if !strings.HasSuffix(unitName, ".slice") {
|
||||
// Assume scopes always support delegation.
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("IOAccounting", true))
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
resourcesProperties, err := genV2ResourcesProperties(c, dbusConnection)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
properties = append(properties, resourcesProperties...)
|
||||
properties = append(properties, c.SystemdProps...)
|
||||
|
||||
if err := startUnit(dbusConnection, unitName, properties); err != nil {
|
||||
return errors.Wrapf(err, "error while starting unit %q with properties %+v", unitName, properties)
|
||||
}
|
||||
|
||||
if err = m.initPath(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Destroy() error {
|
||||
if m.cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
unitName := getUnitName(m.cgroups)
|
||||
if err := stopUnit(dbusConnection, unitName); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// XXX this is probably not needed, systemd should handle it
|
||||
err = os.Remove(m.path)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Path(_ string) string {
|
||||
return m.path
|
||||
}
|
||||
|
||||
// getSliceFull value is used in initPath.
|
||||
// The value is incompatible with systemdDbus.PropSlice.
|
||||
func (m *unifiedManager) getSliceFull() (string, error) {
|
||||
c := m.cgroups
|
||||
slice := "system.slice"
|
||||
if m.rootless {
|
||||
slice = "user.slice"
|
||||
}
|
||||
if c.Parent != "" {
|
||||
var err error
|
||||
slice, err = ExpandSlice(c.Parent)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
if m.rootless {
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// managerCGQuoted is typically "/user.slice/user-${uid}.slice/user@${uid}.service" including the quote symbols
|
||||
managerCGQuoted, err := dbusConnection.GetManagerProperty("ControlGroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
managerCG, err := strconv.Unquote(managerCGQuoted)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
slice = filepath.Join(managerCG, slice)
|
||||
}
|
||||
|
||||
// an example of the final slice in rootless: "/user.slice/user-1001.slice/user@1001.service/user.slice"
|
||||
// NOTE: systemdDbus.PropSlice requires the "/user.slice/user-1001.slice/user@1001.service/" prefix NOT to be specified.
|
||||
return slice, nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) initPath() error {
|
||||
if m.path != "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
sliceFull, err := m.getSliceFull()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c := m.cgroups
|
||||
path := filepath.Join(sliceFull, getUnitName(c))
|
||||
path, err = securejoin.SecureJoin(fs2.UnifiedMountpoint, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// an example of the final path in rootless:
|
||||
// "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope"
|
||||
m.path = path
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) fsManager() (cgroups.Manager, error) {
|
||||
if err := m.initPath(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fs2.NewManager(m.cgroups, m.path, m.rootless)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Freeze(state configs.FreezerState) error {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Freeze(state)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetPids() ([]int, error) {
|
||||
if err := m.initPath(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(m.path)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetAllPids() ([]int, error) {
|
||||
if err := m.initPath(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(m.path)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetStats() (*cgroups.Stats, error) {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fsMgr.GetStats()
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Set(container *configs.Config) error {
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
properties, err := genV2ResourcesProperties(m.cgroups, dbusConnection)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Figure out the current freezer state, so we can revert to it after we
|
||||
// temporarily freeze the container.
|
||||
targetFreezerState, err := m.GetFreezerState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if targetFreezerState == configs.Undefined {
|
||||
targetFreezerState = configs.Thawed
|
||||
}
|
||||
|
||||
// We have to freeze the container while systemd sets the cgroup settings.
|
||||
// The reason for this is that systemd's application of DeviceAllow rules
|
||||
// is done disruptively, resulting in spurrious errors to common devices
|
||||
// (unlike our fs driver, they will happily write deny-all rules to running
|
||||
// containers). So we freeze the container to avoid them hitting the cgroup
|
||||
// error. But if the freezer cgroup isn't supported, we just warn about it.
|
||||
if err := m.Freeze(configs.Frozen); err != nil {
|
||||
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
|
||||
}
|
||||
|
||||
if err := dbusConnection.SetUnitProperties(getUnitName(m.cgroups), true, properties...); err != nil {
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
return errors.Wrap(err, "error while setting unit properties")
|
||||
}
|
||||
|
||||
// Reset freezer state before we apply the configuration, to avoid clashing
|
||||
// with the freezer setting in the configuration.
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Set(container)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetPaths() map[string]string {
|
||||
paths := make(map[string]string, 1)
|
||||
paths[""] = m.path
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
return fsMgr.GetFreezerState()
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Exists() bool {
|
||||
return cgroups.PathExists(m.path)
|
||||
}
|
Reference in New Issue
Block a user