Bump dependency opencontainers/runc@v1.0.0-rc9

This commit is contained in:
Odin Ugedal
2019-10-05 13:45:54 +02:00
parent 00096d8fed
commit 708a917b7d
30 changed files with 1463 additions and 197 deletions

View File

@@ -5,6 +5,7 @@ go_library(
srcs = [
"apply_nosystemd.go",
"apply_systemd.go",
"unified_hierarchy.go",
],
importmap = "k8s.io/kubernetes/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd",
importpath = "github.com/opencontainers/runc/libcontainer/cgroups/systemd",
@@ -28,7 +29,6 @@ go_library(
],
"@io_bazel_rules_go//go/platform:linux": [
"//vendor/github.com/coreos/go-systemd/dbus:go_default_library",
"//vendor/github.com/coreos/go-systemd/util:go_default_library",
"//vendor/github.com/godbus/dbus:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",

View File

@@ -14,7 +14,6 @@ import (
"time"
systemdDbus "github.com/coreos/go-systemd/dbus"
systemdUtil "github.com/coreos/go-systemd/util"
"github.com/godbus/dbus"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
@@ -22,7 +21,7 @@ import (
"github.com/sirupsen/logrus"
)
type Manager struct {
type LegacyManager struct {
mu sync.Mutex
Cgroups *configs.Cgroup
Paths map[string]string
@@ -50,7 +49,7 @@ func (s subsystemSet) Get(name string) (subsystem, error) {
return nil, errSubsystemDoesNotExist
}
var subsystems = subsystemSet{
var legacySubsystems = subsystemSet{
&fs.CpusetGroup{},
&fs.DevicesGroup{},
&fs.MemoryGroup{},
@@ -72,11 +71,8 @@ const (
)
var (
connLock sync.Mutex
theConn *systemdDbus.Conn
hasStartTransientUnit bool
hasStartTransientSliceUnit bool
hasDelegateSlice bool
connLock sync.Mutex
theConn *systemdDbus.Conn
)
func newProp(name string, units interface{}) systemdDbus.Property {
@@ -86,8 +82,23 @@ func newProp(name string, units interface{}) systemdDbus.Property {
}
}
// NOTE: This function comes from package github.com/coreos/go-systemd/util
// It was borrowed here to avoid a dependency on cgo.
//
// IsRunningSystemd checks whether the host was booted with systemd as its init
// system. This functions similarly to systemd's `sd_booted(3)`: internally, it
// checks whether /run/systemd/system/ exists and is a directory.
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
func isRunningSystemd() bool {
fi, err := os.Lstat("/run/systemd/system")
if err != nil {
return false
}
return fi.IsDir()
}
func UseSystemd() bool {
if !systemdUtil.IsRunningSystemd() {
if !isRunningSystemd() {
return false
}
@@ -100,82 +111,31 @@ func UseSystemd() bool {
if err != nil {
return false
}
// Assume we have StartTransientUnit
hasStartTransientUnit = true
// But if we get UnknownMethod error we don't
if _, err := theConn.StartTransientUnit("test.scope", "invalid", nil, nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" {
hasStartTransientUnit = false
return hasStartTransientUnit
}
}
}
// Assume we have the ability to start a transient unit as a slice
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
// For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299
hasStartTransientSliceUnit = true
// To ensure simple clean-up, we create a slice off the root with no hierarchy
slice := fmt.Sprintf("libcontainer_%d_systemd_test_default.slice", os.Getpid())
if _, err := theConn.StartTransientUnit(slice, "replace", nil, nil); err != nil {
if _, ok := err.(dbus.Error); ok {
hasStartTransientSliceUnit = false
}
}
for i := 0; i <= testSliceWait; i++ {
if _, err := theConn.StopUnit(slice, "replace", nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
hasStartTransientSliceUnit = false
break
}
}
} else {
break
}
time.Sleep(time.Millisecond)
}
// Not critical because of the stop unit logic above.
theConn.StopUnit(slice, "replace", nil)
// Assume StartTransientUnit on a slice allows Delegate
hasDelegateSlice = true
dlSlice := newProp("Delegate", true)
if _, err := theConn.StartTransientUnit(slice, "replace", []systemdDbus.Property{dlSlice}, nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
// Starting with systemd v237, Delegate is not even a property of slices anymore,
// so the D-Bus call fails with "InvalidArgs" error.
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") || strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.InvalidArgs") {
hasDelegateSlice = false
}
}
}
// Not critical because of the stop unit logic above.
theConn.StopUnit(slice, "replace", nil)
}
return hasStartTransientUnit
return true
}
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
if !systemdUtil.IsRunningSystemd() {
if !isRunningSystemd() {
return nil, fmt.Errorf("systemd not running on this host, can't use systemd as a cgroups.Manager")
}
if cgroups.IsCgroup2UnifiedMode() {
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &UnifiedManager{
Cgroups: config,
Paths: paths,
}
}, nil
}
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &Manager{
return &LegacyManager{
Cgroups: config,
Paths: paths,
}
}, nil
}
func (m *Manager) Apply(pid int) error {
func (m *LegacyManager) Apply(pid int) error {
var (
c = m.Cgroups
unitName = getUnitName(c)
@@ -208,10 +168,6 @@ func (m *Manager) Apply(pid int) error {
// if we create a slice, the parent is defined via a Wants=
if strings.HasSuffix(unitName, ".slice") {
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
if !hasStartTransientSliceUnit {
return fmt.Errorf("systemd version does not support ability to start a slice as transient unit")
}
properties = append(properties, systemdDbus.PropWants(slice))
} else {
// otherwise, we use Slice=
@@ -224,12 +180,7 @@ func (m *Manager) Apply(pid int) error {
}
// Check if we can delegate. This is only supported on systemd versions 218 and above.
if strings.HasSuffix(unitName, ".slice") {
if hasDelegateSlice {
// systemd 237 and above no longer allows delegation on a slice
properties = append(properties, newProp("Delegate", true))
}
} else {
if !strings.HasSuffix(unitName, ".slice") {
// Assume scopes always support delegation.
properties = append(properties, newProp("Delegate", true))
}
@@ -310,7 +261,7 @@ func (m *Manager) Apply(pid int) error {
}
paths := make(map[string]string)
for _, s := range subsystems {
for _, s := range legacySubsystems {
subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name())
if err != nil {
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
@@ -325,7 +276,7 @@ func (m *Manager) Apply(pid int) error {
return nil
}
func (m *Manager) Destroy() error {
func (m *LegacyManager) Destroy() error {
if m.Cgroups.Paths != nil {
return nil
}
@@ -339,7 +290,7 @@ func (m *Manager) Destroy() error {
return nil
}
func (m *Manager) GetPaths() map[string]string {
func (m *LegacyManager) GetPaths() map[string]string {
m.mu.Lock()
paths := m.Paths
m.mu.Unlock()
@@ -351,6 +302,7 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
if err != nil {
return "", err
}
if err := os.MkdirAll(path, 0755); err != nil {
return "", err
}
@@ -361,7 +313,7 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
}
func joinCgroups(c *configs.Cgroup, pid int) error {
for _, sys := range subsystems {
for _, sys := range legacySubsystems {
name := sys.Name()
switch name {
case "name=systemd":
@@ -456,14 +408,14 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
}
func (m *Manager) Freeze(state configs.FreezerState) error {
func (m *LegacyManager) Freeze(state configs.FreezerState) error {
path, err := getSubsystemPath(m.Cgroups, "freezer")
if err != nil {
return err
}
prevState := m.Cgroups.Resources.Freezer
m.Cgroups.Resources.Freezer = state
freezer, err := subsystems.Get("freezer")
freezer, err := legacySubsystems.Get("freezer")
if err != nil {
return err
}
@@ -475,7 +427,7 @@ func (m *Manager) Freeze(state configs.FreezerState) error {
return nil
}
func (m *Manager) GetPids() ([]int, error) {
func (m *LegacyManager) GetPids() ([]int, error) {
path, err := getSubsystemPath(m.Cgroups, "devices")
if err != nil {
return nil, err
@@ -483,7 +435,7 @@ func (m *Manager) GetPids() ([]int, error) {
return cgroups.GetPids(path)
}
func (m *Manager) GetAllPids() ([]int, error) {
func (m *LegacyManager) GetAllPids() ([]int, error) {
path, err := getSubsystemPath(m.Cgroups, "devices")
if err != nil {
return nil, err
@@ -491,12 +443,12 @@ func (m *Manager) GetAllPids() ([]int, error) {
return cgroups.GetAllPids(path)
}
func (m *Manager) GetStats() (*cgroups.Stats, error) {
func (m *LegacyManager) GetStats() (*cgroups.Stats, error) {
m.mu.Lock()
defer m.mu.Unlock()
stats := cgroups.NewStats()
for name, path := range m.Paths {
sys, err := subsystems.Get(name)
sys, err := legacySubsystems.Get(name)
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
continue
}
@@ -508,13 +460,13 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
return stats, nil
}
func (m *Manager) Set(container *configs.Config) error {
func (m *LegacyManager) Set(container *configs.Config) error {
// If Paths are set, then we are just joining cgroups paths
// and there is no need to set any values.
if m.Cgroups.Paths != nil {
return nil
}
for _, sys := range subsystems {
for _, sys := range legacySubsystems {
// Get the subsystem path, but don't error out for not found cgroups.
path, err := getSubsystemPath(container.Cgroups, sys.Name())
if err != nil && !cgroups.IsNotFound(err) {

View File

@@ -0,0 +1,329 @@
// +build linux,!static_build
package systemd
import (
"fmt"
"io/ioutil"
"math"
"os"
"path/filepath"
"strings"
"sync"
"time"
systemdDbus "github.com/coreos/go-systemd/dbus"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/sirupsen/logrus"
)
type UnifiedManager struct {
mu sync.Mutex
Cgroups *configs.Cgroup
Paths map[string]string
}
var unifiedSubsystems = subsystemSet{
&fs.CpusetGroupV2{},
&fs.FreezerGroupV2{},
&fs.CpuGroupV2{},
&fs.MemoryGroupV2{},
&fs.IOGroupV2{},
&fs.PidsGroupV2{},
}
func (m *UnifiedManager) Apply(pid int) error {
var (
c = m.Cgroups
unitName = getUnitName(c)
slice = "system.slice"
properties []systemdDbus.Property
)
if c.Paths != nil {
paths := make(map[string]string)
for name, path := range c.Paths {
_, err := getSubsystemPath(m.Cgroups, name)
if err != nil {
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) {
continue
}
return err
}
paths[name] = path
}
m.Paths = paths
return cgroups.EnterPid(m.Paths, pid)
}
if c.Parent != "" {
slice = c.Parent
}
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
// if we create a slice, the parent is defined via a Wants=
if strings.HasSuffix(unitName, ".slice") {
properties = append(properties, systemdDbus.PropWants(slice))
} else {
// otherwise, we use Slice=
properties = append(properties, systemdDbus.PropSlice(slice))
}
// only add pid if its valid, -1 is used w/ general slice creation.
if pid != -1 {
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
}
// Check if we can delegate. This is only supported on systemd versions 218 and above.
if !strings.HasSuffix(unitName, ".slice") {
// Assume scopes always support delegation.
properties = append(properties, newProp("Delegate", true))
}
// Always enable accounting, this gets us the same behaviour as the fs implementation,
// plus the kernel has some problems with joining the memory cgroup at a later time.
properties = append(properties,
newProp("MemoryAccounting", true),
newProp("CPUAccounting", true),
newProp("BlockIOAccounting", true))
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
properties = append(properties,
newProp("DefaultDependencies", false))
if c.Resources.Memory != 0 {
properties = append(properties,
newProp("MemoryLimit", uint64(c.Resources.Memory)))
}
if c.Resources.CpuShares != 0 {
properties = append(properties,
newProp("CPUShares", c.Resources.CpuShares))
}
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
// corresponds to USEC_INFINITY in systemd
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
// always setting a property value ensures we can apply a quota and remove it later
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
if c.Resources.CpuQuota > 0 {
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
// (integer percentage of CPU) internally. This means that if a fractional percent of
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
if cpuQuotaPerSecUSec%10000 != 0 {
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
}
}
properties = append(properties,
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
}
if c.Resources.BlkioWeight != 0 {
properties = append(properties,
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
}
if c.Resources.PidsLimit > 0 {
properties = append(properties,
newProp("TasksAccounting", true),
newProp("TasksMax", uint64(c.Resources.PidsLimit)))
}
// We have to set kernel memory here, as we can't change it once
// processes have been attached to the cgroup.
if c.Resources.KernelMemory != 0 {
if err := setKernelMemory(c); err != nil {
return err
}
}
statusChan := make(chan string, 1)
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
select {
case <-statusChan:
case <-time.After(time.Second):
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", unitName)
}
} else if !isUnitExists(err) {
return err
}
if err := joinCgroupsV2(c, pid); err != nil {
return err
}
paths := make(map[string]string)
for _, s := range unifiedSubsystems {
subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name())
if err != nil {
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) {
continue
}
return err
}
paths[s.Name()] = subsystemPath
}
m.Paths = paths
return nil
}
func (m *UnifiedManager) Destroy() error {
if m.Cgroups.Paths != nil {
return nil
}
m.mu.Lock()
defer m.mu.Unlock()
theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil)
if err := cgroups.RemovePaths(m.Paths); err != nil {
return err
}
m.Paths = make(map[string]string)
return nil
}
func (m *UnifiedManager) GetPaths() map[string]string {
m.mu.Lock()
paths := m.Paths
m.mu.Unlock()
return paths
}
func createCgroupsv2Path(path string) (Err error) {
content, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers")
if err != nil {
return err
}
if !filepath.HasPrefix(path, "/sys/fs/cgroup") {
return fmt.Errorf("invalid cgroup path %s", path)
}
res := ""
for i, c := range strings.Split(strings.TrimSpace(string(content)), " ") {
if i == 0 {
res = fmt.Sprintf("+%s", c)
} else {
res = res + fmt.Sprintf(" +%s", c)
}
}
resByte := []byte(res)
current := "/sys/fs"
elements := strings.Split(path, "/")
for i, e := range elements[3:] {
current = filepath.Join(current, e)
if i > 0 {
if err := os.Mkdir(current, 0755); err != nil {
if !os.IsExist(err) {
return err
}
} else {
// If the directory was created, be sure it is not left around on errors.
defer func() {
if Err != nil {
os.Remove(current)
}
}()
}
}
if i < len(elements[3:])-1 {
if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), resByte, 0755); err != nil {
return err
}
}
}
return nil
}
func joinCgroupsV2(c *configs.Cgroup, pid int) error {
path, err := getSubsystemPath(c, "memory")
if err != nil {
return err
}
return createCgroupsv2Path(path)
}
func (m *UnifiedManager) Freeze(state configs.FreezerState) error {
path, err := getSubsystemPath(m.Cgroups, "freezer")
if err != nil {
return err
}
prevState := m.Cgroups.Resources.Freezer
m.Cgroups.Resources.Freezer = state
freezer, err := unifiedSubsystems.Get("freezer")
if err != nil {
return err
}
err = freezer.Set(path, m.Cgroups)
if err != nil {
m.Cgroups.Resources.Freezer = prevState
return err
}
return nil
}
func (m *UnifiedManager) GetPids() ([]int, error) {
path, err := getSubsystemPath(m.Cgroups, "devices")
if err != nil {
return nil, err
}
return cgroups.GetPids(path)
}
func (m *UnifiedManager) GetAllPids() ([]int, error) {
path, err := getSubsystemPath(m.Cgroups, "devices")
if err != nil {
return nil, err
}
return cgroups.GetAllPids(path)
}
func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) {
m.mu.Lock()
defer m.mu.Unlock()
stats := cgroups.NewStats()
for name, path := range m.Paths {
sys, err := unifiedSubsystems.Get(name)
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
continue
}
if err := sys.GetStats(path, stats); err != nil {
return nil, err
}
}
return stats, nil
}
func (m *UnifiedManager) Set(container *configs.Config) error {
// If Paths are set, then we are just joining cgroups paths
// and there is no need to set any values.
if m.Cgroups.Paths != nil {
return nil
}
for _, sys := range unifiedSubsystems {
// Get the subsystem path, but don't error out for not found cgroups.
path, err := getSubsystemPath(container.Cgroups, sys.Name())
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if err := sys.Set(path, container.Cgroups); err != nil {
return err
}
}
if m.Paths["cpu"] != "" {
if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
return err
}
}
return nil
}