Update dependency opencontainer/runc

This commit is contained in:
Odin Ugedal
2019-06-20 20:34:03 +02:00
parent 81c8552d7e
commit 2bcdb944f0
42 changed files with 1415 additions and 631 deletions

View File

@@ -15,6 +15,7 @@ go_library(
"@io_bazel_rules_go//go/platform:linux": [
"//vendor/github.com/docker/go-units:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
],
"//conditions:default": [],
}),

View File

@@ -317,7 +317,7 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
}
func (raw *cgroupData) path(subsystem string) (string, error) {
mnt, err := cgroups.FindCgroupMountpoint(subsystem)
mnt, err := cgroups.FindCgroupMountpoint(raw.root, subsystem)
// If we didn't mount the subsystem, there is no point we make the path.
if err != nil {
return "", err

View File

@@ -3,6 +3,7 @@
package fs
import (
"errors"
"fmt"
"io/ioutil"
"os"
@@ -17,7 +18,12 @@ import (
const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
func EnableKernelMemoryAccounting(path string) error {
// Check if kernel memory is enabled
// Ensure that kernel memory is available in this kernel build. If it
// isn't, we just ignore it because EnableKernelMemoryAccounting is
// automatically called for all memory limits.
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
return nil
}
// We have to limit the kernel memory here as it won't be accounted at all
// until a limit is set on the cgroup and limit cannot be set once the
// cgroup has children, or if there are already tasks in the cgroup.
@@ -34,8 +40,9 @@ func setKernelMemory(path string, kernelMemoryLimit int64) error {
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
}
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
// kernel memory is not enabled on the system so we should do nothing
return nil
// We have specifically been asked to set a kmem limit. If the kernel
// doesn't support it we *must* error out.
return errors.New("kernel memory accounting not supported by this kernel")
}
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
// Check if the error number returned by the syscall is "EBUSY"

View File

@@ -2,10 +2,14 @@
package fs
import (
"errors"
)
func EnableKernelMemoryAccounting(path string) error {
return nil
}
func setKernelMemory(path string, kernelMemoryLimit int64) error {
return nil
return errors.New("kernel memory accounting disabled in this runc build")
}

View File

@@ -18,6 +18,10 @@ func UseSystemd() bool {
return false
}
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
return nil, fmt.Errorf("Systemd not supported")
}
func (m *Manager) Apply(pid int) error {
return fmt.Errorf("Systemd not supported")
}

View File

@@ -5,6 +5,7 @@ package systemd
import (
"errors"
"fmt"
"io/ioutil"
"math"
"os"
"path/filepath"
@@ -71,13 +72,11 @@ const (
)
var (
connLock sync.Mutex
theConn *systemdDbus.Conn
hasStartTransientUnit bool
hasStartTransientSliceUnit bool
hasTransientDefaultDependencies bool
hasDelegateScope bool
hasDelegateSlice bool
connLock sync.Mutex
theConn *systemdDbus.Conn
hasStartTransientUnit bool
hasStartTransientSliceUnit bool
hasDelegateSlice bool
)
func newProp(name string, units interface{}) systemdDbus.Property {
@@ -115,53 +114,6 @@ func UseSystemd() bool {
}
}
// Ensure the scope name we use doesn't exist. Use the Pid to
// avoid collisions between multiple libcontainer users on a
// single host.
scope := fmt.Sprintf("libcontainer-%d-systemd-test-default-dependencies.scope", os.Getpid())
testScopeExists := true
for i := 0; i <= testScopeWait; i++ {
if _, err := theConn.StopUnit(scope, "replace", nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
testScopeExists = false
break
}
}
}
time.Sleep(time.Millisecond)
}
// Bail out if we can't kill this scope without testing for DefaultDependencies
if testScopeExists {
return hasStartTransientUnit
}
// Assume StartTransientUnit on a scope allows DefaultDependencies
hasTransientDefaultDependencies = true
ddf := newProp("DefaultDependencies", false)
if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{ddf}, nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
hasTransientDefaultDependencies = false
}
}
}
// Not critical because of the stop unit logic above.
theConn.StopUnit(scope, "replace", nil)
// Assume StartTransientUnit on a scope allows Delegate
hasDelegateScope = true
dlScope := newProp("Delegate", true)
if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{dlScope}, nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
hasDelegateScope = false
}
}
}
// Assume we have the ability to start a transient unit as a slice
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
// For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299
@@ -206,12 +158,23 @@ func UseSystemd() bool {
}
// Not critical because of the stop unit logic above.
theConn.StopUnit(scope, "replace", nil)
theConn.StopUnit(slice, "replace", nil)
}
return hasStartTransientUnit
}
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
if !systemdUtil.IsRunningSystemd() {
return nil, fmt.Errorf("systemd not running on this host, can't use systemd as a cgroups.Manager")
}
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &Manager{
Cgroups: config,
Paths: paths,
}
}, nil
}
func (m *Manager) Apply(pid int) error {
var (
c = m.Cgroups
@@ -267,9 +230,8 @@ func (m *Manager) Apply(pid int) error {
properties = append(properties, newProp("Delegate", true))
}
} else {
if hasDelegateScope {
properties = append(properties, newProp("Delegate", true))
}
// Assume scopes always support delegation.
properties = append(properties, newProp("Delegate", true))
}
// Always enable accounting, this gets us the same behaviour as the fs implementation,
@@ -279,10 +241,9 @@ func (m *Manager) Apply(pid int) error {
newProp("CPUAccounting", true),
newProp("BlockIOAccounting", true))
if hasTransientDefaultDependencies {
properties = append(properties,
newProp("DefaultDependencies", false))
}
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
properties = append(properties,
newProp("DefaultDependencies", false))
if c.Resources.Memory != 0 {
properties = append(properties,
@@ -470,7 +431,7 @@ func ExpandSlice(slice string) (string, error) {
}
func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
mountpoint, err := cgroups.FindCgroupMountpoint(subsystem)
mountpoint, err := cgroups.FindCgroupMountpoint(c.Path, subsystem)
if err != nil {
return "", err
}
@@ -590,6 +551,15 @@ func setKernelMemory(c *configs.Cgroup) error {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
// do not try to enable the kernel memory if we already have
// tasks in the cgroup.
content, err := ioutil.ReadFile(filepath.Join(path, "tasks"))
if err != nil {
return err
}
if len(content) > 0 {
return nil
}
return fs.EnableKernelMemoryAccounting(path)
}

View File

@@ -14,39 +14,57 @@ import (
"time"
units "github.com/docker/go-units"
"golang.org/x/sys/unix"
)
const (
cgroupNamePrefix = "name="
CgroupNamePrefix = "name="
CgroupProcesses = "cgroup.procs"
)
// HugePageSizeUnitList is a list of the units used by the linux kernel when
// naming the HugePage control files.
// https://www.kernel.org/doc/Documentation/cgroup-v1/hugetlb.txt
// TODO Since the kernel only use KB, MB and GB; TB and PB should be removed,
// depends on https://github.com/docker/go-units/commit/a09cd47f892041a4fac473133d181f5aea6fa393
var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
mnt, _, err := FindCgroupMountpointAndRoot(subsystem)
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
return mnt, err
}
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
}
func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Split(txt, " ")
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], fields[3], nil
fields := strings.Fields(txt)
if len(fields) < 5 {
continue
}
if strings.HasPrefix(fields[4], cgroupPath) {
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], fields[3], nil
}
}
}
}
@@ -156,8 +174,8 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount,
continue
}
ss[opt] = true
if strings.HasPrefix(opt, cgroupNamePrefix) {
opt = opt[len(cgroupNamePrefix):]
if strings.HasPrefix(opt, CgroupNamePrefix) {
opt = opt[len(CgroupNamePrefix):]
}
m.Subsystems = append(m.Subsystems, opt)
numFound++
@@ -257,7 +275,7 @@ func GetInitCgroupPath(subsystem string) (string, error) {
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
if err != nil {
return "", err
}
@@ -343,7 +361,7 @@ func getControllerPath(subsystem string, cgroups map[string]string) (string, err
return p, nil
}
if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok {
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
return p, nil
}
@@ -398,19 +416,26 @@ func RemovePaths(paths map[string]string) (err error) {
}
func GetHugePageSize() ([]string, error) {
var pageSizes []string
sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
if err != nil {
return pageSizes, err
return []string{}, err
}
var fileNames []string
for _, st := range files {
nameArray := strings.Split(st.Name(), "-")
fileNames = append(fileNames, st.Name())
}
return getHugePageSizeFromFilenames(fileNames)
}
func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
var pageSizes []string
for _, fileName := range fileNames {
nameArray := strings.Split(fileName, "-")
pageSize, err := units.RAMInBytes(nameArray[1])
if err != nil {
return []string{}, err
}
sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, HugePageSizeUnitList)
pageSizes = append(pageSizes, sizeString)
}
@@ -454,10 +479,39 @@ func WriteCgroupProc(dir string, pid int) error {
}
// Dont attach any pid to the cgroup if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
}
if pid == -1 {
return nil
}
cgroupProcessesFile, err := os.OpenFile(filepath.Join(dir, CgroupProcesses), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0700)
if err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
}
defer cgroupProcessesFile.Close()
for i := 0; i < 5; i++ {
_, err = cgroupProcessesFile.WriteString(strconv.Itoa(pid))
if err == nil {
return nil
}
// EINVAL might mean that the task being added to cgroup.procs is in state
// TASK_NEW. We should attempt to do so again.
if isEINVAL(err) {
time.Sleep(30 * time.Millisecond)
continue
}
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
}
return err
}
func isEINVAL(err error) bool {
switch err := err.(type) {
case *os.PathError:
return err.Err == unix.EINVAL
default:
return false
}
return nil
}