vendor: bump runc to f000fe11

2018-12-17 10:52:13 -06:00
parent f77a0706d0
commit 2b64276536
38 changed files with 1152 additions and 681 deletions
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/BUILD
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/BUILD
@@ -12,6 +12,7 @@ go_library(
        "freezer.go",
        "fs_unsupported.go",
        "hugetlb.go",
+        "kmem.go",
        "memory.go",
        "name.go",
        "net_cls.go",
@@ -29,6 +30,7 @@ go_library(
            "//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
            "//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
            "//vendor/github.com/opencontainers/runc/libcontainer/utils:go_default_library",
+            "//vendor/github.com/pkg/errors:go_default_library",
            "//vendor/golang.org/x/sys/unix:go_default_library",
        ],
        "//conditions:default": [],
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
@@ -3,7 +3,6 @@
 package fs

 import (
-	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -14,6 +13,8 @@ import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
+	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
 )

 var (
@@ -35,7 +36,7 @@ var (
 	HugePageSizes, _ = cgroups.GetHugePageSize()
 )

-var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
+var errSubsystemDoesNotExist = fmt.Errorf("cgroup: subsystem does not exist")

 type subsystemSet []subsystem

@@ -62,9 +63,10 @@ type subsystem interface {
 }

 type Manager struct {
-	mu      sync.Mutex
-	Cgroups *configs.Cgroup
-	Paths   map[string]string
+	mu       sync.Mutex
+	Cgroups  *configs.Cgroup
+	Rootless bool // ignore permission-related errors
+	Paths    map[string]string
 }

 // The absolute path to the root of the cgroup hierarchies.
@@ -100,6 +102,33 @@ type cgroupData struct {
 	pid       int
 }

+// isIgnorableError returns whether err is a permission error (in the loose
+// sense of the word). This includes EROFS (which for an unprivileged user is
+// basically a permission error) and EACCES (for similar reasons) as well as
+// the normal EPERM.
+func isIgnorableError(rootless bool, err error) bool {
+	// We do not ignore errors if we are root.
+	if !rootless {
+		return false
+	}
+	// Is it an ordinary EPERM?
+	if os.IsPermission(errors.Cause(err)) {
+		return true
+	}
+
+	// Try to handle other errnos.
+	var errno error
+	switch err := errors.Cause(err).(type) {
+	case *os.PathError:
+		errno = err.Err
+	case *os.LinkError:
+		errno = err.Err
+	case *os.SyscallError:
+		errno = err.Err
+	}
+	return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
+}
+
 func (m *Manager) Apply(pid int) (err error) {
 	if m.Cgroups == nil {
 		return nil
@@ -145,11 +174,11 @@ func (m *Manager) Apply(pid int) (err error) {
 		m.Paths[sys.Name()] = p

 		if err := sys.Apply(d); err != nil {
-			if os.IsPermission(err) && m.Cgroups.Path == "" {
-				// If we didn't set a cgroup path, then let's defer the error here
-				// until we know whether we have set limits or not.
-				// If we hadn't set limits, then it's ok that we couldn't join this cgroup, because
-				// it will have the same limits as its parent.
+			// In the case of rootless (including euid=0 in userns), where an explicit cgroup path hasn't
+			// been set, we don't bail on error in case of permission problems.
+			// Cases where limits have been set (and we couldn't create our own
+			// cgroup) are handled by Set.
+			if isIgnorableError(m.Rootless, err) && m.Cgroups.Path == "" {
 				delete(m.Paths, sys.Name())
 				continue
 			}
@@ -207,9 +236,16 @@ func (m *Manager) Set(container *configs.Config) error {
 	for _, sys := range subsystems {
 		path := paths[sys.Name()]
 		if err := sys.Set(path, container.Cgroups); err != nil {
+			if m.Rootless && sys.Name() == "devices" {
+				continue
+			}
+			// When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
+			// However, errors from other subsystems are not ignored.
+			// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
 			if path == "" {
-				// cgroup never applied
-				return fmt.Errorf("cannot set limits on the %s cgroup, as the container has not joined it", sys.Name())
+				// We never created a path for this cgroup, so we cannot set
+				// limits for it (though we have already tried at this point).
+				return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
 			}
 			return err
 		}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
@@ -46,11 +46,7 @@ func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error
 	}
 	// because we are not using d.join we need to place the pid into the procs file
 	// unlike the other subsystems
-	if err := cgroups.WriteCgroupProc(path, pid); err != nil {
-		return err
-	}
-
-	return nil
+	return cgroups.WriteCgroupProc(path, pid)
 }

 func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
@@ -83,11 +79,7 @@ func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
 			return err
 		}
 	}
-	if err := s.SetRtSched(path, cgroup); err != nil {
-		return err
-	}
-
-	return nil
+	return s.SetRtSched(path, cgroup)
 }

 func (s *CpuGroup) Remove(d *cgroupData) error {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
@@ -77,18 +77,14 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
 	// The logic is, if user specified cpuset configs, use these
 	// specified configs, otherwise, inherit from parent. This makes
 	// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
-	// keep backward compatbility.
+	// keep backward compatibility.
 	if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
 		return err
 	}

 	// because we are not using d.join we need to place the pid into the procs file
 	// unlike the other subsystems
-	if err := cgroups.WriteCgroupProc(dir, pid); err != nil {
-		return err
-	}
-
-	return nil
+	return cgroups.WriteCgroupProc(dir, pid)
 }

 func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem.go
@@ -0,0 +1,55 @@
+// +build linux,!nokmem
+
+package fs
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"strconv"
+	"syscall" // for Errno type only
+
+	"github.com/opencontainers/runc/libcontainer/cgroups"
+	"golang.org/x/sys/unix"
+)
+
+const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
+
+func EnableKernelMemoryAccounting(path string) error {
+	// Check if kernel memory is enabled
+	// We have to limit the kernel memory here as it won't be accounted at all
+	// until a limit is set on the cgroup and limit cannot be set once the
+	// cgroup has children, or if there are already tasks in the cgroup.
+	for _, i := range []int64{1, -1} {
+		if err := setKernelMemory(path, i); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func setKernelMemory(path string, kernelMemoryLimit int64) error {
+	if path == "" {
+		return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
+	}
+	if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
+		// kernel memory is not enabled on the system so we should do nothing
+		return nil
+	}
+	if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
+		// Check if the error number returned by the syscall is "EBUSY"
+		// The EBUSY signal is returned on attempts to write to the
+		// memory.kmem.limit_in_bytes file if the cgroup has children or
+		// once tasks have been attached to the cgroup
+		if pathErr, ok := err.(*os.PathError); ok {
+			if errNo, ok := pathErr.Err.(syscall.Errno); ok {
+				if errNo == unix.EBUSY {
+					return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
+				}
+			}
+		}
+		return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
+	}
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem_disabled.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem_disabled.go
@@ -0,0 +1,11 @@
+// +build linux,nokmem
+
+package fs
+
+func EnableKernelMemoryAccounting(path string) error {
+	return nil
+}
+
+func setKernelMemory(path string, kernelMemoryLimit int64) error {
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
@@ -5,23 +5,18 @@ package fs
 import (
 	"bufio"
 	"fmt"
-	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
-	"syscall" // only for Errno

 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
-
-	"golang.org/x/sys/unix"
 )

 const (
-	cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
-	cgroupMemorySwapLimit   = "memory.memsw.limit_in_bytes"
-	cgroupMemoryLimit       = "memory.limit_in_bytes"
+	cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
+	cgroupMemoryLimit     = "memory.limit_in_bytes"
 )

 type MemoryGroup struct {
@@ -67,44 +62,6 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
 	return nil
 }

-func EnableKernelMemoryAccounting(path string) error {
-	// Check if kernel memory is enabled
-	// We have to limit the kernel memory here as it won't be accounted at all
-	// until a limit is set on the cgroup and limit cannot be set once the
-	// cgroup has children, or if there are already tasks in the cgroup.
-	for _, i := range []int64{1, -1} {
-		if err := setKernelMemory(path, i); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-func setKernelMemory(path string, kernelMemoryLimit int64) error {
-	if path == "" {
-		return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
-	}
-	if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
-		// kernel memory is not enabled on the system so we should do nothing
-		return nil
-	}
-	if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
-		// Check if the error number returned by the syscall is "EBUSY"
-		// The EBUSY signal is returned on attempts to write to the
-		// memory.kmem.limit_in_bytes file if the cgroup has children or
-		// once tasks have been attached to the cgroup
-		if pathErr, ok := err.(*os.PathError); ok {
-			if errNo, ok := pathErr.Err.(syscall.Errno); ok {
-				if errNo == unix.EBUSY {
-					return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
-				}
-			}
-		}
-		return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
-	}
-	return nil
-}
-
 func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
 	// If the memory update is set to -1 we should also
 	// set swap to -1, it means unlimited memory.
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
@@ -5,6 +5,7 @@ package systemd
 import (
 	"errors"
 	"fmt"
+	"math"
 	"os"
 	"path/filepath"
 	"strings"
@@ -295,13 +296,19 @@ func (m *Manager) Apply(pid int) error {

 	// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
 	if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
-		cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
-		// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
-		// (integer percentage of CPU) internally.  This means that if a fractional percent of
-		// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
-		// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
-		if cpuQuotaPerSecUSec%10000 != 0 {
-			cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
+		// corresponds to USEC_INFINITY in systemd
+		// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
+		// always setting a property value ensures we can apply a quota and remove it later
+		cpuQuotaPerSecUSec := uint64(math.MaxUint64)
+		if c.Resources.CpuQuota > 0 {
+			// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
+			// (integer percentage of CPU) internally.  This means that if a fractional percent of
+			// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
+			// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
+			cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
+			if cpuQuotaPerSecUSec%10000 != 0 {
+				cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
+			}
 		}
 		properties = append(properties,
 			newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
@@ -312,6 +319,12 @@ func (m *Manager) Apply(pid int) error {
 			newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
 	}

+	if c.Resources.PidsLimit > 0 {
+		properties = append(properties,
+			newProp("TasksAccounting", true),
+			newProp("TasksMax", uint64(c.Resources.PidsLimit)))
+	}
+
 	// We have to set kernel memory here, as we can't change it once
 	// processes have been attached to the cgroup.
 	if c.Resources.KernelMemory != 0 {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
@@ -13,7 +13,7 @@ import (
 	"strings"
 	"time"

-	"github.com/docker/go-units"
+	units "github.com/docker/go-units"
 )

 const (
@@ -103,7 +103,7 @@ func FindCgroupMountpointDir() (string, error) {
 		}

 		if postSeparatorFields[0] == "cgroup" {
-			// Check that the mount is properly formated.
+			// Check that the mount is properly formatted.
 			if numPostFields < 3 {
 				return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
 			}
@@ -151,19 +151,20 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount,
 			Root:       fields[3],
 		}
 		for _, opt := range strings.Split(fields[len(fields)-1], ",") {
-			if !ss[opt] {
+			seen, known := ss[opt]
+			if !known || (!all && seen) {
 				continue
 			}
+			ss[opt] = true
 			if strings.HasPrefix(opt, cgroupNamePrefix) {
-				m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
-			} else {
-				m.Subsystems = append(m.Subsystems, opt)
-			}
-			if !all {
-				numFound++
+				opt = opt[len(cgroupNamePrefix):]
 			}
+			m.Subsystems = append(m.Subsystems, opt)
+			numFound++
+		}
+		if len(m.Subsystems) > 0 || all {
+			res = append(res, m)
 		}
-		res = append(res, m)
 	}
 	if err := scanner.Err(); err != nil {
 		return nil, err
@@ -187,7 +188,7 @@ func GetCgroupMounts(all bool) ([]Mount, error) {

 	allMap := make(map[string]bool)
 	for s := range allSubsystems {
-		allMap[s] = true
+		allMap[s] = false
 	}
 	return getCgroupMountsHelper(allMap, f, all)
 }
@@ -262,7 +263,7 @@ func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
 	}

 	// This is needed for nested containers, because in /proc/self/cgroup we
-	// see pathes from host, which don't exist in container.
+	// see paths from host, which don't exist in container.
 	relCgroup, err := filepath.Rel(root, cgroup)
 	if err != nil {
 		return "", err