deps: update runc to 1.1.0
This updates vendored runc/libcontainer to 1.1.0, and google/cadvisor to a version updated to runc 1.1.0 (google/cadvisor#3048). Changes in vendor are generated by (roughly): ./hack/pin-dependency.sh github.com/google/cadvisor v0.44.0 ./hack/pin-dependency.sh github.com/opencontainers/runc v1.1.0 ./hack/update-vendor.sh ./hack/lint-dependencies.sh # And follow all its recommendations. ./hack/update-vendor.sh ./hack/update-internal-modules.sh ./hack/lint-dependencies.sh # Re-check everything again. Co-Authored-By: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
5
vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go
generated
vendored
5
vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go
generated
vendored
@@ -3,7 +3,6 @@ package apparmor
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
@@ -19,7 +18,7 @@ var (
|
||||
func isEnabled() bool {
|
||||
checkAppArmor.Do(func() {
|
||||
if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil {
|
||||
buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
|
||||
buf, err := os.ReadFile("/sys/module/apparmor/parameters/enabled")
|
||||
appArmorEnabled = err == nil && len(buf) > 1 && buf[0] == 'Y'
|
||||
}
|
||||
})
|
||||
@@ -52,7 +51,7 @@ func setProcAttr(attr, value string) error {
|
||||
// changeOnExec reimplements aa_change_onexec from libapparmor in Go
|
||||
func changeOnExec(name string) error {
|
||||
if err := setProcAttr("exec", "exec "+name); err != nil {
|
||||
return fmt.Errorf("apparmor failed to apply profile: %s", err)
|
||||
return fmt.Errorf("apparmor failed to apply profile: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
1
vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go
generated
vendored
1
vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go
generated
vendored
@@ -1,3 +1,4 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
package apparmor
|
||||
|
12
vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go
generated
vendored
12
vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go
generated
vendored
@@ -1,3 +1,4 @@
|
||||
//go:build linux
|
||||
// +build linux
|
||||
|
||||
package capabilities
|
||||
@@ -34,6 +35,17 @@ func init() {
|
||||
}
|
||||
}
|
||||
|
||||
// KnownCapabilities returns the list of the known capabilities.
|
||||
// Used by `runc features`.
|
||||
func KnownCapabilities() []string {
|
||||
list := capability.List()
|
||||
res := make([]string, len(list))
|
||||
for i, c := range list {
|
||||
res[i] = "CAP_" + strings.ToUpper(c.String())
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
// New creates a new Caps from the given Capabilities config. Unknown Capabilities
|
||||
// or Capabilities that are unavailable in the current environment are ignored,
|
||||
// printing a warning instead.
|
||||
|
@@ -1,3 +1,4 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
package capabilities
|
||||
|
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
|
3
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go
generated
vendored
3
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go
generated
vendored
@@ -1,3 +0,0 @@
|
||||
// +build !linux
|
||||
|
||||
package cgroups
|
66
vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go
generated
vendored
66
vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
/*
|
||||
* Copyright (C) 2020 Aleksa Sarai <cyphar@cyphar.com>
|
||||
@@ -22,14 +20,13 @@ package devices
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// deviceMeta is a Rule without the Allow or Permissions fields, and no
|
||||
@@ -79,19 +76,21 @@ func (e *Emulator) IsAllowAll() bool {
|
||||
return e.IsBlacklist() && len(e.rules) == 0
|
||||
}
|
||||
|
||||
var devicesListRegexp = regexp.MustCompile(`^([abc])\s+(\d+|\*):(\d+|\*)\s+([rwm]+)$`)
|
||||
|
||||
func parseLine(line string) (*deviceRule, error) {
|
||||
matches := devicesListRegexp.FindStringSubmatch(line)
|
||||
if matches == nil {
|
||||
return nil, errors.Errorf("line doesn't match devices.list format")
|
||||
// Input: node major:minor perms.
|
||||
fields := strings.FieldsFunc(line, func(r rune) bool {
|
||||
return r == ' ' || r == ':'
|
||||
})
|
||||
if len(fields) != 4 {
|
||||
return nil, fmt.Errorf("malformed devices.list rule %s", line)
|
||||
}
|
||||
|
||||
var (
|
||||
rule deviceRule
|
||||
node = matches[1]
|
||||
major = matches[2]
|
||||
minor = matches[3]
|
||||
perms = matches[4]
|
||||
node = fields[0]
|
||||
major = fields[1]
|
||||
minor = fields[2]
|
||||
perms = fields[3]
|
||||
)
|
||||
|
||||
// Parse the node type.
|
||||
@@ -107,8 +106,7 @@ func parseLine(line string) (*deviceRule, error) {
|
||||
case "c":
|
||||
rule.meta.node = devices.CharDevice
|
||||
default:
|
||||
// Should never happen!
|
||||
return nil, errors.Errorf("unknown device type %q", node)
|
||||
return nil, fmt.Errorf("unknown device type %q", node)
|
||||
}
|
||||
|
||||
// Parse the major number.
|
||||
@@ -117,7 +115,7 @@ func parseLine(line string) (*deviceRule, error) {
|
||||
} else {
|
||||
val, err := strconv.ParseUint(major, 10, 32)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "parse major number")
|
||||
return nil, fmt.Errorf("invalid major number: %w", err)
|
||||
}
|
||||
rule.meta.major = int64(val)
|
||||
}
|
||||
@@ -128,7 +126,7 @@ func parseLine(line string) (*deviceRule, error) {
|
||||
} else {
|
||||
val, err := strconv.ParseUint(minor, 10, 32)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "parse minor number")
|
||||
return nil, fmt.Errorf("invalid minor number: %w", err)
|
||||
}
|
||||
rule.meta.minor = int64(val)
|
||||
}
|
||||
@@ -136,13 +134,12 @@ func parseLine(line string) (*deviceRule, error) {
|
||||
// Parse the access permissions.
|
||||
rule.perms = devices.Permissions(perms)
|
||||
if !rule.perms.IsValid() || rule.perms.IsEmpty() {
|
||||
// Should never happen!
|
||||
return nil, errors.Errorf("parse access mode: contained unknown modes or is empty: %q", perms)
|
||||
return nil, fmt.Errorf("parse access mode: contained unknown modes or is empty: %q", perms)
|
||||
}
|
||||
return &rule, nil
|
||||
}
|
||||
|
||||
func (e *Emulator) addRule(rule deviceRule) error {
|
||||
func (e *Emulator) addRule(rule deviceRule) error { //nolint:unparam
|
||||
if e.rules == nil {
|
||||
e.rules = make(map[deviceMeta]devices.Permissions)
|
||||
}
|
||||
@@ -180,7 +177,7 @@ func (e *Emulator) rmRule(rule deviceRule) error {
|
||||
// Only give an error if the set of permissions overlap.
|
||||
partialPerms := e.rules[partialMeta]
|
||||
if !partialPerms.Intersection(rule.perms).IsEmpty() {
|
||||
return errors.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms)
|
||||
return fmt.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -212,9 +209,9 @@ func (e *Emulator) allow(rule *deviceRule) error {
|
||||
|
||||
var err error
|
||||
if e.defaultAllow {
|
||||
err = errors.Wrap(e.rmRule(*rule), "remove 'deny' exception")
|
||||
err = wrapErr(e.rmRule(*rule), "unable to remove 'deny' exception")
|
||||
} else {
|
||||
err = errors.Wrap(e.addRule(*rule), "add 'allow' exception")
|
||||
err = wrapErr(e.addRule(*rule), "unable to add 'allow' exception")
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -232,16 +229,16 @@ func (e *Emulator) deny(rule *deviceRule) error {
|
||||
|
||||
var err error
|
||||
if e.defaultAllow {
|
||||
err = errors.Wrap(e.addRule(*rule), "add 'deny' exception")
|
||||
err = wrapErr(e.addRule(*rule), "unable to add 'deny' exception")
|
||||
} else {
|
||||
err = errors.Wrap(e.rmRule(*rule), "remove 'allow' exception")
|
||||
err = wrapErr(e.rmRule(*rule), "unable to remove 'allow' exception")
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *Emulator) Apply(rule devices.Rule) error {
|
||||
if !rule.Type.CanCgroup() {
|
||||
return errors.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type)
|
||||
return fmt.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type)
|
||||
}
|
||||
|
||||
innerRule := &deviceRule{
|
||||
@@ -283,17 +280,17 @@ func EmulatorFromList(list io.Reader) (*Emulator, error) {
|
||||
line := s.Text()
|
||||
deviceRule, err := parseLine(line)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "parsing line %q", line)
|
||||
return nil, fmt.Errorf("error parsing line %q: %w", line, err)
|
||||
}
|
||||
// "devices.list" is an allow list. Note that this means that in
|
||||
// black-list mode, we have no idea what rules are in play. As a
|
||||
// result, we need to be very careful in Transition().
|
||||
if err := e.allow(deviceRule); err != nil {
|
||||
return nil, errors.Wrapf(err, "adding devices.list rule")
|
||||
return nil, fmt.Errorf("error adding devices.list rule: %w", err)
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, errors.Wrap(err, "reading devices.list lines")
|
||||
return nil, fmt.Errorf("error reading devices.list lines: %w", err)
|
||||
}
|
||||
return e, nil
|
||||
}
|
||||
@@ -305,7 +302,7 @@ func EmulatorFromList(list io.Reader) (*Emulator, error) {
|
||||
// necessary.
|
||||
//
|
||||
// This function is the sole reason for all of Emulator -- to allow us
|
||||
// to figure out how to update a containers' cgroups without causing spurrious
|
||||
// to figure out how to update a containers' cgroups without causing spurious
|
||||
// device errors (if possible).
|
||||
func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) {
|
||||
var transitionRules []*devices.Rule
|
||||
@@ -380,3 +377,10 @@ func (e *Emulator) Rules() ([]*devices.Rule, error) {
|
||||
defaultCgroup := &Emulator{defaultAllow: false}
|
||||
return defaultCgroup.Transition(e)
|
||||
}
|
||||
|
||||
func wrapErr(err error, text string) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf(text+": %w", err)
|
||||
}
|
||||
|
@@ -7,13 +7,14 @@
|
||||
package devicefilter
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
@@ -51,21 +52,20 @@ func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
|
||||
// only be one (at most) at the very start to instruct cgroupv1 to
|
||||
// go into allow-list mode. However we do double-check this here.
|
||||
if idx != 0 || rule.Allow != emu.IsBlacklist() {
|
||||
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
|
||||
return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
|
||||
}
|
||||
continue
|
||||
}
|
||||
if rule.Allow == p.defaultAllow {
|
||||
// There should be no rules which have an action equal to the
|
||||
// default action, the emulator removes those.
|
||||
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
|
||||
return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
|
||||
}
|
||||
if err := p.appendRule(rule); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
}
|
||||
insts, err := p.finalize()
|
||||
return insts, license, err
|
||||
return p.finalize(), license, nil
|
||||
}
|
||||
|
||||
type program struct {
|
||||
@@ -118,13 +118,13 @@ func (p *program) appendRule(rule *devices.Rule) error {
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
|
||||
default:
|
||||
// We do not permit 'a', nor any other types we don't know about.
|
||||
return errors.Errorf("invalid type %q", string(rule.Type))
|
||||
return fmt.Errorf("invalid type %q", string(rule.Type))
|
||||
}
|
||||
if rule.Major > math.MaxUint32 {
|
||||
return errors.Errorf("invalid major %d", rule.Major)
|
||||
return fmt.Errorf("invalid major %d", rule.Major)
|
||||
}
|
||||
if rule.Minor > math.MaxUint32 {
|
||||
return errors.Errorf("invalid minor %d", rule.Major)
|
||||
return fmt.Errorf("invalid minor %d", rule.Major)
|
||||
}
|
||||
hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
|
||||
hasMinor := rule.Minor >= 0
|
||||
@@ -138,7 +138,7 @@ func (p *program) appendRule(rule *devices.Rule) error {
|
||||
case 'm':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
|
||||
default:
|
||||
return errors.Errorf("unknown device access %v", r)
|
||||
return fmt.Errorf("unknown device access %v", r)
|
||||
}
|
||||
}
|
||||
// If the access is rwm, skip the check.
|
||||
@@ -180,7 +180,7 @@ func (p *program) appendRule(rule *devices.Rule) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *program) finalize() (asm.Instructions, error) {
|
||||
func (p *program) finalize() asm.Instructions {
|
||||
var v int32
|
||||
if p.defaultAllow {
|
||||
v = 1
|
||||
@@ -192,7 +192,7 @@ func (p *program) finalize() (asm.Instructions, error) {
|
||||
asm.Return(),
|
||||
)
|
||||
p.blockID = -1
|
||||
return p.insts, nil
|
||||
return p.insts
|
||||
}
|
||||
|
||||
func acceptBlock(accept bool) asm.Instructions {
|
||||
|
4
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go
generated
vendored
4
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go
generated
vendored
@@ -1,6 +1,7 @@
|
||||
package ebpf
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
@@ -10,7 +11,6 @@ import (
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/link"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
@@ -134,7 +134,7 @@ func haveBpfProgReplace() bool {
|
||||
// not supported
|
||||
return
|
||||
}
|
||||
// attach_flags test succeded.
|
||||
// attach_flags test succeeded.
|
||||
if !errors.Is(err, unix.EBADF) {
|
||||
logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err)
|
||||
}
|
||||
|
60
vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
generated
vendored
60
vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
generated
vendored
@@ -2,20 +2,27 @@ package cgroups
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// OpenFile opens a cgroup file in a given dir with given flags.
|
||||
// It is supposed to be used for cgroup files only.
|
||||
// It is supposed to be used for cgroup files only, and returns
|
||||
// an error if the file is not a cgroup file.
|
||||
//
|
||||
// Arguments dir and file are joined together to form an absolute path
|
||||
// to a file being opened.
|
||||
func OpenFile(dir, file string, flags int) (*os.File, error) {
|
||||
if dir == "" {
|
||||
return nil, errors.Errorf("no directory specified for %s", file)
|
||||
return nil, fmt.Errorf("no directory specified for %s", file)
|
||||
}
|
||||
return openFile(dir, file, flags)
|
||||
}
|
||||
@@ -43,7 +50,8 @@ func WriteFile(dir, file, data string) error {
|
||||
}
|
||||
defer fd.Close()
|
||||
if err := retryingWriteFile(fd, data); err != nil {
|
||||
return errors.Wrapf(err, "failed to write %q", data)
|
||||
// Having data in the error message helps in debugging.
|
||||
return fmt.Errorf("failed to write %q: %w", data, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -81,7 +89,7 @@ func prepareOpenat2() error {
|
||||
})
|
||||
if err != nil {
|
||||
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
|
||||
if err != unix.ENOSYS {
|
||||
if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare
|
||||
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
||||
} else {
|
||||
logrus.Debug("openat2 not available, falling back to securejoin")
|
||||
@@ -107,8 +115,6 @@ func prepareOpenat2() error {
|
||||
return prepErr
|
||||
}
|
||||
|
||||
// OpenFile opens a cgroup file in a given dir with given flags.
|
||||
// It is supposed to be used for cgroup files only.
|
||||
func openFile(dir, file string, flags int) (*os.File, error) {
|
||||
mode := os.FileMode(0)
|
||||
if TestMode && flags&os.O_WRONLY != 0 {
|
||||
@@ -116,34 +122,52 @@ func openFile(dir, file string, flags int) (*os.File, error) {
|
||||
flags |= os.O_TRUNC | os.O_CREATE
|
||||
mode = 0o600
|
||||
}
|
||||
path := path.Join(dir, file)
|
||||
if prepareOpenat2() != nil {
|
||||
return openFallback(dir, file, flags, mode)
|
||||
return openFallback(path, flags, mode)
|
||||
}
|
||||
reldir := strings.TrimPrefix(dir, cgroupfsPrefix)
|
||||
if len(reldir) == len(dir) { // non-standard path, old system?
|
||||
return openFallback(dir, file, flags, mode)
|
||||
relPath := strings.TrimPrefix(path, cgroupfsPrefix)
|
||||
if len(relPath) == len(path) { // non-standard path, old system?
|
||||
return openFallback(path, flags, mode)
|
||||
}
|
||||
|
||||
relname := reldir + "/" + file
|
||||
fd, err := unix.Openat2(cgroupFd, relname,
|
||||
fd, err := unix.Openat2(cgroupFd, relPath,
|
||||
&unix.OpenHow{
|
||||
Resolve: resolveFlags,
|
||||
Flags: uint64(flags) | unix.O_CLOEXEC,
|
||||
Mode: uint64(mode),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, &os.PathError{Op: "openat2", Path: dir + "/" + file, Err: err}
|
||||
err = &os.PathError{Op: "openat2", Path: path, Err: err}
|
||||
// Check if cgroupFd is still opened to cgroupfsDir
|
||||
// (happens when this package is incorrectly used
|
||||
// across the chroot/pivot_root/mntns boundary, or
|
||||
// when /sys/fs/cgroup is remounted).
|
||||
//
|
||||
// TODO: if such usage will ever be common, amend this
|
||||
// to reopen cgroupFd and retry openat2.
|
||||
fdStr := strconv.Itoa(cgroupFd)
|
||||
fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
|
||||
if fdDest != cgroupfsDir {
|
||||
// Wrap the error so it is clear that cgroupFd
|
||||
// is opened to an unexpected/wrong directory.
|
||||
err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w",
|
||||
fdStr, fdDest, cgroupfsDir, err)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return os.NewFile(uintptr(fd), cgroupfsPrefix+relname), nil
|
||||
return os.NewFile(uintptr(fd), path), nil
|
||||
}
|
||||
|
||||
var errNotCgroupfs = errors.New("not a cgroup file")
|
||||
|
||||
// openFallback is used when openat2(2) is not available. It checks the opened
|
||||
// Can be changed by unit tests.
|
||||
var openFallback = openAndCheck
|
||||
|
||||
// openAndCheck is used when openat2(2) is not available. It checks the opened
|
||||
// file is on cgroupfs, returning an error otherwise.
|
||||
func openFallback(dir, file string, flags int, mode os.FileMode) (*os.File, error) {
|
||||
path := dir + "/" + file
|
||||
func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) {
|
||||
fd, err := os.OpenFile(path, flags, mode)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
18
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
generated
vendored
18
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
generated
vendored
@@ -1,10 +1,7 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
@@ -23,8 +20,8 @@ func (s *BlkioGroup) Name() string {
|
||||
return "blkio"
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
func (s *BlkioGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
|
||||
@@ -131,19 +128,19 @@ func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
|
||||
// skip total line
|
||||
continue
|
||||
} else {
|
||||
return nil, fmt.Errorf("Invalid line found while parsing %s/%s: %s", dir, file, sc.Text())
|
||||
return nil, malformedLine(dir, file, sc.Text())
|
||||
}
|
||||
}
|
||||
|
||||
v, err := strconv.ParseUint(fields[0], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
major := v
|
||||
|
||||
v, err = strconv.ParseUint(fields[1], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
minor := v
|
||||
|
||||
@@ -155,10 +152,13 @@ func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
|
||||
}
|
||||
v, err = strconv.ParseUint(fields[valueField], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
|
||||
return blkioStats, nil
|
||||
}
|
||||
|
22
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
22
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
@@ -21,24 +19,19 @@ func (s *CpuGroup) Name() string {
|
||||
return "cpu"
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Apply(path string, d *cgroupData) error {
|
||||
// This might happen if we have no cpu cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
func (s *CpuGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
// We should set the real-Time group scheduling settings before moving
|
||||
// in the process because if the process is already in SCHED_RR mode
|
||||
// and no RT bandwidth is set, adding it will fail.
|
||||
if err := s.SetRtSched(path, d.config.Resources); err != nil {
|
||||
if err := s.SetRtSched(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// Since we are not using join(), we need to place the pid
|
||||
// into the procs file unlike other subsystems.
|
||||
return cgroups.WriteCgroupProc(path, d.pid)
|
||||
// Since we are not using apply(), we need to place the pid
|
||||
// into the procs file.
|
||||
return cgroups.WriteCgroupProc(path, pid)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
|
||||
@@ -105,7 +98,8 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error {
|
||||
}
|
||||
|
||||
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
f, err := cgroups.OpenFile(path, "cpu.stat", os.O_RDONLY)
|
||||
const file = "cpu.stat"
|
||||
f, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
@@ -118,7 +112,7 @@ func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
return &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
switch t {
|
||||
case "nr_periods":
|
||||
|
46
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
46
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
@@ -1,12 +1,8 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@@ -38,8 +34,8 @@ func (s *CpuacctGroup) Name() string {
|
||||
return "cpuacct"
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
func (s *CpuacctGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Set(_ string, _ *configs.Resources) error {
|
||||
@@ -85,45 +81,43 @@ func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
|
||||
const (
|
||||
userField = "user"
|
||||
systemField = "system"
|
||||
file = cgroupCpuacctStat
|
||||
)
|
||||
|
||||
// Expected format:
|
||||
// user <usage in ticks>
|
||||
// system <usage in ticks>
|
||||
data, err := cgroups.ReadFile(path, cgroupCpuacctStat)
|
||||
data, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
// TODO: use strings.SplitN instead.
|
||||
fields := strings.Fields(data)
|
||||
if len(fields) < 4 {
|
||||
return 0, 0, fmt.Errorf("failure - %s is expected to have at least 4 fields", filepath.Join(path, cgroupCpuacctStat))
|
||||
}
|
||||
if fields[0] != userField {
|
||||
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField)
|
||||
}
|
||||
if fields[2] != systemField {
|
||||
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField)
|
||||
if len(fields) < 4 || fields[0] != userField || fields[2] != systemField {
|
||||
return 0, 0, malformedLine(path, file, data)
|
||||
}
|
||||
if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
|
||||
return 0, 0, err
|
||||
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
|
||||
return 0, 0, err
|
||||
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
|
||||
}
|
||||
|
||||
func getPercpuUsage(path string) ([]uint64, error) {
|
||||
const file = "cpuacct.usage_percpu"
|
||||
percpuUsage := []uint64{}
|
||||
data, err := cgroups.ReadFile(path, "cpuacct.usage_percpu")
|
||||
data, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return percpuUsage, err
|
||||
}
|
||||
// TODO: use strings.SplitN instead.
|
||||
for _, value := range strings.Fields(data) {
|
||||
value, err := strconv.ParseUint(value, 10, 64)
|
||||
if err != nil {
|
||||
return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err)
|
||||
return percpuUsage, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
percpuUsage = append(percpuUsage, value)
|
||||
}
|
||||
@@ -133,16 +127,17 @@ func getPercpuUsage(path string) ([]uint64, error) {
|
||||
func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
|
||||
usageKernelMode := []uint64{}
|
||||
usageUserMode := []uint64{}
|
||||
const file = cgroupCpuacctUsageAll
|
||||
|
||||
file, err := cgroups.OpenFile(path, cgroupCpuacctUsageAll, os.O_RDONLY)
|
||||
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if os.IsNotExist(err) {
|
||||
return usageKernelMode, usageUserMode, nil
|
||||
} else if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
defer fd.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
scanner := bufio.NewScanner(fd)
|
||||
scanner.Scan() // skipping header line
|
||||
|
||||
for scanner.Scan() {
|
||||
@@ -153,19 +148,18 @@ func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
|
||||
|
||||
usageInKernelMode, err := strconv.ParseUint(lineFields[kernelModeColumn], 10, 64)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("Unable to convert CPU usage in kernel mode to uint64: %s", err)
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
usageKernelMode = append(usageKernelMode, usageInKernelMode)
|
||||
|
||||
usageInUserMode, err := strconv.ParseUint(lineFields[userModeColumn], 10, 64)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("Unable to convert CPU usage in user mode to uint64: %s", err)
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
usageUserMode = append(usageUserMode, usageInUserMode)
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, nil, fmt.Errorf("Problem in reading %s line by line, %s", cgroupCpuacctUsageAll, err)
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return usageKernelMode, usageUserMode, nil
|
||||
|
47
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
47
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
@@ -1,19 +1,17 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type CpusetGroup struct{}
|
||||
@@ -22,8 +20,8 @@ func (s *CpusetGroup) Name() string {
|
||||
return "cpuset"
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Apply(path string, d *cgroupData) error {
|
||||
return s.ApplyDir(path, d.config.Resources, d.pid)
|
||||
func (s *CpusetGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
return s.ApplyDir(path, r, pid)
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
|
||||
@@ -40,32 +38,32 @@ func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func getCpusetStat(path string, filename string) ([]uint16, error) {
|
||||
func getCpusetStat(path string, file string) ([]uint16, error) {
|
||||
var extracted []uint16
|
||||
fileContent, err := fscommon.GetCgroupParamString(path, filename)
|
||||
fileContent, err := fscommon.GetCgroupParamString(path, file)
|
||||
if err != nil {
|
||||
return extracted, err
|
||||
}
|
||||
if len(fileContent) == 0 {
|
||||
return extracted, fmt.Errorf("%s found to be empty", filepath.Join(path, filename))
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("empty file")}
|
||||
}
|
||||
|
||||
for _, s := range strings.Split(fileContent, ",") {
|
||||
splitted := strings.SplitN(s, "-", 3)
|
||||
switch len(splitted) {
|
||||
sp := strings.SplitN(s, "-", 3)
|
||||
switch len(sp) {
|
||||
case 3:
|
||||
return extracted, fmt.Errorf("invalid values in %s", filepath.Join(path, filename))
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("extra dash")}
|
||||
case 2:
|
||||
min, err := strconv.ParseUint(splitted[0], 10, 16)
|
||||
min, err := strconv.ParseUint(sp[0], 10, 16)
|
||||
if err != nil {
|
||||
return extracted, err
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
max, err := strconv.ParseUint(splitted[1], 10, 16)
|
||||
max, err := strconv.ParseUint(sp[1], 10, 16)
|
||||
if err != nil {
|
||||
return extracted, err
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
if min > max {
|
||||
return extracted, fmt.Errorf("invalid values in %s", filepath.Join(path, filename))
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("invalid values, min > max")}
|
||||
}
|
||||
for i := min; i <= max; i++ {
|
||||
extracted = append(extracted, uint16(i))
|
||||
@@ -73,7 +71,7 @@ func getCpusetStat(path string, filename string) ([]uint16, error) {
|
||||
case 1:
|
||||
value, err := strconv.ParseUint(s, 10, 16)
|
||||
if err != nil {
|
||||
return extracted, err
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
extracted = append(extracted, uint16(value))
|
||||
}
|
||||
@@ -168,9 +166,8 @@ func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error
|
||||
if err := s.ensureCpusAndMems(dir, r); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// because we are not using d.join we need to place the pid into the procs file
|
||||
// unlike the other subsystems
|
||||
// Since we are not using apply(), we need to place the pid
|
||||
// into the procs file.
|
||||
return cgroups.WriteCgroupProc(dir, pid)
|
||||
}
|
||||
|
||||
@@ -198,7 +195,7 @@ func cpusetEnsureParent(current string) error {
|
||||
}
|
||||
// Treat non-existing directory as cgroupfs as it will be created,
|
||||
// and the root cpuset directory obviously exists.
|
||||
if err != nil && err != unix.ENOENT {
|
||||
if err != nil && err != unix.ENOENT { //nolint:errorlint // unix errors are bare
|
||||
return &os.PathError{Op: "statfs", Path: parent, Err: err}
|
||||
}
|
||||
|
||||
@@ -224,12 +221,12 @@ func cpusetCopyIfNeeded(current, parent string) error {
|
||||
}
|
||||
|
||||
if isEmptyCpuset(currentCpus) {
|
||||
if err := cgroups.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
|
||||
if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if isEmptyCpuset(currentMems) {
|
||||
if err := cgroups.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
|
||||
if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
13
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
13
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
@@ -15,15 +13,15 @@ import (
|
||||
)
|
||||
|
||||
type DevicesGroup struct {
|
||||
testingSkipFinalCheck bool
|
||||
TestingSkipFinalCheck bool
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Name() string {
|
||||
return "devices"
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Apply(path string, d *cgroupData) error {
|
||||
if d.config.SkipDevices {
|
||||
func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
if r.SkipDevices {
|
||||
return nil
|
||||
}
|
||||
if path == "" {
|
||||
@@ -31,7 +29,8 @@ func (s *DevicesGroup) Apply(path string, d *cgroupData) error {
|
||||
// is a hard requirement for container's security.
|
||||
return errSubsystemDoesNotExist
|
||||
}
|
||||
return join(path, d.pid)
|
||||
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func loadEmulator(path string) (*cgroupdevices.Emulator, error) {
|
||||
@@ -91,7 +90,7 @@ func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
|
||||
//
|
||||
// This safety-check is skipped for the unit tests because we cannot
|
||||
// currently mock devices.list correctly.
|
||||
if !s.testingSkipFinalCheck {
|
||||
if !s.TestingSkipFinalCheck {
|
||||
currentAfter, err := loadEmulator(path)
|
||||
if err != nil {
|
||||
return err
|
||||
|
15
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
generated
vendored
Normal file
15
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
generated
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
)
|
||||
|
||||
type parseError = fscommon.ParseError
|
||||
|
||||
// malformedLine is used by all cgroupfs file parsers that expect a line
|
||||
// in a particular format but get some garbage instead.
|
||||
func malformedLine(path, file, line string) error {
|
||||
return &parseError{Path: path, File: file, Err: fmt.Errorf("malformed line: %s", line)}
|
||||
}
|
6
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
6
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
@@ -21,8 +19,8 @@ func (s *FreezerGroup) Name() string {
|
||||
return "freezer"
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
func (s *FreezerGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
|
||||
|
326
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
generated
vendored
326
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
generated
vendored
@@ -1,165 +1,86 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
subsystems = []subsystem{
|
||||
&CpusetGroup{},
|
||||
&DevicesGroup{},
|
||||
&MemoryGroup{},
|
||||
&CpuGroup{},
|
||||
&CpuacctGroup{},
|
||||
&PidsGroup{},
|
||||
&BlkioGroup{},
|
||||
&HugetlbGroup{},
|
||||
&NetClsGroup{},
|
||||
&NetPrioGroup{},
|
||||
&PerfEventGroup{},
|
||||
&FreezerGroup{},
|
||||
&NameGroup{GroupName: "name=systemd", Join: true},
|
||||
}
|
||||
HugePageSizes, _ = cgroups.GetHugePageSize()
|
||||
)
|
||||
var subsystems = []subsystem{
|
||||
&CpusetGroup{},
|
||||
&DevicesGroup{},
|
||||
&MemoryGroup{},
|
||||
&CpuGroup{},
|
||||
&CpuacctGroup{},
|
||||
&PidsGroup{},
|
||||
&BlkioGroup{},
|
||||
&HugetlbGroup{},
|
||||
&NetClsGroup{},
|
||||
&NetPrioGroup{},
|
||||
&PerfEventGroup{},
|
||||
&FreezerGroup{},
|
||||
&RdmaGroup{},
|
||||
&NameGroup{GroupName: "name=systemd", Join: true},
|
||||
}
|
||||
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
|
||||
func init() {
|
||||
// If using cgroups-hybrid mode then add a "" controller indicating
|
||||
// it should join the cgroups v2.
|
||||
if cgroups.IsCgroup2HybridMode() {
|
||||
subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true})
|
||||
}
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
|
||||
// GetStats fills in the stats for the subsystem.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Creates and joins the cgroup represented by 'cgroupData'.
|
||||
Apply(path string, c *cgroupData) error
|
||||
// Apply creates and joins a cgroup, adding pid into it. Some
|
||||
// subsystems use resources to pre-configure the cgroup parents
|
||||
// before creating or joining it.
|
||||
Apply(path string, r *configs.Resources, pid int) error
|
||||
// Set sets the cgroup resources.
|
||||
Set(path string, r *configs.Resources) error
|
||||
}
|
||||
|
||||
type manager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
rootless bool // ignore permission-related errors
|
||||
paths map[string]string
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
paths map[string]string
|
||||
}
|
||||
|
||||
func NewManager(cg *configs.Cgroup, paths map[string]string, rootless bool) cgroups.Manager {
|
||||
func NewManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
|
||||
// Some v1 controllers (cpu, cpuset, and devices) expect
|
||||
// cgroups.Resources to not be nil in Apply.
|
||||
if cg.Resources == nil {
|
||||
return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation")
|
||||
}
|
||||
if cg.Resources.Unified != nil {
|
||||
return nil, cgroups.ErrV1NoUnified
|
||||
}
|
||||
|
||||
if paths == nil {
|
||||
var err error
|
||||
paths, err = initPaths(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &manager{
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
rootless: rootless,
|
||||
}
|
||||
}
|
||||
|
||||
// The absolute path to the root of the cgroup hierarchies.
|
||||
var (
|
||||
cgroupRootLock sync.Mutex
|
||||
cgroupRoot string
|
||||
)
|
||||
|
||||
const defaultCgroupRoot = "/sys/fs/cgroup"
|
||||
|
||||
func tryDefaultCgroupRoot() string {
|
||||
var st, pst unix.Stat_t
|
||||
|
||||
// (1) it should be a directory...
|
||||
err := unix.Lstat(defaultCgroupRoot, &st)
|
||||
if err != nil || st.Mode&unix.S_IFDIR == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (2) ... and a mount point ...
|
||||
err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
if st.Dev == pst.Dev {
|
||||
// parent dir has the same dev -- not a mount point
|
||||
return ""
|
||||
}
|
||||
|
||||
// (3) ... of 'tmpfs' fs type.
|
||||
var fst unix.Statfs_t
|
||||
err = unix.Statfs(defaultCgroupRoot, &fst)
|
||||
if err != nil || fst.Type != unix.TMPFS_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (4) it should have at least 1 entry ...
|
||||
dir, err := os.Open(defaultCgroupRoot)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
names, err := dir.Readdirnames(1)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if len(names) < 1 {
|
||||
return ""
|
||||
}
|
||||
// ... which is a cgroup mount point.
|
||||
err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
|
||||
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
return defaultCgroupRoot
|
||||
}
|
||||
|
||||
// Gets the cgroupRoot.
|
||||
func getCgroupRoot() (string, error) {
|
||||
cgroupRootLock.Lock()
|
||||
defer cgroupRootLock.Unlock()
|
||||
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
// fast path
|
||||
cgroupRoot = tryDefaultCgroupRoot()
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
// slow path: parse mountinfo
|
||||
mi, err := cgroups.GetCgroupMounts(false)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if len(mi) < 1 {
|
||||
return "", errors.New("no cgroup mount found in mountinfo")
|
||||
}
|
||||
|
||||
// Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"),
|
||||
// use its parent directory.
|
||||
root := filepath.Dir(mi[0].Mountpoint)
|
||||
|
||||
if _, err := os.Stat(root); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
cgroupRoot = root
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
type cgroupData struct {
|
||||
root string
|
||||
innerPath string
|
||||
config *configs.Cgroup
|
||||
pid int
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isIgnorableError returns whether err is a permission error (in the loose
|
||||
@@ -171,8 +92,6 @@ func isIgnorableError(rootless bool, err error) bool {
|
||||
if !rootless {
|
||||
return false
|
||||
}
|
||||
// TODO: rm errors.Cause once we switch to %w everywhere
|
||||
err = errors.Cause(err)
|
||||
// Is it an ordinary EPERM?
|
||||
if errors.Is(err, os.ErrPermission) {
|
||||
return true
|
||||
@@ -186,56 +105,30 @@ func isIgnorableError(rootless bool, err error) bool {
|
||||
}
|
||||
|
||||
func (m *manager) Apply(pid int) (err error) {
|
||||
if m.cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
c := m.cgroups
|
||||
if c.Resources.Unified != nil {
|
||||
return cgroups.ErrV1NoUnified
|
||||
}
|
||||
|
||||
m.paths = make(map[string]string)
|
||||
if c.Paths != nil {
|
||||
cgMap, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for name, path := range c.Paths {
|
||||
// XXX(kolyshkin@): why this check is needed?
|
||||
if _, ok := cgMap[name]; ok {
|
||||
m.paths[name] = path
|
||||
}
|
||||
}
|
||||
return cgroups.EnterPid(m.paths, pid)
|
||||
}
|
||||
|
||||
d, err := getCgroupData(m.cgroups, pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, sys := range subsystems {
|
||||
p, err := d.path(sys.Name())
|
||||
if err != nil {
|
||||
// The non-presence of the devices subsystem is
|
||||
// considered fatal for security reasons.
|
||||
if cgroups.IsNotFound(err) && (c.SkipDevices || sys.Name() != "devices") {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
name := sys.Name()
|
||||
p, ok := m.paths[name]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
m.paths[sys.Name()] = p
|
||||
|
||||
if err := sys.Apply(p, d); err != nil {
|
||||
if err := sys.Apply(p, c.Resources, pid); err != nil {
|
||||
// In the case of rootless (including euid=0 in userns), where an
|
||||
// explicit cgroup path hasn't been set, we don't bail on error in
|
||||
// case of permission problems. Cases where limits have been set
|
||||
// (and we couldn't create our own cgroup) are handled by Set.
|
||||
if isIgnorableError(m.rootless, err) && m.cgroups.Path == "" {
|
||||
delete(m.paths, sys.Name())
|
||||
// case of permission problems here, but do delete the path from
|
||||
// the m.paths map, since it is either non-existent and could not
|
||||
// be created, or the pid could not be added to it.
|
||||
//
|
||||
// Cases where limits for the subsystem have been set are handled
|
||||
// later by Set, which fails with a friendly error (see
|
||||
// if path == "" in Set).
|
||||
if isIgnorableError(c.Rootless, err) && c.Path == "" {
|
||||
delete(m.paths, name)
|
||||
continue
|
||||
}
|
||||
return err
|
||||
@@ -246,9 +139,6 @@ func (m *manager) Apply(pid int) (err error) {
|
||||
}
|
||||
|
||||
func (m *manager) Destroy() error {
|
||||
if m.cgroups == nil || m.cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return cgroups.RemovePaths(m.paths)
|
||||
@@ -281,11 +171,6 @@ func (m *manager) Set(r *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.cgroups != nil && m.cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
if r.Unified != nil {
|
||||
return cgroups.ErrV1NoUnified
|
||||
}
|
||||
@@ -295,10 +180,11 @@ func (m *manager) Set(r *configs.Resources) error {
|
||||
for _, sys := range subsystems {
|
||||
path := m.paths[sys.Name()]
|
||||
if err := sys.Set(path, r); err != nil {
|
||||
if m.rootless && sys.Name() == "devices" {
|
||||
// When rootless is true, errors from the device subsystem
|
||||
// are ignored, as it is really not expected to work.
|
||||
if m.cgroups.Rootless && sys.Name() == "devices" {
|
||||
continue
|
||||
}
|
||||
// When m.rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if path == "" {
|
||||
@@ -317,7 +203,7 @@ func (m *manager) Set(r *configs.Resources) error {
|
||||
// provided
|
||||
func (m *manager) Freeze(state configs.FreezerState) error {
|
||||
path := m.Path("freezer")
|
||||
if m.cgroups == nil || path == "" {
|
||||
if path == "" {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
|
||||
@@ -339,68 +225,6 @@ func (m *manager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.Path("devices"))
|
||||
}
|
||||
|
||||
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return nil, errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove this code. Path safety is important! -- cyphar
|
||||
cgPath := libcontainerUtils.CleanPath(c.Path)
|
||||
cgParent := libcontainerUtils.CleanPath(c.Parent)
|
||||
cgName := libcontainerUtils.CleanPath(c.Name)
|
||||
|
||||
innerPath := cgPath
|
||||
if innerPath == "" {
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
|
||||
return &cgroupData{
|
||||
root: root,
|
||||
innerPath: innerPath,
|
||||
config: c,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) path(subsystem string) (string, error) {
|
||||
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
||||
if filepath.IsAbs(raw.innerPath) {
|
||||
mnt, err := cgroups.FindCgroupMountpoint(raw.root, subsystem)
|
||||
// If we didn't mount the subsystem, there is no point we make the path.
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
|
||||
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
|
||||
}
|
||||
|
||||
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
|
||||
// process could in container and shared pid namespace with host, and
|
||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(parentPath, raw.innerPath), nil
|
||||
}
|
||||
|
||||
func join(path string, pid int) error {
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
return cgroups.WriteCgroupProc(path, pid)
|
||||
}
|
||||
|
||||
func (m *manager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
@@ -432,7 +256,7 @@ func OOMKillCount(path string) (uint64, error) {
|
||||
func (m *manager) OOMKillCount() (uint64, error) {
|
||||
c, err := OOMKillCount(m.Path("memory"))
|
||||
// Ignore ENOENT when rootless as it couldn't create cgroup.
|
||||
if err != nil && m.rootless && os.IsNotExist(err) {
|
||||
if err != nil && m.cgroups.Rootless && os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
|
||||
|
17
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
generated
vendored
17
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
generated
vendored
@@ -1,9 +1,6 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
@@ -17,8 +14,8 @@ func (s *HugetlbGroup) Name() string {
|
||||
return "hugetlb"
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
func (s *HugetlbGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
|
||||
@@ -32,29 +29,29 @@ func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
if !cgroups.PathExists(path) {
|
||||
return nil
|
||||
}
|
||||
for _, pageSize := range HugePageSizes {
|
||||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
for _, pageSize := range cgroups.HugePageSizes() {
|
||||
usage := "hugetlb." + pageSize + ".usage_in_bytes"
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
maxUsage := "hugetlb." + pageSize + ".max_usage_in_bytes"
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
||||
return err
|
||||
}
|
||||
hugetlbStats.MaxUsage = value
|
||||
|
||||
failcnt := "hugetlb." + pageSize + ".failcnt"
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Failcnt = value
|
||||
|
||||
|
56
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
56
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
@@ -1,9 +1,8 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
@@ -11,11 +10,11 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -31,8 +30,8 @@ func (s *MemoryGroup) Name() string {
|
||||
return "memory"
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Apply(path string, d *cgroupData) (err error) {
|
||||
return join(path, d.pid)
|
||||
func (s *MemoryGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func setMemory(path string, val int64) error {
|
||||
@@ -56,7 +55,7 @@ func setMemory(path string, val int64) error {
|
||||
return err
|
||||
}
|
||||
|
||||
return errors.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
|
||||
return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
|
||||
}
|
||||
|
||||
func setSwap(path string, val int64) error {
|
||||
@@ -134,15 +133,15 @@ func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *r.MemorySwappiness)
|
||||
return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *r.MemorySwappiness)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
// Set stats from memory.stat.
|
||||
statsFile, err := cgroups.OpenFile(path, "memory.stat", os.O_RDONLY)
|
||||
const file = "memory.stat"
|
||||
statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
@@ -155,7 +154,7 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
|
||||
return &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
@@ -220,42 +219,42 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||
// are optional in the kernel.
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Usage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.MaxUsage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Failcnt = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err)
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
||||
|
||||
func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
|
||||
func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) {
|
||||
const (
|
||||
maxColumns = math.MaxUint8 + 1
|
||||
filename = "memory.numa_stat"
|
||||
file = "memory.numa_stat"
|
||||
)
|
||||
stats := cgroups.PageUsageByNUMA{}
|
||||
|
||||
file, err := cgroups.OpenFile(cgroupPath, filename, os.O_RDONLY)
|
||||
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if os.IsNotExist(err) {
|
||||
return stats, nil
|
||||
} else if err != nil {
|
||||
return stats, err
|
||||
}
|
||||
defer file.Close()
|
||||
defer fd.Close()
|
||||
|
||||
// File format is documented in linux/Documentation/cgroup-v1/memory.txt
|
||||
// and it looks like this:
|
||||
@@ -266,7 +265,7 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
|
||||
// unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
scanner := bufio.NewScanner(fd)
|
||||
for scanner.Scan() {
|
||||
var field *cgroups.PageStats
|
||||
|
||||
@@ -284,8 +283,7 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
|
||||
} else {
|
||||
// The first column was already validated,
|
||||
// so be strict to the rest.
|
||||
return stats, fmt.Errorf("malformed line %q in %s",
|
||||
line, filename)
|
||||
return stats, malformedLine(path, file, line)
|
||||
}
|
||||
}
|
||||
key, val := byNode[0], byNode[1]
|
||||
@@ -296,24 +294,23 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
|
||||
}
|
||||
field.Total, err = strconv.ParseUint(val, 0, 64)
|
||||
if err != nil {
|
||||
return stats, err
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
field.Nodes = map[uint8]uint64{}
|
||||
} else { // Subsequent columns: key is N<id>, val is usage.
|
||||
if len(key) < 2 || key[0] != 'N' {
|
||||
// This is definitely an error.
|
||||
return stats, fmt.Errorf("malformed line %q in %s",
|
||||
line, filename)
|
||||
return stats, malformedLine(path, file, line)
|
||||
}
|
||||
|
||||
n, err := strconv.ParseUint(key[1:], 10, 8)
|
||||
if err != nil {
|
||||
return cgroups.PageUsageByNUMA{}, err
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
usage, err := strconv.ParseUint(val, 10, 64)
|
||||
if err != nil {
|
||||
return cgroups.PageUsageByNUMA{}, err
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
field.Nodes[uint8(n)] = usage
|
||||
@@ -321,9 +318,8 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
|
||||
|
||||
}
|
||||
}
|
||||
err = scanner.Err()
|
||||
if err != nil {
|
||||
return cgroups.PageUsageByNUMA{}, err
|
||||
if err := scanner.Err(); err != nil {
|
||||
return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
|
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
@@ -16,10 +14,10 @@ func (s *NameGroup) Name() string {
|
||||
return s.GroupName
|
||||
}
|
||||
|
||||
func (s *NameGroup) Apply(path string, d *cgroupData) error {
|
||||
func (s *NameGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
if s.Join {
|
||||
// ignore errors if the named cgroup does not exist
|
||||
_ = join(path, d.pid)
|
||||
// Ignore errors if the named cgroup does not exist.
|
||||
_ = apply(path, pid)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
6
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
6
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
@@ -15,8 +13,8 @@ func (s *NetClsGroup) Name() string {
|
||||
return "net_cls"
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
func (s *NetClsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Set(path string, r *configs.Resources) error {
|
||||
|
6
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
generated
vendored
6
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
@@ -13,8 +11,8 @@ func (s *NetPrioGroup) Name() string {
|
||||
return "net_prio"
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
func (s *NetPrioGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {
|
||||
|
186
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
generated
vendored
Normal file
186
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
generated
vendored
Normal file
@@ -0,0 +1,186 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
// The absolute path to the root of the cgroup hierarchies.
|
||||
var (
|
||||
cgroupRootLock sync.Mutex
|
||||
cgroupRoot string
|
||||
)
|
||||
|
||||
const defaultCgroupRoot = "/sys/fs/cgroup"
|
||||
|
||||
func initPaths(cg *configs.Cgroup) (map[string]string, error) {
|
||||
root, err := rootPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
inner, err := innerPath(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, sys := range subsystems {
|
||||
name := sys.Name()
|
||||
path, err := subsysPath(root, inner, name)
|
||||
if err != nil {
|
||||
// The non-presence of the devices subsystem
|
||||
// is considered fatal for security reasons.
|
||||
if cgroups.IsNotFound(err) && (cg.SkipDevices || name != "devices") {
|
||||
continue
|
||||
}
|
||||
|
||||
return nil, err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
|
||||
return paths, nil
|
||||
}
|
||||
|
||||
func tryDefaultCgroupRoot() string {
|
||||
var st, pst unix.Stat_t
|
||||
|
||||
// (1) it should be a directory...
|
||||
err := unix.Lstat(defaultCgroupRoot, &st)
|
||||
if err != nil || st.Mode&unix.S_IFDIR == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (2) ... and a mount point ...
|
||||
err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
if st.Dev == pst.Dev {
|
||||
// parent dir has the same dev -- not a mount point
|
||||
return ""
|
||||
}
|
||||
|
||||
// (3) ... of 'tmpfs' fs type.
|
||||
var fst unix.Statfs_t
|
||||
err = unix.Statfs(defaultCgroupRoot, &fst)
|
||||
if err != nil || fst.Type != unix.TMPFS_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (4) it should have at least 1 entry ...
|
||||
dir, err := os.Open(defaultCgroupRoot)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
names, err := dir.Readdirnames(1)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if len(names) < 1 {
|
||||
return ""
|
||||
}
|
||||
// ... which is a cgroup mount point.
|
||||
err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
|
||||
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
return defaultCgroupRoot
|
||||
}
|
||||
|
||||
// rootPath finds and returns path to the root of the cgroup hierarchies.
|
||||
func rootPath() (string, error) {
|
||||
cgroupRootLock.Lock()
|
||||
defer cgroupRootLock.Unlock()
|
||||
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
// fast path
|
||||
cgroupRoot = tryDefaultCgroupRoot()
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
// slow path: parse mountinfo
|
||||
mi, err := cgroups.GetCgroupMounts(false)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if len(mi) < 1 {
|
||||
return "", errors.New("no cgroup mount found in mountinfo")
|
||||
}
|
||||
|
||||
// Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"),
|
||||
// use its parent directory.
|
||||
root := filepath.Dir(mi[0].Mountpoint)
|
||||
|
||||
if _, err := os.Stat(root); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
cgroupRoot = root
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
func innerPath(c *configs.Cgroup) (string, error) {
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
|
||||
innerPath := utils.CleanPath(c.Path)
|
||||
if innerPath == "" {
|
||||
cgParent := utils.CleanPath(c.Parent)
|
||||
cgName := utils.CleanPath(c.Name)
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
|
||||
return innerPath, nil
|
||||
}
|
||||
|
||||
func subsysPath(root, inner, subsystem string) (string, error) {
|
||||
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
||||
if filepath.IsAbs(inner) {
|
||||
mnt, err := cgroups.FindCgroupMountpoint(root, subsystem)
|
||||
// If we didn't mount the subsystem, there is no point we make the path.
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
|
||||
return filepath.Join(root, filepath.Base(mnt), inner), nil
|
||||
}
|
||||
|
||||
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
|
||||
// process could in container and shared pid namespace with host, and
|
||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(parentPath, inner), nil
|
||||
}
|
||||
|
||||
func apply(path string, pid int) error {
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
return cgroups.WriteCgroupProc(path, pid)
|
||||
}
|
6
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
generated
vendored
6
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
@@ -13,8 +11,8 @@ func (s *PerfEventGroup) Name() string {
|
||||
return "perf_event"
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
func (s *PerfEventGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Set(_ string, _ *configs.Resources) error {
|
||||
|
28
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
28
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
@@ -1,10 +1,7 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
@@ -18,8 +15,8 @@ func (s *PidsGroup) Name() string {
|
||||
return "pids"
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
func (s *PidsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Set(path string, r *configs.Resources) error {
|
||||
@@ -45,21 +42,18 @@ func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
}
|
||||
current, err := fscommon.GetCgroupParamUint(path, "pids.current")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.current - %s", err)
|
||||
return err
|
||||
}
|
||||
|
||||
maxString, err := fscommon.GetCgroupParamString(path, "pids.max")
|
||||
max, err := fscommon.GetCgroupParamUint(path, "pids.max")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - %s", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Default if pids.max == "max" is 0 -- which represents "no limit".
|
||||
var max uint64
|
||||
if maxString != "max" {
|
||||
max, err = fscommon.ParseUint(maxString, 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
|
||||
}
|
||||
// If no limit is set, read from pids.max returns "max", which is
|
||||
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
|
||||
// represent "no limit" for pids as 0, thus this conversion.
|
||||
if max == math.MaxUint64 {
|
||||
max = 0
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
|
25
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
generated
vendored
Normal file
25
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
generated
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type RdmaGroup struct{}
|
||||
|
||||
func (s *RdmaGroup) Name() string {
|
||||
return "rdma"
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) Set(path string, r *configs.Resources) error {
|
||||
return fscommon.RdmaSet(path, r)
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return fscommon.RdmaGetStats(path, stats)
|
||||
}
|
3
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/unsupported.go
generated
vendored
3
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/unsupported.go
generated
vendored
@@ -1,3 +0,0 @@
|
||||
// +build !linux
|
||||
|
||||
package fs
|
10
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
10
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
@@ -49,7 +47,8 @@ func setCpu(dirPath string, r *configs.Resources) error {
|
||||
}
|
||||
|
||||
func statCpu(dirPath string, stats *cgroups.Stats) error {
|
||||
f, err := cgroups.OpenFile(dirPath, "cpu.stat", os.O_RDONLY)
|
||||
const file = "cpu.stat"
|
||||
f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -59,7 +58,7 @@ func statCpu(dirPath string, stats *cgroups.Stats) error {
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
switch t {
|
||||
case "usage_usec":
|
||||
@@ -81,5 +80,8 @@ func statCpu(dirPath string, stats *cgroups.Stats) error {
|
||||
stats.CpuStats.ThrottlingData.ThrottledTime = v * 1000
|
||||
}
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
|
26
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
26
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
@@ -18,41 +18,37 @@ package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
const UnifiedMountpoint = "/sys/fs/cgroup"
|
||||
|
||||
func defaultDirPath(c *configs.Cgroup) (string, error) {
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return "", errors.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
|
||||
}
|
||||
if len(c.Paths) != 0 {
|
||||
// never set by specconv
|
||||
return "", errors.Errorf("cgroup: Paths is unsupported, use Path, got %+v", c)
|
||||
return "", fmt.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
|
||||
}
|
||||
|
||||
// XXX: Do not remove this code. Path safety is important! -- cyphar
|
||||
cgPath := libcontainerUtils.CleanPath(c.Path)
|
||||
cgParent := libcontainerUtils.CleanPath(c.Parent)
|
||||
cgName := libcontainerUtils.CleanPath(c.Name)
|
||||
|
||||
return _defaultDirPath(UnifiedMountpoint, cgPath, cgParent, cgName)
|
||||
return _defaultDirPath(UnifiedMountpoint, c.Path, c.Parent, c.Name)
|
||||
}
|
||||
|
||||
func _defaultDirPath(root, cgPath, cgParent, cgName string) (string, error) {
|
||||
if (cgName != "" || cgParent != "") && cgPath != "" {
|
||||
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
innerPath := cgPath
|
||||
|
||||
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
|
||||
innerPath := utils.CleanPath(cgPath)
|
||||
if innerPath == "" {
|
||||
cgParent := utils.CleanPath(cgParent)
|
||||
cgName := utils.CleanPath(cgName)
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
if filepath.IsAbs(innerPath) {
|
||||
@@ -89,7 +85,7 @@ func parseCgroupFromReader(r io.Reader) (string, error) {
|
||||
parts = strings.SplitN(text, ":", 3)
|
||||
)
|
||||
if len(parts) < 3 {
|
||||
return "", errors.Errorf("invalid cgroup entry: %q", text)
|
||||
return "", fmt.Errorf("invalid cgroup entry: %q", text)
|
||||
}
|
||||
// text is like "0::/user.slice/user-1001.slice/session-1.scope"
|
||||
if parts[0] == "0" && parts[1] == "" {
|
||||
|
11
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
11
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
@@ -1,16 +1,15 @@
|
||||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/userns"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func isRWM(perms devices.Permissions) bool {
|
||||
@@ -64,7 +63,7 @@ func setDevices(dirPath string, r *configs.Resources) error {
|
||||
}
|
||||
dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0o600)
|
||||
if err != nil {
|
||||
return errors.Errorf("cannot get dir FD for %s", dirPath)
|
||||
return fmt.Errorf("cannot get dir FD for %s", dirPath)
|
||||
}
|
||||
defer unix.Close(dirFD)
|
||||
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
||||
|
18
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
18
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
@@ -1,19 +1,17 @@
|
||||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
stdErrors "errors"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func setFreezer(dirPath string, state configs.FreezerState) error {
|
||||
@@ -26,7 +24,7 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
|
||||
case configs.Thawed:
|
||||
stateStr = "0"
|
||||
default:
|
||||
return errors.Errorf("invalid freezer state %q requested", state)
|
||||
return fmt.Errorf("invalid freezer state %q requested", state)
|
||||
}
|
||||
|
||||
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR)
|
||||
@@ -37,7 +35,7 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
|
||||
if state != configs.Frozen {
|
||||
return nil
|
||||
}
|
||||
return errors.Wrap(err, "freezer not supported")
|
||||
return fmt.Errorf("freezer not supported: %w", err)
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
@@ -48,7 +46,7 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
|
||||
if actualState, err := readFreezer(dirPath, fd); err != nil {
|
||||
return err
|
||||
} else if actualState != state {
|
||||
return errors.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
|
||||
return fmt.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -58,7 +56,7 @@ func getFreezer(dirPath string) (configs.FreezerState, error) {
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat the freezer as being in
|
||||
// an "undefined" state.
|
||||
if os.IsNotExist(err) || stdErrors.Is(err, unix.ENODEV) {
|
||||
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Undefined, err
|
||||
@@ -82,7 +80,7 @@ func readFreezer(dirPath string, fd *os.File) (configs.FreezerState, error) {
|
||||
case "1\n":
|
||||
return waitFrozen(dirPath)
|
||||
default:
|
||||
return configs.Undefined, errors.Errorf(`unknown "cgroup.freeze" state: %q`, state)
|
||||
return configs.Undefined, fmt.Errorf(`unknown "cgroup.freeze" state: %q`, state)
|
||||
}
|
||||
}
|
||||
|
||||
|
52
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
52
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
@@ -1,8 +1,7 @@
|
||||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
@@ -10,9 +9,10 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
type parseError = fscommon.ParseError
|
||||
|
||||
type manager struct {
|
||||
config *configs.Cgroup
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
@@ -20,16 +20,12 @@ type manager struct {
|
||||
// controllers is content of "cgroup.controllers" file.
|
||||
// excludes pseudo-controllers ("devices" and "freezer").
|
||||
controllers map[string]struct{}
|
||||
rootless bool
|
||||
}
|
||||
|
||||
// NewManager creates a manager for cgroup v2 unified hierarchy.
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope".
|
||||
// If dirPath is empty, it is automatically set using config.
|
||||
func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups.Manager, error) {
|
||||
if config == nil {
|
||||
config = &configs.Cgroup{}
|
||||
}
|
||||
func NewManager(config *configs.Cgroup, dirPath string) (cgroups.Manager, error) {
|
||||
if dirPath == "" {
|
||||
var err error
|
||||
dirPath, err = defaultDirPath(config)
|
||||
@@ -39,9 +35,8 @@ func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups.
|
||||
}
|
||||
|
||||
m := &manager{
|
||||
config: config,
|
||||
dirPath: dirPath,
|
||||
rootless: rootless,
|
||||
config: config,
|
||||
dirPath: dirPath,
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
@@ -53,7 +48,7 @@ func (m *manager) getControllers() error {
|
||||
|
||||
data, err := cgroups.ReadFile(m.dirPath, "cgroup.controllers")
|
||||
if err != nil {
|
||||
if m.rootless && m.config.Path == "" {
|
||||
if m.config.Rootless && m.config.Path == "" {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
@@ -73,12 +68,12 @@ func (m *manager) Apply(pid int) error {
|
||||
// - "runc create (no limits + no cgrouppath + no permission) succeeds"
|
||||
// - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error"
|
||||
// - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if m.rootless {
|
||||
if m.config.Rootless {
|
||||
if m.config.Path == "" {
|
||||
if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed {
|
||||
return nil
|
||||
}
|
||||
return errors.Wrap(err, "rootless needs no limits + no cgrouppath when no permission is granted for cgroups")
|
||||
return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err)
|
||||
}
|
||||
}
|
||||
return err
|
||||
@@ -123,13 +118,20 @@ func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||
if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if len(errs) > 0 && !m.rootless {
|
||||
return st, errors.Errorf("error while statting cgroup v2: %+v", errs)
|
||||
// rdma (since kernel 4.11)
|
||||
if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if len(errs) > 0 && !m.config.Rootless {
|
||||
return st, fmt.Errorf("error while statting cgroup v2: %+v", errs)
|
||||
}
|
||||
return st, nil
|
||||
}
|
||||
|
||||
func (m *manager) Freeze(state configs.FreezerState) error {
|
||||
if m.config.Resources == nil {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
if err := setFreezer(m.dirPath, state); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -146,6 +148,9 @@ func (m *manager) Path(_ string) string {
|
||||
}
|
||||
|
||||
func (m *manager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
if err := m.getControllers(); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -167,10 +172,10 @@ func (m *manager) Set(r *configs.Resources) error {
|
||||
}
|
||||
// devices (since kernel 4.15, pseudo-controller)
|
||||
//
|
||||
// When m.rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// When rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if err := setDevices(m.dirPath, r); err != nil && !m.rootless {
|
||||
if err := setDevices(m.dirPath, r); err != nil && !m.config.Rootless {
|
||||
return err
|
||||
}
|
||||
// cpuset (since kernel 5.0)
|
||||
@@ -181,6 +186,10 @@ func (m *manager) Set(r *configs.Resources) error {
|
||||
if err := setHugeTlb(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// rdma (since kernel 4.11)
|
||||
if err := fscommon.RdmaSet(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// freezer (since kernel 5.2, pseudo-controller)
|
||||
if err := setFreezer(m.dirPath, r.Freezer); err != nil {
|
||||
return err
|
||||
@@ -198,9 +207,8 @@ func (m *manager) setUnified(res map[string]string) error {
|
||||
return fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
|
||||
}
|
||||
if err := cgroups.WriteFile(m.dirPath, k, v); err != nil {
|
||||
errC := errors.Cause(err)
|
||||
// Check for both EPERM and ENOENT since O_CREAT is used by WriteFile.
|
||||
if errors.Is(errC, os.ErrPermission) || errors.Is(errC, os.ErrNotExist) {
|
||||
if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) {
|
||||
// Check if a controller is available,
|
||||
// to give more specific error if not.
|
||||
sk := strings.SplitN(k, ".", 2)
|
||||
@@ -212,7 +220,7 @@ func (m *manager) setUnified(res map[string]string) error {
|
||||
return fmt.Errorf("unified resource %q can't be set: controller %q not available", k, c)
|
||||
}
|
||||
}
|
||||
return errors.Wrapf(err, "can't set unified resource %q", k)
|
||||
return fmt.Errorf("unable to set unified resource %q: %w", k, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -243,7 +251,7 @@ func OOMKillCount(path string) (uint64, error) {
|
||||
|
||||
func (m *manager) OOMKillCount() (uint64, error) {
|
||||
c, err := OOMKillCount(m.dirPath)
|
||||
if err != nil && m.rootless && os.IsNotExist(err) {
|
||||
if err != nil && m.config.Rootless && os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
|
||||
|
10
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
10
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
@@ -1,12 +1,8 @@
|
||||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
@@ -30,10 +26,8 @@ func setHugeTlb(dirPath string, r *configs.Resources) error {
|
||||
}
|
||||
|
||||
func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
|
||||
hugePageSizes, _ := cgroups.GetHugePageSize()
|
||||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
|
||||
for _, pagesize := range hugePageSizes {
|
||||
for _, pagesize := range cgroups.HugePageSizes() {
|
||||
value, err := fscommon.GetCgroupParamUint(dirPath, "hugetlb."+pagesize+".current")
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -43,7 +37,7 @@ func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
|
||||
fileName := "hugetlb." + pagesize + ".events"
|
||||
value, err = fscommon.GetValueByKey(dirPath, fileName, "max")
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to read stats")
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Failcnt = value
|
||||
|
||||
|
13
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
13
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
@@ -117,13 +115,14 @@ func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error
|
||||
ret[parts[0]] = parts[1:]
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
return nil, &parseError{Path: dirPath, File: name, Err: err}
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func statIo(dirPath string, stats *cgroups.Stats) error {
|
||||
values, err := readCgroup2MapFile(dirPath, "io.stat")
|
||||
const file = "io.stat"
|
||||
values, err := readCgroup2MapFile(dirPath, file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -136,11 +135,11 @@ func statIo(dirPath string, stats *cgroups.Stats) error {
|
||||
}
|
||||
major, err := strconv.ParseUint(d[0], 10, 64)
|
||||
if err != nil {
|
||||
return err
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
minor, err := strconv.ParseUint(d[1], 10, 64)
|
||||
if err != nil {
|
||||
return err
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
|
||||
for _, item := range v {
|
||||
@@ -177,7 +176,7 @@ func statIo(dirPath string, stats *cgroups.Stats) error {
|
||||
|
||||
value, err := strconv.ParseUint(d[1], 10, 64)
|
||||
if err != nil {
|
||||
return err
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
|
||||
entry := cgroups.BlkioStatEntry{
|
||||
|
29
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
29
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
@@ -1,19 +1,18 @@
|
||||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// numToStr converts an int64 value to a string for writing to a
|
||||
@@ -75,8 +74,8 @@ func setMemory(dirPath string, r *configs.Resources) error {
|
||||
}
|
||||
|
||||
func statMemory(dirPath string, stats *cgroups.Stats) error {
|
||||
// Set stats from memory.stat.
|
||||
statsFile, err := cgroups.OpenFile(dirPath, "memory.stat", os.O_RDONLY)
|
||||
const file = "memory.stat"
|
||||
statsFile, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -86,10 +85,13 @@ func statMemory(dirPath string, stats *cgroups.Stats) error {
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to parse memory.stat (%q)", sc.Text())
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["file"]
|
||||
// Unlike cgroup v1 which has memory.use_hierarchy binary knob,
|
||||
// cgroup v2 is always hierarchical.
|
||||
@@ -139,13 +141,13 @@ func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
|
||||
// swapaccount=0 kernel boot parameter is given.
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", usage)
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Usage = value
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", limit)
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
@@ -153,7 +155,8 @@ func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
|
||||
}
|
||||
|
||||
func statsFromMeminfo(stats *cgroups.Stats) error {
|
||||
f, err := os.Open("/proc/meminfo")
|
||||
const file = "/proc/meminfo"
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -190,7 +193,7 @@ func statsFromMeminfo(stats *cgroups.Stats) error {
|
||||
vStr := strings.TrimSpace(strings.TrimSuffix(parts[1], " kB"))
|
||||
*p, err = strconv.ParseUint(vStr, 10, 64)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "parsing /proc/meminfo "+k)
|
||||
return &parseError{File: file, Err: errors.New("bad value for " + k)}
|
||||
}
|
||||
|
||||
found++
|
||||
@@ -199,8 +202,8 @@ func statsFromMeminfo(stats *cgroups.Stats) error {
|
||||
break
|
||||
}
|
||||
}
|
||||
if sc.Err() != nil {
|
||||
return sc.Err()
|
||||
if err := sc.Err(); err != nil {
|
||||
return &parseError{Path: "", File: file, Err: err}
|
||||
}
|
||||
|
||||
stats.MemoryStats.SwapUsage.Usage = (swap_total - swap_free) * 1024
|
||||
|
29
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
29
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
@@ -1,17 +1,16 @@
|
||||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func isPidsSet(r *configs.Resources) bool {
|
||||
@@ -53,22 +52,18 @@ func statPids(dirPath string, stats *cgroups.Stats) error {
|
||||
if os.IsNotExist(err) {
|
||||
return statPidsFromCgroupProcs(dirPath, stats)
|
||||
}
|
||||
return errors.Wrap(err, "failed to parse pids.current")
|
||||
return err
|
||||
}
|
||||
|
||||
maxString, err := fscommon.GetCgroupParamString(dirPath, "pids.max")
|
||||
max, err := fscommon.GetCgroupParamUint(dirPath, "pids.max")
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to parse pids.max")
|
||||
return err
|
||||
}
|
||||
|
||||
// Default if pids.max == "max" is 0 -- which represents "no limit".
|
||||
var max uint64
|
||||
if maxString != "max" {
|
||||
max, err = fscommon.ParseUint(maxString, 10, 64)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q",
|
||||
maxString, filepath.Join(dirPath, "pids.max"))
|
||||
}
|
||||
// If no limit is set, read from pids.max returns "max", which is
|
||||
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
|
||||
// represent "no limit" for pids as 0, thus this conversion.
|
||||
if max == math.MaxUint64 {
|
||||
max = 0
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
|
121
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
generated
vendored
Normal file
121
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
generated
vendored
Normal file
@@ -0,0 +1,121 @@
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// parseRdmaKV parses raw string to RdmaEntry.
|
||||
func parseRdmaKV(raw string, entry *cgroups.RdmaEntry) error {
|
||||
var value uint32
|
||||
|
||||
parts := strings.SplitN(raw, "=", 3)
|
||||
|
||||
if len(parts) != 2 {
|
||||
return errors.New("Unable to parse RDMA entry")
|
||||
}
|
||||
|
||||
k, v := parts[0], parts[1]
|
||||
|
||||
if v == "max" {
|
||||
value = math.MaxUint32
|
||||
} else {
|
||||
val64, err := strconv.ParseUint(v, 10, 32)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
value = uint32(val64)
|
||||
}
|
||||
if k == "hca_handle" {
|
||||
entry.HcaHandles = value
|
||||
} else if k == "hca_object" {
|
||||
entry.HcaObjects = value
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// readRdmaEntries reads and converts array of rawstrings to RdmaEntries from file.
|
||||
// example entry: mlx4_0 hca_handle=2 hca_object=2000
|
||||
func readRdmaEntries(dir, file string) ([]cgroups.RdmaEntry, error) {
|
||||
rdmaEntries := make([]cgroups.RdmaEntry, 0)
|
||||
fd, err := cgroups.OpenFile(dir, file, unix.O_RDONLY)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer fd.Close() //nolint:errorlint
|
||||
scanner := bufio.NewScanner(fd)
|
||||
for scanner.Scan() {
|
||||
parts := strings.SplitN(scanner.Text(), " ", 4)
|
||||
if len(parts) == 3 {
|
||||
entry := new(cgroups.RdmaEntry)
|
||||
entry.Device = parts[0]
|
||||
err = parseRdmaKV(parts[1], entry)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
err = parseRdmaKV(parts[2], entry)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
rdmaEntries = append(rdmaEntries, *entry)
|
||||
}
|
||||
}
|
||||
return rdmaEntries, scanner.Err()
|
||||
}
|
||||
|
||||
// RdmaGetStats returns rdma stats such as totalLimit and current entries.
|
||||
func RdmaGetStats(path string, stats *cgroups.Stats) error {
|
||||
currentEntries, err := readRdmaEntries(path, "rdma.current")
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
err = nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
maxEntries, err := readRdmaEntries(path, "rdma.max")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If device got removed between reading two files, ignore returning stats.
|
||||
if len(currentEntries) != len(maxEntries) {
|
||||
return nil
|
||||
}
|
||||
|
||||
stats.RdmaStats = cgroups.RdmaStats{
|
||||
RdmaLimit: maxEntries,
|
||||
RdmaCurrent: currentEntries,
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func createCmdString(device string, limits configs.LinuxRdma) string {
|
||||
cmdString := device
|
||||
if limits.HcaHandles != nil {
|
||||
cmdString += " hca_handle=" + strconv.FormatUint(uint64(*limits.HcaHandles), 10)
|
||||
}
|
||||
if limits.HcaObjects != nil {
|
||||
cmdString += " hca_object=" + strconv.FormatUint(uint64(*limits.HcaObjects), 10)
|
||||
}
|
||||
return cmdString
|
||||
}
|
||||
|
||||
// RdmaSet sets RDMA resources.
|
||||
func RdmaSet(path string, r *configs.Resources) error {
|
||||
for device, limits := range r.Rdma {
|
||||
if err := cgroups.WriteFile(path, "rdma.max", createCmdString(device, limits)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
34
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
generated
vendored
34
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
generated
vendored
@@ -1,11 +1,10 @@
|
||||
// +build linux
|
||||
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@@ -13,8 +12,6 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
ErrNotValidFormat = errors.New("line is not a valid key value format")
|
||||
|
||||
// Deprecated: use cgroups.OpenFile instead.
|
||||
OpenFile = cgroups.OpenFile
|
||||
// Deprecated: use cgroups.ReadFile instead.
|
||||
@@ -23,6 +20,19 @@ var (
|
||||
WriteFile = cgroups.WriteFile
|
||||
)
|
||||
|
||||
// ParseError records a parse error details, including the file path.
|
||||
type ParseError struct {
|
||||
Path string
|
||||
File string
|
||||
Err error
|
||||
}
|
||||
|
||||
func (e *ParseError) Error() string {
|
||||
return "unable to parse " + path.Join(e.Path, e.File) + ": " + e.Err.Error()
|
||||
}
|
||||
|
||||
func (e *ParseError) Unwrap() error { return e.Err }
|
||||
|
||||
// ParseUint converts a string to an uint64 integer.
|
||||
// Negative values are returned at zero as, due to kernel bugs,
|
||||
// some of the memory cgroup stats can be negative.
|
||||
@@ -34,7 +44,7 @@ func ParseUint(s string, base, bitSize int) (uint64, error) {
|
||||
// 2. Handle negative values lesser than MinInt64
|
||||
if intErr == nil && intValue < 0 {
|
||||
return 0, nil
|
||||
} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
|
||||
} else if errors.Is(intErr, strconv.ErrRange) && intValue < 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
@@ -56,7 +66,7 @@ func ParseKeyValue(t string) (string, uint64, error) {
|
||||
|
||||
value, err := ParseUint(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
return "", 0, fmt.Errorf("unable to convert to uint64: %v", err)
|
||||
return "", 0, err
|
||||
}
|
||||
|
||||
return parts[0], value, nil
|
||||
@@ -71,11 +81,15 @@ func GetValueByKey(path, file, key string) (uint64, error) {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
lines := strings.Split(string(content), "\n")
|
||||
lines := strings.Split(content, "\n")
|
||||
for _, line := range lines {
|
||||
arr := strings.Split(line, " ")
|
||||
if len(arr) == 2 && arr[0] == key {
|
||||
return ParseUint(arr[1], 10, 64)
|
||||
val, err := ParseUint(arr[1], 10, 64)
|
||||
if err != nil {
|
||||
err = &ParseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
return val, err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -96,7 +110,7 @@ func GetCgroupParamUint(path, file string) (uint64, error) {
|
||||
|
||||
res, err := ParseUint(contents, 10, 64)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("unable to parse file %q", path+"/"+file)
|
||||
return res, &ParseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
@@ -115,7 +129,7 @@ func GetCgroupParamInt(path, file string) (int64, error) {
|
||||
|
||||
res, err := strconv.ParseInt(contents, 10, 64)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("unable to parse %q as a int from Cgroup file %q", contents, path+"/"+file)
|
||||
return res, &ParseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
27
vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go
generated
vendored
Normal file
27
vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go
generated
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// GetAllPids returns all pids from the cgroup identified by path, and all its
|
||||
// sub-cgroups.
|
||||
func GetAllPids(path string) ([]int, error) {
|
||||
var pids []int
|
||||
err := filepath.WalkDir(path, func(p string, d fs.DirEntry, iErr error) error {
|
||||
if iErr != nil {
|
||||
return iErr
|
||||
}
|
||||
if !d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
cPids, err := readProcsFile(p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids = append(pids, cPids...)
|
||||
return nil
|
||||
})
|
||||
return pids, err
|
||||
}
|
78
vendor/github.com/opencontainers/runc/libcontainer/cgroups/manager/new.go
generated
vendored
Normal file
78
vendor/github.com/opencontainers/runc/libcontainer/cgroups/manager/new.go
generated
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
package manager
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// New returns the instance of a cgroup manager, which is chosen
|
||||
// based on the local environment (whether cgroup v1 or v2 is used)
|
||||
// and the config (whether config.Systemd is set or not).
|
||||
func New(config *configs.Cgroup) (cgroups.Manager, error) {
|
||||
return NewWithPaths(config, nil)
|
||||
}
|
||||
|
||||
// NewWithPaths is similar to New, and can be used in case cgroup paths
|
||||
// are already well known, which can save some resources.
|
||||
//
|
||||
// For cgroup v1, the keys are controller/subsystem name, and the values
|
||||
// are absolute filesystem paths to the appropriate cgroups.
|
||||
//
|
||||
// For cgroup v2, the only key allowed is "" (empty string), and the value
|
||||
// is the unified cgroup path.
|
||||
func NewWithPaths(config *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
|
||||
if config == nil {
|
||||
return nil, errors.New("cgroups/manager.New: config must not be nil")
|
||||
}
|
||||
if config.Systemd && !systemd.IsRunningSystemd() {
|
||||
return nil, errors.New("systemd not running on this host, cannot use systemd cgroups manager")
|
||||
}
|
||||
|
||||
// Cgroup v2 aka unified hierarchy.
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
path, err := getUnifiedPath(paths)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("manager.NewWithPaths: inconsistent paths: %w", err)
|
||||
}
|
||||
if config.Systemd {
|
||||
return systemd.NewUnifiedManager(config, path)
|
||||
}
|
||||
return fs2.NewManager(config, path)
|
||||
}
|
||||
|
||||
// Cgroup v1.
|
||||
if config.Systemd {
|
||||
return systemd.NewLegacyManager(config, paths)
|
||||
}
|
||||
|
||||
return fs.NewManager(config, paths)
|
||||
}
|
||||
|
||||
// getUnifiedPath is an implementation detail of libcontainer factory.
|
||||
// Historically, it saves cgroup paths as per-subsystem path map (as returned
|
||||
// by cm.GetPaths(""), but with v2 we only have one single unified path
|
||||
// (with "" as a key).
|
||||
//
|
||||
// This function converts from that map to string (using "" as a key),
|
||||
// and also checks that the map itself is sane.
|
||||
func getUnifiedPath(paths map[string]string) (string, error) {
|
||||
if len(paths) > 1 {
|
||||
return "", fmt.Errorf("expected a single path, got %+v", paths)
|
||||
}
|
||||
path := paths[""]
|
||||
// can be empty
|
||||
if path != "" {
|
||||
if filepath.Clean(path) != path || !filepath.IsAbs(path) {
|
||||
return "", fmt.Errorf("invalid path: %q", path)
|
||||
}
|
||||
}
|
||||
|
||||
return path, nil
|
||||
}
|
16
vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
16
vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package cgroups
|
||||
|
||||
type ThrottlingData struct {
|
||||
@@ -126,7 +124,7 @@ type BlkioStatEntry struct {
|
||||
}
|
||||
|
||||
type BlkioStats struct {
|
||||
// number of bytes tranferred to and from the block device
|
||||
// number of bytes transferred to and from the block device
|
||||
IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
|
||||
IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
|
||||
IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
|
||||
@@ -146,6 +144,17 @@ type HugetlbStats struct {
|
||||
Failcnt uint64 `json:"failcnt"`
|
||||
}
|
||||
|
||||
type RdmaEntry struct {
|
||||
Device string `json:"device,omitempty"`
|
||||
HcaHandles uint32 `json:"hca_handles,omitempty"`
|
||||
HcaObjects uint32 `json:"hca_objects,omitempty"`
|
||||
}
|
||||
|
||||
type RdmaStats struct {
|
||||
RdmaLimit []RdmaEntry `json:"rdma_limit,omitempty"`
|
||||
RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"`
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
CpuStats CpuStats `json:"cpu_stats,omitempty"`
|
||||
CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"`
|
||||
@@ -154,6 +163,7 @@ type Stats struct {
|
||||
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
|
||||
// the map is in the format "size of hugepage: stats of the hugepage"
|
||||
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
|
||||
RdmaStats RdmaStats `json:"rdma_stats,omitempty"`
|
||||
}
|
||||
|
||||
func NewStats() *Stats {
|
||||
|
34
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
34
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
@@ -3,6 +3,7 @@ package systemd
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
@@ -14,11 +15,11 @@ import (
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -92,7 +93,7 @@ func groupPrefix(ruleType devices.Type) (string, error) {
|
||||
case devices.CharDevice:
|
||||
return "char-", nil
|
||||
default:
|
||||
return "", errors.Errorf("device type %v has no group prefix", ruleType)
|
||||
return "", fmt.Errorf("device type %v has no group prefix", ruleType)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,9 +143,9 @@ func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) {
|
||||
)
|
||||
if n, err := fmt.Sscanf(line, "%d %s", &currMajor, &currName); err != nil || n != 2 {
|
||||
if err == nil {
|
||||
err = errors.Errorf("wrong number of fields")
|
||||
err = errors.New("wrong number of fields")
|
||||
}
|
||||
return "", errors.Wrapf(err, "scan /proc/devices line %q", line)
|
||||
return "", fmt.Errorf("scan /proc/devices line %q: %w", line, err)
|
||||
}
|
||||
|
||||
if currMajor == ruleMajor {
|
||||
@@ -152,7 +153,7 @@ func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) {
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return "", errors.Wrap(err, "reading /proc/devices")
|
||||
return "", fmt.Errorf("reading /proc/devices: %w", err)
|
||||
}
|
||||
// Couldn't find the device group.
|
||||
return "", nil
|
||||
@@ -192,12 +193,12 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
|
||||
configEmu := &cgroupdevices.Emulator{}
|
||||
for _, rule := range r.Devices {
|
||||
if err := configEmu.Apply(*rule); err != nil {
|
||||
return nil, errors.Wrap(err, "apply rule for systemd")
|
||||
return nil, fmt.Errorf("unable to apply rule for systemd: %w", err)
|
||||
}
|
||||
}
|
||||
// systemd doesn't support blacklists. So we log a warning, and tell
|
||||
// systemd to act as a deny-all whitelist. This ruleset will be replaced
|
||||
// with our normal fallback code. This may result in spurrious errors, but
|
||||
// with our normal fallback code. This may result in spurious errors, but
|
||||
// the only other option is to error out here.
|
||||
if configEmu.IsBlacklist() {
|
||||
// However, if we're dealing with an allow-all rule then we can do it.
|
||||
@@ -213,19 +214,19 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
|
||||
// whitelist which is the default for devices.Emulator.
|
||||
finalRules, err := configEmu.Rules()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "get simplified rules for systemd")
|
||||
return nil, fmt.Errorf("unable to get simplified rules for systemd: %w", err)
|
||||
}
|
||||
var deviceAllowList []deviceAllowEntry
|
||||
for _, rule := range finalRules {
|
||||
if !rule.Allow {
|
||||
// Should never happen.
|
||||
return nil, errors.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule)
|
||||
return nil, fmt.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule)
|
||||
}
|
||||
switch rule.Type {
|
||||
case devices.BlockDevice, devices.CharDevice:
|
||||
default:
|
||||
// Should never happen.
|
||||
return nil, errors.Errorf("invalid device type for DeviceAllow: %v", rule.Type)
|
||||
return nil, fmt.Errorf("invalid device type for DeviceAllow: %v", rule.Type)
|
||||
}
|
||||
|
||||
entry := deviceAllowEntry{
|
||||
@@ -271,7 +272,7 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
|
||||
// "_ n:* _" rules require a device group from /proc/devices.
|
||||
group, err := findDeviceGroup(rule.Type, rule.Major)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "find device '%v/%d'", rule.Type, rule.Major)
|
||||
return nil, fmt.Errorf("unable to find device '%v/%d': %w", rule.Type, rule.Major, err)
|
||||
}
|
||||
if group == "" {
|
||||
// Couldn't find a group.
|
||||
@@ -350,7 +351,7 @@ func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Pr
|
||||
// Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
|
||||
if s != "done" {
|
||||
resetFailedUnit(cm, unitName)
|
||||
return errors.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
|
||||
return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
|
||||
}
|
||||
case <-timeout.C:
|
||||
resetFailedUnit(cm, unitName)
|
||||
@@ -449,10 +450,13 @@ func systemdVersionAtoi(verStr string) (int, error) {
|
||||
re := regexp.MustCompile(`v?([0-9]+)`)
|
||||
matches := re.FindStringSubmatch(verStr)
|
||||
if len(matches) < 2 {
|
||||
return 0, errors.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches)
|
||||
return 0, fmt.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches)
|
||||
}
|
||||
ver, err := strconv.Atoi(matches[1])
|
||||
return ver, errors.Wrapf(err, "can't parse version %s", verStr)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("can't parse version: %w", err)
|
||||
}
|
||||
return ver, nil
|
||||
}
|
||||
|
||||
func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) {
|
||||
|
28
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go
generated
vendored
28
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go
generated
vendored
@@ -1,12 +1,10 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"math/big"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/bits-and-blooms/bitset"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// RangeToBits converts a text representation of a CPU mask (as written to
|
||||
@@ -14,7 +12,7 @@ import (
|
||||
// with the corresponding bits set (as consumed by systemd over dbus as
|
||||
// AllowedCPUs/AllowedMemoryNodes unit property value).
|
||||
func RangeToBits(str string) ([]byte, error) {
|
||||
bits := &bitset.BitSet{}
|
||||
bits := new(big.Int)
|
||||
|
||||
for _, r := range strings.Split(str, ",") {
|
||||
// allow extra spaces around
|
||||
@@ -36,32 +34,22 @@ func RangeToBits(str string) ([]byte, error) {
|
||||
if start > end {
|
||||
return nil, errors.New("invalid range: " + r)
|
||||
}
|
||||
for i := uint(start); i <= uint(end); i++ {
|
||||
bits.Set(i)
|
||||
for i := start; i <= end; i++ {
|
||||
bits.SetBit(bits, int(i), 1)
|
||||
}
|
||||
} else {
|
||||
val, err := strconv.ParseUint(ranges[0], 10, 32)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
bits.Set(uint(val))
|
||||
bits.SetBit(bits, int(val), 1)
|
||||
}
|
||||
}
|
||||
|
||||
val := bits.Bytes()
|
||||
if len(val) == 0 {
|
||||
ret := bits.Bytes()
|
||||
if len(ret) == 0 {
|
||||
// do not allow empty values
|
||||
return nil, errors.New("empty value")
|
||||
}
|
||||
ret := make([]byte, len(val)*8)
|
||||
for i := range val {
|
||||
// bitset uses BigEndian internally
|
||||
binary.BigEndian.PutUint64(ret[i*8:], val[len(val)-1-i])
|
||||
}
|
||||
// remove upper all-zero bytes
|
||||
for ret[0] == 0 {
|
||||
ret = ret[1:]
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
|
71
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unsupported.go
generated
vendored
71
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unsupported.go
generated
vendored
@@ -1,71 +0,0 @@
|
||||
// +build !linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Manager struct {
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
func IsRunningSystemd() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Path(_ string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func Freeze(c *configs.Cgroup, state configs.FreezerState) error {
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Exists() bool {
|
||||
return false
|
||||
}
|
18
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
18
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
@@ -1,10 +1,10 @@
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
@@ -13,8 +13,8 @@ import (
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/userns"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// newUserSystemdDbus creates a connection for systemd user-instance.
|
||||
@@ -31,17 +31,17 @@ func newUserSystemdDbus() (*systemdDbus.Conn, error) {
|
||||
return systemdDbus.NewConnection(func() (*dbus.Conn, error) {
|
||||
conn, err := dbus.Dial(addr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "error while dialing %q", addr)
|
||||
return nil, fmt.Errorf("error while dialing %q: %w", addr, err)
|
||||
}
|
||||
methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(uid))}
|
||||
err = conn.Auth(methods)
|
||||
if err != nil {
|
||||
conn.Close()
|
||||
return nil, errors.Wrapf(err, "error while authenticating connection, address=%q, UID=%d", addr, uid)
|
||||
return nil, fmt.Errorf("error while authenticating connection (address=%q, UID=%d): %w", addr, uid, err)
|
||||
}
|
||||
if err = conn.Hello(); err != nil {
|
||||
conn.Close()
|
||||
return nil, errors.Wrapf(err, "error while sending Hello message, address=%q, UID=%d", addr, uid)
|
||||
return nil, fmt.Errorf("error while sending Hello message (address=%q, UID=%d): %w", addr, uid, err)
|
||||
}
|
||||
return conn, nil
|
||||
})
|
||||
@@ -57,7 +57,7 @@ func DetectUID() (int, error) {
|
||||
}
|
||||
b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
|
||||
if err != nil {
|
||||
return -1, errors.Wrapf(err, "could not execute `busctl --user --no-pager status`: %q", string(b))
|
||||
return -1, fmt.Errorf("could not execute `busctl --user --no-pager status` (output: %q): %w", string(b), err)
|
||||
}
|
||||
scanner := bufio.NewScanner(bytes.NewReader(b))
|
||||
for scanner.Scan() {
|
||||
@@ -66,7 +66,7 @@ func DetectUID() (int, error) {
|
||||
uidStr := strings.TrimPrefix(s, "OwnerUID=")
|
||||
i, err := strconv.Atoi(uidStr)
|
||||
if err != nil {
|
||||
return -1, errors.Wrapf(err, "could not detect the OwnerUID: %s", s)
|
||||
return -1, fmt.Errorf("could not detect the OwnerUID: %w", err)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
@@ -93,7 +93,7 @@ func DetectUserDbusSessionBusAddress() (string, error) {
|
||||
}
|
||||
b, err := exec.Command("systemctl", "--user", "--no-pager", "show-environment").CombinedOutput()
|
||||
if err != nil {
|
||||
return "", errors.Wrapf(err, "could not execute `systemctl --user --no-pager show-environment`, output=%q", string(b))
|
||||
return "", fmt.Errorf("could not execute `systemctl --user --no-pager show-environment` (output=%q): %w", string(b), err)
|
||||
}
|
||||
scanner := bufio.NewScanner(bytes.NewReader(b))
|
||||
for scanner.Scan() {
|
||||
|
139
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
generated
vendored
139
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
@@ -26,12 +24,25 @@ type legacyManager struct {
|
||||
dbus *dbusConnManager
|
||||
}
|
||||
|
||||
func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
|
||||
if cg.Rootless {
|
||||
return nil, errors.New("cannot use rootless systemd cgroups manager on cgroup v1")
|
||||
}
|
||||
if cg.Resources != nil && cg.Resources.Unified != nil {
|
||||
return nil, cgroups.ErrV1NoUnified
|
||||
}
|
||||
if paths == nil {
|
||||
var err error
|
||||
paths, err = initPaths(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return &legacyManager{
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
dbus: newDbusConnManager(false),
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
@@ -59,6 +70,7 @@ var legacySubsystems = []subsystem{
|
||||
&fs.NetPrioGroup{},
|
||||
&fs.NetClsGroup{},
|
||||
&fs.NameGroup{GroupName: "name=systemd"},
|
||||
&fs.RdmaGroup{},
|
||||
}
|
||||
|
||||
func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
||||
@@ -100,6 +112,53 @@ func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]syst
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
// initPaths figures out and returns paths to cgroups.
|
||||
func initPaths(c *configs.Cgroup) (map[string]string, error) {
|
||||
slice := "system.slice"
|
||||
if c.Parent != "" {
|
||||
var err error
|
||||
slice, err = ExpandSlice(c.Parent)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
unit := getUnitName(c)
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, s := range legacySubsystems {
|
||||
subsystemPath, err := getSubsystemPath(slice, unit, s.Name())
|
||||
if err != nil {
|
||||
// Even if it's `not found` error, we'll return err
|
||||
// because devices cgroup is hard requirement for
|
||||
// container security.
|
||||
if s.Name() == "devices" {
|
||||
return nil, err
|
||||
}
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
paths[s.Name()] = subsystemPath
|
||||
}
|
||||
|
||||
// If systemd is using cgroups-hybrid mode then add the slice path of
|
||||
// this container to the paths so the following process executed with
|
||||
// "runc exec" joins that cgroup as well.
|
||||
if cgroups.IsCgroup2HybridMode() {
|
||||
// "" means cgroup-hybrid path
|
||||
cgroupsHybridPath, err := getSubsystemPath(slice, unit, "")
|
||||
if err != nil && cgroups.IsNotFound(err) {
|
||||
return nil, err
|
||||
}
|
||||
paths[""] = cgroupsHybridPath
|
||||
}
|
||||
|
||||
return paths, nil
|
||||
}
|
||||
|
||||
func (m *legacyManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.cgroups
|
||||
@@ -108,27 +167,8 @@ func (m *legacyManager) Apply(pid int) error {
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
if c.Resources.Unified != nil {
|
||||
return cgroups.ErrV1NoUnified
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if c.Paths != nil {
|
||||
paths := make(map[string]string)
|
||||
cgMap, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// XXX(kolyshkin@): why this check is needed?
|
||||
for name, path := range c.Paths {
|
||||
if _, ok := cgMap[name]; ok {
|
||||
paths[name] = path
|
||||
}
|
||||
}
|
||||
m.paths = paths
|
||||
return cgroups.EnterPid(m.paths, pid)
|
||||
}
|
||||
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
@@ -136,12 +176,14 @@ func (m *legacyManager) Apply(pid int) error {
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
// If we create a slice, the parent is defined via a Wants=.
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
// Otherwise it's a scope, which we put into a Slice=.
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
// Assume scopes always support delegation (supported since systemd v218).
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
@@ -149,12 +191,6 @@ func (m *legacyManager) Apply(pid int) error {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Check if we can delegate. This is only supported on systemd versions 218 and above.
|
||||
if !strings.HasSuffix(unitName, ".slice") {
|
||||
// Assume scopes always support delegation.
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
@@ -174,26 +210,6 @@ func (m *legacyManager) Apply(pid int) error {
|
||||
return err
|
||||
}
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, s := range legacySubsystems {
|
||||
subsystemPath, err := getSubsystemPath(m.cgroups, s.Name())
|
||||
if err != nil {
|
||||
// Even if it's `not found` error, we'll return err
|
||||
// because devices cgroup is hard requirement for
|
||||
// container security.
|
||||
if s.Name() == "devices" {
|
||||
return err
|
||||
}
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[s.Name()] = subsystemPath
|
||||
}
|
||||
m.paths = paths
|
||||
|
||||
if err := m.joinCgroups(pid); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -202,9 +218,6 @@ func (m *legacyManager) Apply(pid int) error {
|
||||
}
|
||||
|
||||
func (m *legacyManager) Destroy() error {
|
||||
if m.cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
@@ -254,7 +267,7 @@ func (m *legacyManager) joinCgroups(pid int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
||||
func getSubsystemPath(slice, unit, subsystem string) (string, error) {
|
||||
mountpoint, err := cgroups.FindCgroupMountpoint("", subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
@@ -267,17 +280,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
||||
// if pid 1 is systemd 226 or later, it will be in init.scope, not the root
|
||||
initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope")
|
||||
|
||||
slice := "system.slice"
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
slice, err = ExpandSlice(slice)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
|
||||
return filepath.Join(mountpoint, initPath, slice, unit), nil
|
||||
}
|
||||
|
||||
func (m *legacyManager) Freeze(state configs.FreezerState) error {
|
||||
@@ -399,9 +402,7 @@ func (m *legacyManager) freezeBeforeSet(unitName string, r *configs.Resources) (
|
||||
}
|
||||
|
||||
func (m *legacyManager) Set(r *configs.Resources) error {
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.cgroups.Paths != nil {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
if r.Unified != nil {
|
||||
|
156
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
156
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
@@ -1,10 +1,10 @@
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -12,29 +12,39 @@ import (
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type unifiedManager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
// path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
path string
|
||||
rootless bool
|
||||
dbus *dbusConnManager
|
||||
path string
|
||||
dbus *dbusConnManager
|
||||
fsMgr cgroups.Manager
|
||||
}
|
||||
|
||||
func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgroups.Manager {
|
||||
return &unifiedManager{
|
||||
cgroups: config,
|
||||
path: path,
|
||||
rootless: rootless,
|
||||
dbus: newDbusConnManager(rootless),
|
||||
func NewUnifiedManager(config *configs.Cgroup, path string) (cgroups.Manager, error) {
|
||||
m := &unifiedManager{
|
||||
cgroups: config,
|
||||
path: path,
|
||||
dbus: newDbusConnManager(config.Rootless),
|
||||
}
|
||||
if err := m.initPath(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fsMgr, err := fs2.NewManager(config, m.path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m.fsMgr = fsMgr
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// unifiedResToSystemdProps tries to convert from Cgroup.Resources.Unified
|
||||
@@ -233,12 +243,8 @@ func (m *unifiedManager) Apply(pid int) error {
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
if c.Paths != nil {
|
||||
return cgroups.WriteCgroupProc(m.path, pid)
|
||||
}
|
||||
|
||||
slice := "system.slice"
|
||||
if m.rootless {
|
||||
if m.cgroups.Rootless {
|
||||
slice = "user.slice"
|
||||
}
|
||||
if c.Parent != "" {
|
||||
@@ -247,12 +253,14 @@ func (m *unifiedManager) Apply(pid int) error {
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
// If we create a slice, the parent is defined via a Wants=.
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
// Otherwise it's a scope, which we put into a Slice=.
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
// Assume scopes always support delegation (supported since systemd v218).
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
@@ -260,12 +268,6 @@ func (m *unifiedManager) Apply(pid int) error {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Check if we can delegate. This is only supported on systemd versions 218 and above.
|
||||
if !strings.HasSuffix(unitName, ".slice") {
|
||||
// Assume scopes always support delegation.
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
@@ -282,22 +284,53 @@ func (m *unifiedManager) Apply(pid int) error {
|
||||
properties = append(properties, c.SystemdProps...)
|
||||
|
||||
if err := startUnit(m.dbus, unitName, properties); err != nil {
|
||||
return errors.Wrapf(err, "error while starting unit %q with properties %+v", unitName, properties)
|
||||
return fmt.Errorf("unable to start unit %q (properties %+v): %w", unitName, properties, err)
|
||||
}
|
||||
|
||||
if err := m.initPath(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if c.OwnerUID != nil {
|
||||
filesToChown, err := cgroupFilesToChown()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, v := range filesToChown {
|
||||
err := os.Chown(m.path+"/"+v, *c.OwnerUID, -1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Destroy() error {
|
||||
if m.cgroups.Paths != nil {
|
||||
return nil
|
||||
// The kernel exposes a list of files that should be chowned to the delegate
|
||||
// uid in /sys/kernel/cgroup/delegate. If the file is not present
|
||||
// (Linux < 4.15), use the initial values mentioned in cgroups(7).
|
||||
func cgroupFilesToChown() ([]string, error) {
|
||||
filesToChown := []string{"."} // the directory itself must be chowned
|
||||
const cgroupDelegateFile = "/sys/kernel/cgroup/delegate"
|
||||
f, err := os.Open(cgroupDelegateFile)
|
||||
if err == nil {
|
||||
defer f.Close()
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
filesToChown = append(filesToChown, scanner.Text())
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, fmt.Errorf("error reading %s: %w", cgroupDelegateFile, err)
|
||||
}
|
||||
} else {
|
||||
filesToChown = append(filesToChown, "cgroup.procs", "cgroup.subtree_control", "cgroup.threads")
|
||||
}
|
||||
return filesToChown, nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
@@ -307,8 +340,8 @@ func (m *unifiedManager) Destroy() error {
|
||||
}
|
||||
|
||||
// systemd 239 do not remove sub-cgroups.
|
||||
err := cgroups.RemovePath(m.path)
|
||||
// cgroups.RemovePath has handled ErrNotExist
|
||||
err := m.fsMgr.Destroy()
|
||||
// fsMgr.Destroy has handled ErrNotExist
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -317,7 +350,6 @@ func (m *unifiedManager) Destroy() error {
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Path(_ string) string {
|
||||
_ = m.initPath()
|
||||
return m.path
|
||||
}
|
||||
|
||||
@@ -326,7 +358,7 @@ func (m *unifiedManager) Path(_ string) string {
|
||||
func (m *unifiedManager) getSliceFull() (string, error) {
|
||||
c := m.cgroups
|
||||
slice := "system.slice"
|
||||
if m.rootless {
|
||||
if c.Rootless {
|
||||
slice = "user.slice"
|
||||
}
|
||||
if c.Parent != "" {
|
||||
@@ -337,7 +369,7 @@ func (m *unifiedManager) getSliceFull() (string, error) {
|
||||
}
|
||||
}
|
||||
|
||||
if m.rootless {
|
||||
if c.Rootless {
|
||||
// managerCG is typically "/user.slice/user-${uid}.slice/user@${uid}.service".
|
||||
managerCG, err := getManagerProperty(m.dbus, "ControlGroup")
|
||||
if err != nil {
|
||||
@@ -375,58 +407,36 @@ func (m *unifiedManager) initPath() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) fsManager() (cgroups.Manager, error) {
|
||||
if err := m.initPath(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fs2.NewManager(m.cgroups, m.path, m.rootless)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Freeze(state configs.FreezerState) error {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Freeze(state)
|
||||
return m.fsMgr.Freeze(state)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetPids() ([]int, error) {
|
||||
if err := m.initPath(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(m.path)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetAllPids() ([]int, error) {
|
||||
if err := m.initPath(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(m.path)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetStats() (*cgroups.Stats, error) {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fsMgr.GetStats()
|
||||
return m.fsMgr.GetStats()
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
properties, err := genV2ResourcesProperties(r, m.dbus)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {
|
||||
return errors.Wrap(err, "error while setting unit properties")
|
||||
return fmt.Errorf("unable to set unit properties: %w", err)
|
||||
}
|
||||
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Set(r)
|
||||
return m.fsMgr.Set(r)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetPaths() map[string]string {
|
||||
@@ -440,11 +450,7 @@ func (m *unifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
return fsMgr.GetFreezerState()
|
||||
return m.fsMgr.GetFreezerState()
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Exists() bool {
|
||||
@@ -452,9 +458,5 @@ func (m *unifiedManager) Exists() bool {
|
||||
}
|
||||
|
||||
func (m *unifiedManager) OOMKillCount() (uint64, error) {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return fsMgr.OOMKillCount()
|
||||
return m.fsMgr.OOMKillCount()
|
||||
}
|
||||
|
115
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
115
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
@@ -7,7 +5,6 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
@@ -23,11 +20,14 @@ import (
|
||||
const (
|
||||
CgroupProcesses = "cgroup.procs"
|
||||
unifiedMountpoint = "/sys/fs/cgroup"
|
||||
hybridMountpoint = "/sys/fs/cgroup/unified"
|
||||
)
|
||||
|
||||
var (
|
||||
isUnifiedOnce sync.Once
|
||||
isUnified bool
|
||||
isHybridOnce sync.Once
|
||||
isHybrid bool
|
||||
)
|
||||
|
||||
// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
|
||||
@@ -49,6 +49,24 @@ func IsCgroup2UnifiedMode() bool {
|
||||
return isUnified
|
||||
}
|
||||
|
||||
// IsCgroup2HybridMode returns whether we are running in cgroup v2 hybrid mode.
|
||||
func IsCgroup2HybridMode() bool {
|
||||
isHybridOnce.Do(func() {
|
||||
var st unix.Statfs_t
|
||||
err := unix.Statfs(hybridMountpoint, &st)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
// ignore the "not found" error
|
||||
isHybrid = false
|
||||
return
|
||||
}
|
||||
panic(fmt.Sprintf("cannot statfs cgroup root: %s", err))
|
||||
}
|
||||
isHybrid = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||
})
|
||||
return isHybrid
|
||||
}
|
||||
|
||||
type Mount struct {
|
||||
Mountpoint string
|
||||
Root string
|
||||
@@ -118,8 +136,8 @@ func GetAllSubsystems() ([]string, error) {
|
||||
return subsystems, nil
|
||||
}
|
||||
|
||||
func readProcsFile(file string) ([]int, error) {
|
||||
f, err := os.Open(file)
|
||||
func readProcsFile(dir string) ([]int, error) {
|
||||
f, err := OpenFile(dir, CgroupProcesses, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -210,7 +228,7 @@ func EnterPid(cgroupPaths map[string]string, pid int) error {
|
||||
|
||||
func rmdir(path string) error {
|
||||
err := unix.Rmdir(path)
|
||||
if err == nil || err == unix.ENOENT {
|
||||
if err == nil || err == unix.ENOENT { //nolint:errorlint // unix errors are bare
|
||||
return nil
|
||||
}
|
||||
return &os.PathError{Op: "rmdir", Path: path, Err: err}
|
||||
@@ -224,7 +242,7 @@ func RemovePath(path string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
infos, err := ioutil.ReadDir(path)
|
||||
infos, err := os.ReadDir(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
err = nil
|
||||
@@ -284,40 +302,61 @@ func RemovePaths(paths map[string]string) (err error) {
|
||||
return fmt.Errorf("Failed to remove paths: %v", paths)
|
||||
}
|
||||
|
||||
func GetHugePageSize() ([]string, error) {
|
||||
dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
files, err := dir.Readdirnames(0)
|
||||
dir.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var (
|
||||
hugePageSizes []string
|
||||
initHPSOnce sync.Once
|
||||
)
|
||||
|
||||
return getHugePageSizeFromFilenames(files)
|
||||
func HugePageSizes() []string {
|
||||
initHPSOnce.Do(func() {
|
||||
dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
files, err := dir.Readdirnames(0)
|
||||
dir.Close()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
hugePageSizes, err = getHugePageSizeFromFilenames(files)
|
||||
if err != nil {
|
||||
logrus.Warn("HugePageSizes: ", err)
|
||||
}
|
||||
})
|
||||
|
||||
return hugePageSizes
|
||||
}
|
||||
|
||||
func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
|
||||
pageSizes := make([]string, 0, len(fileNames))
|
||||
var warn error
|
||||
|
||||
for _, file := range fileNames {
|
||||
// example: hugepages-1048576kB
|
||||
val := strings.TrimPrefix(file, "hugepages-")
|
||||
if len(val) == len(file) {
|
||||
// unexpected file name: no prefix found
|
||||
// Unexpected file name: no prefix found, ignore it.
|
||||
continue
|
||||
}
|
||||
// The suffix is always "kB" (as of Linux 5.9)
|
||||
// The suffix is always "kB" (as of Linux 5.13). If we find
|
||||
// something else, produce an error but keep going.
|
||||
eLen := len(val) - 2
|
||||
val = strings.TrimSuffix(val, "kB")
|
||||
if len(val) != eLen {
|
||||
logrus.Warnf("GetHugePageSize: %s: invalid filename suffix (expected \"kB\")", file)
|
||||
// Highly unlikely.
|
||||
if warn == nil {
|
||||
warn = errors.New(file + `: invalid suffix (expected "kB")`)
|
||||
}
|
||||
continue
|
||||
}
|
||||
size, err := strconv.Atoi(val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
// Highly unlikely.
|
||||
if warn == nil {
|
||||
warn = fmt.Errorf("%s: %w", file, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
// Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574
|
||||
// but in our case the size is in KB already.
|
||||
@@ -331,34 +370,12 @@ func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
|
||||
pageSizes = append(pageSizes, val)
|
||||
}
|
||||
|
||||
return pageSizes, nil
|
||||
return pageSizes, warn
|
||||
}
|
||||
|
||||
// GetPids returns all pids, that were added to cgroup at path.
|
||||
func GetPids(dir string) ([]int, error) {
|
||||
return readProcsFile(filepath.Join(dir, CgroupProcesses))
|
||||
}
|
||||
|
||||
// GetAllPids returns all pids, that were added to cgroup at path and to all its
|
||||
// subcgroups.
|
||||
func GetAllPids(path string) ([]int, error) {
|
||||
var pids []int
|
||||
// collect pids from all sub-cgroups
|
||||
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
|
||||
if iErr != nil {
|
||||
return iErr
|
||||
}
|
||||
if info.IsDir() || info.Name() != CgroupProcesses {
|
||||
return nil
|
||||
}
|
||||
cPids, err := readProcsFile(p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids = append(pids, cPids...)
|
||||
return nil
|
||||
})
|
||||
return pids, err
|
||||
return readProcsFile(dir)
|
||||
}
|
||||
|
||||
// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
|
||||
@@ -376,7 +393,7 @@ func WriteCgroupProc(dir string, pid int) error {
|
||||
|
||||
file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
|
||||
return fmt.Errorf("failed to write %v: %w", pid, err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
@@ -393,7 +410,7 @@ func WriteCgroupProc(dir string, pid int) error {
|
||||
continue
|
||||
}
|
||||
|
||||
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
|
||||
return fmt.Errorf("failed to write %v: %w", pid, err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -446,5 +463,5 @@ func ConvertBlkIOToIOWeightValue(blkIoWeight uint16) uint64 {
|
||||
if blkIoWeight == 0 {
|
||||
return 0
|
||||
}
|
||||
return uint64(1 + (uint64(blkIoWeight)-10)*9999/990)
|
||||
return 1 + (uint64(blkIoWeight)-10)*9999/990
|
||||
}
|
||||
|
19
vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
generated
vendored
19
vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
generated
vendored
@@ -46,11 +46,8 @@ func NewNotFoundError(sub string) error {
|
||||
}
|
||||
|
||||
func IsNotFound(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
_, ok := err.(*NotFoundError)
|
||||
return ok
|
||||
var nfErr *NotFoundError
|
||||
return errors.As(err, &nfErr)
|
||||
}
|
||||
|
||||
func tryDefaultPath(cgroupPath, subsystem string) string {
|
||||
@@ -116,6 +113,11 @@ func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
||||
return "", errUnified
|
||||
}
|
||||
|
||||
// If subsystem is empty, we look for the cgroupv2 hybrid path.
|
||||
if len(subsystem) == 0 {
|
||||
return hybridMountpoint, nil
|
||||
}
|
||||
|
||||
// Avoid parsing mountinfo by trying the default path first, if possible.
|
||||
if path := tryDefaultPath(cgroupPath, subsystem); path != "" {
|
||||
return path, nil
|
||||
@@ -154,7 +156,7 @@ func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, sub
|
||||
|
||||
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
||||
if len(m.Subsystems) == 0 {
|
||||
return "", fmt.Errorf("no subsystem for mount")
|
||||
return "", errors.New("no subsystem for mount")
|
||||
}
|
||||
|
||||
return getControllerPath(m.Subsystems[0], cgroups)
|
||||
@@ -226,6 +228,11 @@ func GetOwnCgroupPath(subsystem string) (string, error) {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// If subsystem is empty, we look for the cgroupv2 hybrid path.
|
||||
if len(subsystem) == 0 {
|
||||
return hybridMountpoint, nil
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
|
20
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
generated
vendored
20
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
generated
vendored
@@ -28,17 +28,26 @@ type Cgroup struct {
|
||||
// ScopePrefix describes prefix for the scope name
|
||||
ScopePrefix string `json:"scope_prefix"`
|
||||
|
||||
// Paths represent the absolute cgroups paths to join.
|
||||
// This takes precedence over Path.
|
||||
Paths map[string]string
|
||||
|
||||
// Resources contains various cgroups settings to apply
|
||||
*Resources
|
||||
|
||||
// Systemd tells if systemd should be used to manage cgroups.
|
||||
Systemd bool
|
||||
|
||||
// SystemdProps are any additional properties for systemd,
|
||||
// derived from org.systemd.property.xxx annotations.
|
||||
// Ignored unless systemd is used for managing cgroups.
|
||||
SystemdProps []systemdDbus.Property `json:"-"`
|
||||
|
||||
// Rootless tells if rootless cgroups should be used.
|
||||
Rootless bool
|
||||
|
||||
// The host UID that should own the cgroup, or nil to accept
|
||||
// the default ownership. This should only be set when the
|
||||
// cgroupfs is to be mounted read/write.
|
||||
// Not all cgroup manager implementations support changing
|
||||
// the ownership.
|
||||
OwnerUID *int `json:"owner_uid,omitempty"`
|
||||
}
|
||||
|
||||
type Resources struct {
|
||||
@@ -117,6 +126,9 @@ type Resources struct {
|
||||
// Set class identifier for container's network packets
|
||||
NetClsClassid uint32 `json:"net_cls_classid_u"`
|
||||
|
||||
// Rdma resource restriction configuration
|
||||
Rdma map[string]LinuxRdma `json:"rdma"`
|
||||
|
||||
// Used on cgroups v2:
|
||||
|
||||
// CpuWeight sets a proportional bandwidth limit.
|
||||
|
1
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
generated
vendored
1
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
generated
vendored
@@ -1,3 +1,4 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
package configs
|
||||
|
34
vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
34
vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
@@ -7,10 +7,10 @@ import (
|
||||
"os/exec"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type Rlimit struct {
|
||||
@@ -31,10 +31,12 @@ type IDMap struct {
|
||||
// for syscalls. Additional architectures can be added by specifying them in
|
||||
// Architectures.
|
||||
type Seccomp struct {
|
||||
DefaultAction Action `json:"default_action"`
|
||||
Architectures []string `json:"architectures"`
|
||||
Syscalls []*Syscall `json:"syscalls"`
|
||||
DefaultErrnoRet *uint `json:"default_errno_ret"`
|
||||
DefaultAction Action `json:"default_action"`
|
||||
Architectures []string `json:"architectures"`
|
||||
Syscalls []*Syscall `json:"syscalls"`
|
||||
DefaultErrnoRet *uint `json:"default_errno_ret"`
|
||||
ListenerPath string `json:"listener_path,omitempty"`
|
||||
ListenerMetadata string `json:"listener_metadata,omitempty"`
|
||||
}
|
||||
|
||||
// Action is taken upon rule match in Seccomp
|
||||
@@ -47,6 +49,9 @@ const (
|
||||
Allow
|
||||
Trace
|
||||
Log
|
||||
Notify
|
||||
KillThread
|
||||
KillProcess
|
||||
)
|
||||
|
||||
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
|
||||
@@ -246,6 +251,19 @@ const (
|
||||
Poststop HookName = "poststop"
|
||||
)
|
||||
|
||||
// KnownHookNames returns the known hook names.
|
||||
// Used by `runc features`.
|
||||
func KnownHookNames() []string {
|
||||
return []string{
|
||||
string(Prestart), // deprecated
|
||||
string(CreateRuntime),
|
||||
string(CreateContainer),
|
||||
string(StartContainer),
|
||||
string(Poststart),
|
||||
string(Poststop),
|
||||
}
|
||||
}
|
||||
|
||||
type Capabilities struct {
|
||||
// Bounding is the set of capabilities checked by the kernel.
|
||||
Bounding []string
|
||||
@@ -262,7 +280,7 @@ type Capabilities struct {
|
||||
func (hooks HookList) RunHooks(state *specs.State) error {
|
||||
for i, h := range hooks {
|
||||
if err := h.Run(state); err != nil {
|
||||
return errors.Wrapf(err, "Running hook #%d:", i)
|
||||
return fmt.Errorf("error running hook #%d: %w", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -375,7 +393,7 @@ func (c Command) Run(s *specs.State) error {
|
||||
go func() {
|
||||
err := cmd.Wait()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
|
||||
err = fmt.Errorf("error running hook: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
|
||||
}
|
||||
errC <- err
|
||||
}()
|
||||
|
17
vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
generated
vendored
17
vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
generated
vendored
@@ -1,17 +1,24 @@
|
||||
package configs
|
||||
|
||||
import "fmt"
|
||||
import "errors"
|
||||
|
||||
var (
|
||||
errNoUIDMap = errors.New("User namespaces enabled, but no uid mappings found.")
|
||||
errNoUserMap = errors.New("User namespaces enabled, but no user mapping found.")
|
||||
errNoGIDMap = errors.New("User namespaces enabled, but no gid mappings found.")
|
||||
errNoGroupMap = errors.New("User namespaces enabled, but no group mapping found.")
|
||||
)
|
||||
|
||||
// HostUID gets the translated uid for the process on host which could be
|
||||
// different when user namespaces are enabled.
|
||||
func (c Config) HostUID(containerId int) (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if c.UidMappings == nil {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
|
||||
return -1, errNoUIDMap
|
||||
}
|
||||
id, found := c.hostIDFromMapping(containerId, c.UidMappings)
|
||||
if !found {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
|
||||
return -1, errNoUserMap
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
@@ -30,11 +37,11 @@ func (c Config) HostRootUID() (int, error) {
|
||||
func (c Config) HostGID(containerId int) (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if c.GidMappings == nil {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
|
||||
return -1, errNoGIDMap
|
||||
}
|
||||
id, found := c.hostIDFromMapping(containerId, c.GidMappings)
|
||||
if !found {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
|
||||
return -1, errNoGroupMap
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
|
1
vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go
generated
vendored
1
vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go
generated
vendored
@@ -1,3 +1,4 @@
|
||||
//go:build gofuzz
|
||||
// +build gofuzz
|
||||
|
||||
package configs
|
||||
|
3
vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
generated
vendored
3
vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
generated
vendored
@@ -1,6 +1,9 @@
|
||||
package configs
|
||||
|
||||
type IntelRdt struct {
|
||||
// The identity for RDT Class of Service
|
||||
ClosID string `json:"closID,omitempty"`
|
||||
|
||||
// The schema for L3 cache id and capacity bitmask (CBM)
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
9
vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
generated
vendored
9
vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
generated
vendored
@@ -1,5 +1,7 @@
|
||||
package configs
|
||||
|
||||
import "golang.org/x/sys/unix"
|
||||
|
||||
const (
|
||||
// EXT_COPYUP is a directive to copy up the contents of a directory when
|
||||
// a tmpfs is mounted over it.
|
||||
@@ -28,6 +30,9 @@ type Mount struct {
|
||||
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
|
||||
Relabel string `json:"relabel"`
|
||||
|
||||
// RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2).
|
||||
RecAttr *unix.MountAttr `json:"rec_attr"`
|
||||
|
||||
// Extensions are additional flags that are specific to runc.
|
||||
Extensions int `json:"extensions"`
|
||||
|
||||
@@ -37,3 +42,7 @@ type Mount struct {
|
||||
// Optional Command to be run after Source is mounted.
|
||||
PostmountCmds []Command `json:"postmount_cmds"`
|
||||
}
|
||||
|
||||
func (m *Mount) IsBind() bool {
|
||||
return m.Flags&unix.MS_BIND != 0
|
||||
}
|
||||
|
1
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
generated
vendored
1
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
generated
vendored
@@ -1,3 +1,4 @@
|
||||
//go:build linux
|
||||
// +build linux
|
||||
|
||||
package configs
|
||||
|
@@ -1,3 +1,4 @@
|
||||
//go:build !linux && !windows
|
||||
// +build !linux,!windows
|
||||
|
||||
package configs
|
||||
|
1
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
generated
vendored
1
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
generated
vendored
@@ -1,3 +1,4 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
package configs
|
||||
|
9
vendor/github.com/opencontainers/runc/libcontainer/configs/rdma.go
generated
vendored
Normal file
9
vendor/github.com/opencontainers/runc/libcontainer/configs/rdma.go
generated
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
package configs
|
||||
|
||||
// LinuxRdma for Linux cgroup 'rdma' resource management (Linux 4.11)
|
||||
type LinuxRdma struct {
|
||||
// Maximum number of HCA handles that can be opened. Default is "no limit".
|
||||
HcaHandles *uint32 `json:"hca_handles,omitempty"`
|
||||
// Maximum number of HCA objects that can be created. Default is "no limit".
|
||||
HcaObjects *uint32 `json:"hca_objects,omitempty"`
|
||||
}
|
61
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
61
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
@@ -52,7 +52,7 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
|
||||
}
|
||||
for _, c := range warns {
|
||||
if err := c(config); err != nil {
|
||||
logrus.WithError(err).Warnf("invalid configuration")
|
||||
logrus.WithError(err).Warn("invalid configuration")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -62,20 +62,17 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
|
||||
// to the container's root filesystem.
|
||||
func (v *ConfigValidator) rootfs(config *configs.Config) error {
|
||||
if _, err := os.Stat(config.Rootfs); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs)
|
||||
}
|
||||
return err
|
||||
return fmt.Errorf("invalid rootfs: %w", err)
|
||||
}
|
||||
cleaned, err := filepath.Abs(config.Rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("invalid rootfs: %w", err)
|
||||
}
|
||||
if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("invalid rootfs: %w", err)
|
||||
}
|
||||
if filepath.Clean(config.Rootfs) != cleaned {
|
||||
return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
|
||||
return errors.New("invalid rootfs: not an absolute path, or a symlink")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -131,6 +128,36 @@ func (v *ConfigValidator) cgroupnamespace(config *configs.Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// convertSysctlVariableToDotsSeparator can return sysctl variables in dots separator format.
|
||||
// The '/' separator is also accepted in place of a '.'.
|
||||
// Convert the sysctl variables to dots separator format for validation.
|
||||
// More info:
|
||||
// https://man7.org/linux/man-pages/man8/sysctl.8.html
|
||||
// https://man7.org/linux/man-pages/man5/sysctl.d.5.html
|
||||
// For example:
|
||||
// Input sysctl variable "net/ipv4/conf/eno2.100.rp_filter"
|
||||
// will return the converted value "net.ipv4.conf.eno2/100.rp_filter"
|
||||
func convertSysctlVariableToDotsSeparator(val string) string {
|
||||
if val == "" {
|
||||
return val
|
||||
}
|
||||
firstSepIndex := strings.IndexAny(val, "./")
|
||||
if firstSepIndex == -1 || val[firstSepIndex] == '.' {
|
||||
return val
|
||||
}
|
||||
|
||||
f := func(r rune) rune {
|
||||
switch r {
|
||||
case '.':
|
||||
return '/'
|
||||
case '/':
|
||||
return '.'
|
||||
}
|
||||
return r
|
||||
}
|
||||
return strings.Map(f, val)
|
||||
}
|
||||
|
||||
// sysctl validates that the specified sysctl keys are valid or not.
|
||||
// /proc/sys isn't completely namespaced and depending on which namespaces
|
||||
// are specified, a subset of sysctls are permitted.
|
||||
@@ -153,6 +180,7 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
|
||||
)
|
||||
|
||||
for s := range config.Sysctl {
|
||||
s := convertSysctlVariableToDotsSeparator(s)
|
||||
if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") {
|
||||
if config.Namespaces.Contains(configs.NEWIPC) {
|
||||
continue
|
||||
@@ -176,7 +204,7 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
|
||||
hostnet, hostnetErr = isHostNetNS(path)
|
||||
})
|
||||
if hostnetErr != nil {
|
||||
return hostnetErr
|
||||
return fmt.Errorf("invalid netns path: %w", hostnetErr)
|
||||
}
|
||||
if hostnet {
|
||||
return fmt.Errorf("sysctl %q not allowed in host network namespace", s)
|
||||
@@ -205,19 +233,16 @@ func (v *ConfigValidator) intelrdt(config *configs.Config) error {
|
||||
return errors.New("intelRdt is specified in config, but Intel RDT is not supported or enabled")
|
||||
}
|
||||
|
||||
if config.IntelRdt.ClosID == "." || config.IntelRdt.ClosID == ".." || strings.Contains(config.IntelRdt.ClosID, "/") {
|
||||
return fmt.Errorf("invalid intelRdt.ClosID %q", config.IntelRdt.ClosID)
|
||||
}
|
||||
|
||||
if !intelrdt.IsCATEnabled() && config.IntelRdt.L3CacheSchema != "" {
|
||||
return errors.New("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled")
|
||||
}
|
||||
if !intelrdt.IsMBAEnabled() && config.IntelRdt.MemBwSchema != "" {
|
||||
return errors.New("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled")
|
||||
}
|
||||
|
||||
if intelrdt.IsCATEnabled() && config.IntelRdt.L3CacheSchema == "" {
|
||||
return errors.New("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
|
||||
}
|
||||
if intelrdt.IsMBAEnabled() && config.IntelRdt.MemBwSchema == "" {
|
||||
return errors.New("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -268,10 +293,10 @@ func isHostNetNS(path string) (bool, error) {
|
||||
var st1, st2 unix.Stat_t
|
||||
|
||||
if err := unix.Stat(currentProcessNetns, &st1); err != nil {
|
||||
return false, fmt.Errorf("unable to stat %q: %s", currentProcessNetns, err)
|
||||
return false, &os.PathError{Op: "stat", Path: currentProcessNetns, Err: err}
|
||||
}
|
||||
if err := unix.Stat(path, &st2); err != nil {
|
||||
return false, fmt.Errorf("unable to stat %q: %s", path, err)
|
||||
return false, &os.PathError{Op: "stat", Path: path, Err: err}
|
||||
}
|
||||
|
||||
return (st1.Dev == st2.Dev) && (st1.Ino == st2.Ino), nil
|
||||
|
2
vendor/github.com/opencontainers/runc/libcontainer/console_linux.go
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/console_linux.go
generated
vendored
@@ -18,7 +18,7 @@ func mountConsole(slavePath string) error {
|
||||
if f != nil {
|
||||
f.Close()
|
||||
}
|
||||
return unix.Mount(slavePath, "/dev/console", "bind", unix.MS_BIND, "")
|
||||
return mount(slavePath, "/dev/console", "", "bind", unix.MS_BIND, "")
|
||||
}
|
||||
|
||||
// dupStdio opens the slavePath for the console and dups the fds to the current
|
||||
|
43
vendor/github.com/opencontainers/runc/libcontainer/container.go
generated
vendored
43
vendor/github.com/opencontainers/runc/libcontainer/container.go
generated
vendored
@@ -74,22 +74,12 @@ type BaseContainer interface {
|
||||
ID() string
|
||||
|
||||
// Returns the current status of the container.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// Systemerror - System error.
|
||||
Status() (Status, error)
|
||||
|
||||
// State returns the current container's state information.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
State() (*State, error)
|
||||
|
||||
// OCIState returns the current container's state information.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
OCIState() (*specs.State, error)
|
||||
|
||||
// Returns the current config of the container.
|
||||
@@ -97,48 +87,26 @@ type BaseContainer interface {
|
||||
|
||||
// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// Systemerror - System error.
|
||||
//
|
||||
// Some of the returned PIDs may no longer refer to processes in the Container, unless
|
||||
// the Container state is PAUSED in which case every PID in the slice is valid.
|
||||
Processes() ([]int, error)
|
||||
|
||||
// Returns statistics for the container.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// Systemerror - System error.
|
||||
Stats() (*Stats, error)
|
||||
|
||||
// Set resources of container as configured
|
||||
//
|
||||
// We can use this to change resources when containers are running.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
Set(config configs.Config) error
|
||||
|
||||
// Start a process inside the container. Returns error if process fails to
|
||||
// start. You can track process lifecycle with passed Process structure.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// ConfigInvalid - config is invalid,
|
||||
// ContainerPaused - Container is paused,
|
||||
// SystemError - System error.
|
||||
Start(process *Process) (err error)
|
||||
|
||||
// Run immediately starts the process inside the container. Returns error if process
|
||||
// fails to start. It does not block waiting for the exec fifo after start returns but
|
||||
// opens the fifo after start returns.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// ConfigInvalid - config is invalid,
|
||||
// ContainerPaused - Container is paused,
|
||||
// SystemError - System error.
|
||||
Run(process *Process) (err error)
|
||||
|
||||
// Destroys the container, if its in a valid state, after killing any
|
||||
@@ -149,25 +117,14 @@ type BaseContainer interface {
|
||||
//
|
||||
// Running containers must first be stopped using Signal(..).
|
||||
// Paused containers must first be resumed using Resume(..).
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotStopped - Container is still running,
|
||||
// ContainerPaused - Container is paused,
|
||||
// SystemError - System error.
|
||||
Destroy() error
|
||||
|
||||
// Signal sends the provided signal code to the container's initial process.
|
||||
//
|
||||
// If all is specified the signal is sent to all processes in the container
|
||||
// including the initial process.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
Signal(s os.Signal, all bool) error
|
||||
|
||||
// Exec signals the container to exec the users process at the end of the init.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
Exec() error
|
||||
}
|
||||
|
317
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
317
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
@@ -8,10 +6,10 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strconv"
|
||||
@@ -19,21 +17,20 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/checkpoint-restore/go-criu/v5"
|
||||
criurpc "github.com/checkpoint-restore/go-criu/v5/rpc"
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink/nl"
|
||||
"golang.org/x/sys/unix"
|
||||
"google.golang.org/protobuf/proto"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
|
||||
"github.com/checkpoint-restore/go-criu/v5"
|
||||
criurpc "github.com/checkpoint-restore/go-criu/v5/rpc"
|
||||
errorsf "github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink/nl"
|
||||
"golang.org/x/sys/unix"
|
||||
"google.golang.org/protobuf/proto"
|
||||
)
|
||||
|
||||
const stdioFdCount = 3
|
||||
@@ -98,48 +95,26 @@ type Container interface {
|
||||
// Methods below here are platform specific
|
||||
|
||||
// Checkpoint checkpoints the running container's state to disk using the criu(8) utility.
|
||||
//
|
||||
// errors:
|
||||
// Systemerror - System error.
|
||||
Checkpoint(criuOpts *CriuOpts) error
|
||||
|
||||
// Restore restores the checkpointed container to a running state using the criu(8) utility.
|
||||
//
|
||||
// errors:
|
||||
// Systemerror - System error.
|
||||
Restore(process *Process, criuOpts *CriuOpts) error
|
||||
|
||||
// If the Container state is RUNNING or CREATED, sets the Container state to PAUSING and pauses
|
||||
// the execution of any user processes. Asynchronously, when the container finished being paused the
|
||||
// state is changed to PAUSED.
|
||||
// If the Container state is PAUSED, do nothing.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// ContainerNotRunning - Container not running or created,
|
||||
// Systemerror - System error.
|
||||
Pause() error
|
||||
|
||||
// If the Container state is PAUSED, resumes the execution of any user processes in the
|
||||
// Container before setting the Container state to RUNNING.
|
||||
// If the Container state is RUNNING, do nothing.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// ContainerNotPaused - Container is not paused,
|
||||
// Systemerror - System error.
|
||||
Resume() error
|
||||
|
||||
// NotifyOOM returns a read-only channel signaling when the container receives an OOM notification.
|
||||
//
|
||||
// errors:
|
||||
// Systemerror - System error.
|
||||
NotifyOOM() (<-chan struct{}, error)
|
||||
|
||||
// NotifyMemoryPressure returns a read-only channel signaling when the container reaches a given pressure level
|
||||
//
|
||||
// errors:
|
||||
// Systemerror - System error.
|
||||
NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error)
|
||||
}
|
||||
|
||||
@@ -184,7 +159,7 @@ func (c *linuxContainer) Processes() ([]int, error) {
|
||||
|
||||
pids, err = c.cgroupManager.GetAllPids()
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups")
|
||||
return nil, fmt.Errorf("unable to get all container pids: %w", err)
|
||||
}
|
||||
return pids, nil
|
||||
}
|
||||
@@ -195,11 +170,11 @@ func (c *linuxContainer) Stats() (*Stats, error) {
|
||||
stats = &Stats{}
|
||||
)
|
||||
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
|
||||
return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
|
||||
return stats, fmt.Errorf("unable to get container cgroup stats: %w", err)
|
||||
}
|
||||
if c.intelRdtManager != nil {
|
||||
if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil {
|
||||
return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats")
|
||||
return stats, fmt.Errorf("unable to get container Intel RDT stats: %w", err)
|
||||
}
|
||||
}
|
||||
for _, iface := range c.config.Networks {
|
||||
@@ -207,7 +182,7 @@ func (c *linuxContainer) Stats() (*Stats, error) {
|
||||
case "veth":
|
||||
istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
|
||||
if err != nil {
|
||||
return stats, newSystemErrorWithCausef(err, "getting network stats for interface %q", iface.HostInterfaceName)
|
||||
return stats, fmt.Errorf("unable to get network stats for interface %q: %w", iface.HostInterfaceName, err)
|
||||
}
|
||||
stats.Interfaces = append(stats.Interfaces, istats)
|
||||
}
|
||||
@@ -223,7 +198,7 @@ func (c *linuxContainer) Set(config configs.Config) error {
|
||||
return err
|
||||
}
|
||||
if status == Stopped {
|
||||
return newGenericError(errors.New("container not running"), ContainerNotRunning)
|
||||
return ErrNotRunning
|
||||
}
|
||||
if err := c.cgroupManager.Set(config.Cgroups.Resources); err != nil {
|
||||
// Set configs back
|
||||
@@ -254,7 +229,7 @@ func (c *linuxContainer) Start(process *Process) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
if c.config.Cgroups.Resources.SkipDevices {
|
||||
return newGenericError(errors.New("can't start container with SkipDevices set"), ConfigInvalid)
|
||||
return errors.New("can't start container with SkipDevices set")
|
||||
}
|
||||
if process.Init {
|
||||
if err := c.createExecFifo(); err != nil {
|
||||
@@ -310,7 +285,7 @@ func (c *linuxContainer) exec() error {
|
||||
}
|
||||
|
||||
func readFromExecFifo(execFifo io.Reader) error {
|
||||
data, err := ioutil.ReadAll(execFifo)
|
||||
data, err := io.ReadAll(execFifo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -336,7 +311,7 @@ func fifoOpen(path string, block bool) openResult {
|
||||
}
|
||||
f, err := os.OpenFile(path, flags, 0)
|
||||
if err != nil {
|
||||
return openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
|
||||
return openResult{err: fmt.Errorf("exec fifo: %w", err)}
|
||||
}
|
||||
return openResult{file: f}
|
||||
}
|
||||
@@ -361,7 +336,7 @@ type openResult struct {
|
||||
func (c *linuxContainer) start(process *Process) (retErr error) {
|
||||
parent, err := c.newParentProcess(process)
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "creating new parent process")
|
||||
return fmt.Errorf("unable to create new parent process: %w", err)
|
||||
}
|
||||
|
||||
logsDone := parent.forwardChildLogs()
|
||||
@@ -371,13 +346,13 @@ func (c *linuxContainer) start(process *Process) (retErr error) {
|
||||
// runc init closing the _LIBCONTAINER_LOGPIPE log fd.
|
||||
err := <-logsDone
|
||||
if err != nil && retErr == nil {
|
||||
retErr = newSystemErrorWithCause(err, "forwarding init logs")
|
||||
retErr = fmt.Errorf("unable to forward init logs: %w", err)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
if err := parent.start(); err != nil {
|
||||
return newSystemErrorWithCause(err, "starting container process")
|
||||
return fmt.Errorf("unable to start container process: %w", err)
|
||||
}
|
||||
|
||||
if process.Init {
|
||||
@@ -390,7 +365,7 @@ func (c *linuxContainer) start(process *Process) (retErr error) {
|
||||
|
||||
if err := c.config.Hooks[configs.Poststart].RunHooks(s); err != nil {
|
||||
if err := ignoreTerminateErrors(parent.terminate()); err != nil {
|
||||
logrus.Warn(errorsf.Wrapf(err, "Running Poststart hook"))
|
||||
logrus.Warn(fmt.Errorf("error running poststart hook: %w", err))
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -416,11 +391,19 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
|
||||
// to avoid a PID reuse attack
|
||||
if status == Running || status == Created || status == Paused {
|
||||
if err := c.initProcess.signal(s); err != nil {
|
||||
return newSystemErrorWithCause(err, "signaling init process")
|
||||
return fmt.Errorf("unable to signal init: %w", err)
|
||||
}
|
||||
if status == Paused {
|
||||
// For cgroup v1, killing a process in a frozen cgroup
|
||||
// does nothing until it's thawed. Only thaw the cgroup
|
||||
// for SIGKILL.
|
||||
if s, ok := s.(unix.Signal); ok && s == unix.SIGKILL {
|
||||
_ = c.cgroupManager.Freeze(configs.Thawed)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return newGenericError(errors.New("container not running"), ContainerNotRunning)
|
||||
return ErrNotRunning
|
||||
}
|
||||
|
||||
func (c *linuxContainer) createExecFifo() error {
|
||||
@@ -472,13 +455,13 @@ func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error {
|
||||
func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
|
||||
parentInitPipe, childInitPipe, err := utils.NewSockPair("init")
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "creating new init pipe")
|
||||
return nil, fmt.Errorf("unable to create init pipe: %w", err)
|
||||
}
|
||||
messageSockPair := filePair{parentInitPipe, childInitPipe}
|
||||
|
||||
parentLogPipe, childLogPipe, err := os.Pipe()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Unable to create the log pipe: %s", err)
|
||||
return nil, fmt.Errorf("unable to create log pipe: %w", err)
|
||||
}
|
||||
logFilePair := filePair{parentLogPipe, childLogPipe}
|
||||
|
||||
@@ -493,7 +476,7 @@ func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
|
||||
// that problem), but we no longer do that. However, there's no need to do
|
||||
// this for `runc exec` so we just keep it this way to be safe.
|
||||
if err := c.includeExecFifo(cmd); err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "including execfifo in cmd.Exec setup")
|
||||
return nil, fmt.Errorf("unable to setup exec fifo: %w", err)
|
||||
}
|
||||
return c.newInitProcess(p, cmd, messageSockPair, logFilePair)
|
||||
}
|
||||
@@ -537,6 +520,33 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi
|
||||
return cmd
|
||||
}
|
||||
|
||||
// shouldSendMountSources says whether the child process must setup bind mounts with
|
||||
// the source pre-opened (O_PATH) in the host user namespace.
|
||||
// See https://github.com/opencontainers/runc/issues/2484
|
||||
func (c *linuxContainer) shouldSendMountSources() bool {
|
||||
// Passing the mount sources via SCM_RIGHTS is only necessary when
|
||||
// both userns and mntns are active.
|
||||
if !c.config.Namespaces.Contains(configs.NEWUSER) ||
|
||||
!c.config.Namespaces.Contains(configs.NEWNS) {
|
||||
return false
|
||||
}
|
||||
|
||||
// nsexec.c send_mountsources() requires setns(mntns) capabilities
|
||||
// CAP_SYS_CHROOT and CAP_SYS_ADMIN.
|
||||
if c.config.RootlessEUID {
|
||||
return false
|
||||
}
|
||||
|
||||
// We need to send sources if there are bind-mounts.
|
||||
for _, m := range c.config.Mounts {
|
||||
if m.IsBind() {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
|
||||
nsMaps := make(map[configs.NamespaceType]string)
|
||||
@@ -546,10 +556,40 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPa
|
||||
}
|
||||
}
|
||||
_, sharePidns := nsMaps[configs.NEWPID]
|
||||
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps)
|
||||
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if c.shouldSendMountSources() {
|
||||
// Elements on this slice will be paired with mounts (see StartInitialization() and
|
||||
// prepareRootfs()). This slice MUST have the same size as c.config.Mounts.
|
||||
mountFds := make([]int, len(c.config.Mounts))
|
||||
for i, m := range c.config.Mounts {
|
||||
if !m.IsBind() {
|
||||
// Non bind-mounts do not use an fd.
|
||||
mountFds[i] = -1
|
||||
continue
|
||||
}
|
||||
|
||||
// The fd passed here will not be used: nsexec.c will overwrite it with dup3(). We just need
|
||||
// to allocate a fd so that we know the number to pass in the environment variable. The fd
|
||||
// must not be closed before cmd.Start(), so we reuse messageSockPair.child because the
|
||||
// lifecycle of that fd is already taken care of.
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, messageSockPair.child)
|
||||
mountFds[i] = stdioFdCount + len(cmd.ExtraFiles) - 1
|
||||
}
|
||||
|
||||
mountFdsJson, err := json.Marshal(mountFds)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error creating _LIBCONTAINER_MOUNT_FDS: %w", err)
|
||||
}
|
||||
|
||||
cmd.Env = append(cmd.Env,
|
||||
"_LIBCONTAINER_MOUNT_FDS="+string(mountFdsJson),
|
||||
)
|
||||
}
|
||||
|
||||
init := &initProcess{
|
||||
cmd: cmd,
|
||||
messageSockPair: messageSockPair,
|
||||
@@ -570,15 +610,15 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
|
||||
state, err := c.currentState()
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "getting container's current state")
|
||||
return nil, fmt.Errorf("unable to get container state: %w", err)
|
||||
}
|
||||
// for setns process, we don't have to set cloneflags as the process namespaces
|
||||
// will only be set via setns syscall
|
||||
data, err := c.bootstrapData(0, state.NamespacePaths)
|
||||
data, err := c.bootstrapData(0, state.NamespacePaths, initSetns)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &setnsProcess{
|
||||
proc := &setnsProcess{
|
||||
cmd: cmd,
|
||||
cgroupPaths: state.CgroupPaths,
|
||||
rootlessCgroups: c.config.RootlessCgroups,
|
||||
@@ -590,7 +630,29 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
|
||||
process: p,
|
||||
bootstrapData: data,
|
||||
initProcessPid: state.InitProcessPid,
|
||||
}, nil
|
||||
}
|
||||
if len(p.SubCgroupPaths) > 0 {
|
||||
if add, ok := p.SubCgroupPaths[""]; ok {
|
||||
// cgroup v1: using the same path for all controllers.
|
||||
// cgroup v2: the only possible way.
|
||||
for k := range proc.cgroupPaths {
|
||||
proc.cgroupPaths[k] = path.Join(proc.cgroupPaths[k], add)
|
||||
}
|
||||
// cgroup v2: do not try to join init process's cgroup
|
||||
// as a fallback (see (*setnsProcess).start).
|
||||
proc.initProcessPid = 0
|
||||
} else {
|
||||
// Per-controller paths.
|
||||
for ctrl, add := range p.SubCgroupPaths {
|
||||
if val, ok := proc.cgroupPaths[ctrl]; ok {
|
||||
proc.cgroupPaths[ctrl] = path.Join(val, add)
|
||||
} else {
|
||||
return nil, fmt.Errorf("unknown controller %s in SubCgroupPaths", ctrl)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return proc, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
||||
@@ -655,7 +717,7 @@ func (c *linuxContainer) Pause() error {
|
||||
c: c,
|
||||
})
|
||||
}
|
||||
return newGenericError(fmt.Errorf("container not running or created: %s", status), ContainerNotRunning)
|
||||
return ErrNotRunning
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Resume() error {
|
||||
@@ -666,7 +728,7 @@ func (c *linuxContainer) Resume() error {
|
||||
return err
|
||||
}
|
||||
if status != Paused {
|
||||
return newGenericError(fmt.Errorf("container not paused"), ContainerNotPaused)
|
||||
return ErrNotPaused
|
||||
}
|
||||
if err := c.cgroupManager.Freeze(configs.Thawed); err != nil {
|
||||
return err
|
||||
@@ -771,7 +833,7 @@ func (c *linuxContainer) checkCriuVersion(minVersion int) error {
|
||||
var err error
|
||||
c.criuVersion, err = criu.GetCriuVersion()
|
||||
if err != nil {
|
||||
return fmt.Errorf("CRIU version check failed: %s", err)
|
||||
return fmt.Errorf("CRIU version check failed: %w", err)
|
||||
}
|
||||
|
||||
return compareCriuVersion(c.criuVersion, minVersion)
|
||||
@@ -781,6 +843,9 @@ const descriptorsFilename = "descriptors.json"
|
||||
|
||||
func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) {
|
||||
mountDest := strings.TrimPrefix(m.Destination, c.config.Rootfs)
|
||||
if dest, err := securejoin.SecureJoin(c.config.Rootfs, mountDest); err == nil {
|
||||
mountDest = dest[len(c.config.Rootfs):]
|
||||
}
|
||||
extMnt := &criurpc.ExtMountMap{
|
||||
Key: proto.String(mountDest),
|
||||
Val: proto.String(mountDest),
|
||||
@@ -972,20 +1037,6 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if criuOpts.WorkDirectory == "" {
|
||||
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
|
||||
}
|
||||
|
||||
if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
workDir, err := os.Open(criuOpts.WorkDirectory)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer workDir.Close()
|
||||
|
||||
imageDir, err := os.Open(criuOpts.ImagesDirectory)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -994,7 +1045,6 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
|
||||
rpcOpts := criurpc.CriuOpts{
|
||||
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
|
||||
WorkDirFd: proto.Int32(int32(workDir.Fd())),
|
||||
LogLevel: proto.Int32(4),
|
||||
LogFile: proto.String("dump.log"),
|
||||
Root: proto.String(c.config.Rootfs),
|
||||
@@ -1012,6 +1062,19 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
LazyPages: proto.Bool(criuOpts.LazyPages),
|
||||
}
|
||||
|
||||
// if criuOpts.WorkDirectory is not set, criu default is used.
|
||||
if criuOpts.WorkDirectory != "" {
|
||||
if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
workDir, err := os.Open(criuOpts.WorkDirectory)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer workDir.Close()
|
||||
rpcOpts.WorkDirFd = proto.Int32(int32(workDir.Fd()))
|
||||
}
|
||||
|
||||
c.handleCriuConfigurationFile(&rpcOpts)
|
||||
|
||||
// If the container is running in a network namespace and has
|
||||
@@ -1054,7 +1117,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
|
||||
// append optional manage cgroups mode
|
||||
if criuOpts.ManageCgroupsMode != 0 {
|
||||
mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
|
||||
mode := criuOpts.ManageCgroupsMode
|
||||
rpcOpts.ManageCgroupsMode = &mode
|
||||
}
|
||||
|
||||
@@ -1144,7 +1207,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
return err
|
||||
}
|
||||
|
||||
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0o600)
|
||||
err = os.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0o600)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1159,6 +1222,9 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
|
||||
func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mount) {
|
||||
mountDest := strings.TrimPrefix(m.Destination, c.config.Rootfs)
|
||||
if dest, err := securejoin.SecureJoin(c.config.Rootfs, mountDest); err == nil {
|
||||
mountDest = dest[len(c.config.Rootfs):]
|
||||
}
|
||||
extMnt := &criurpc.ExtMountMap{
|
||||
Key: proto.String(mountDest),
|
||||
Val: proto.String(m.Source),
|
||||
@@ -1203,7 +1269,9 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
|
||||
case "bind":
|
||||
// The prepareBindMount() function checks if source
|
||||
// exists. So it cannot be used for other filesystem types.
|
||||
if err := prepareBindMount(m, c.config.Rootfs); err != nil {
|
||||
// TODO: pass something else than nil? Not sure if criu is
|
||||
// impacted by issue #2484
|
||||
if err := prepareBindMount(m, c.config.Rootfs, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
@@ -1256,7 +1324,7 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error
|
||||
for _, u := range umounts {
|
||||
_ = utils.WithProcfd(c.config.Rootfs, u, func(procfd string) error {
|
||||
if e := unix.Unmount(procfd, unix.MNT_DETACH); e != nil {
|
||||
if e != unix.EINVAL {
|
||||
if e != unix.EINVAL { //nolint:errorlint // unix errors are bare
|
||||
// Ignore EINVAL as it means 'target is not a mount point.'
|
||||
// It probably has already been unmounted.
|
||||
logrus.Warnf("Error during cleanup unmounting of %s (%s): %v", procfd, u, e)
|
||||
@@ -1282,8 +1350,8 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error
|
||||
// set up in the order they are configured.
|
||||
if m.Device == "bind" {
|
||||
if err := utils.WithProcfd(c.config.Rootfs, m.Destination, func(procfd string) error {
|
||||
if err := unix.Mount(m.Source, procfd, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
|
||||
return errorsf.Wrapf(err, "unable to bind mount %q to %q (through %q)", m.Source, m.Destination, procfd)
|
||||
if err := mount(m.Source, m.Destination, procfd, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}); err != nil {
|
||||
@@ -1311,19 +1379,6 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
if err := c.checkCriuVersion(30000); err != nil {
|
||||
return err
|
||||
}
|
||||
if criuOpts.WorkDirectory == "" {
|
||||
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
|
||||
}
|
||||
// Since a container can be C/R'ed multiple times,
|
||||
// the work directory may already exist.
|
||||
if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
workDir, err := os.Open(criuOpts.WorkDirectory)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer workDir.Close()
|
||||
if criuOpts.ImagesDirectory == "" {
|
||||
return errors.New("invalid directory to restore checkpoint")
|
||||
}
|
||||
@@ -1346,7 +1401,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = unix.Mount(c.config.Rootfs, root, "", unix.MS_BIND|unix.MS_REC, "")
|
||||
err = mount(c.config.Rootfs, root, "", "", unix.MS_BIND|unix.MS_REC, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1356,7 +1411,6 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
Type: &t,
|
||||
Opts: &criurpc.CriuOpts{
|
||||
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
|
||||
WorkDirFd: proto.Int32(int32(workDir.Fd())),
|
||||
EvasiveDevices: proto.Bool(true),
|
||||
LogLevel: proto.Int32(4),
|
||||
LogFile: proto.String("restore.log"),
|
||||
@@ -1383,7 +1437,26 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
}
|
||||
req.Opts.LsmProfile = proto.String(criuOpts.LsmProfile)
|
||||
}
|
||||
if criuOpts.LsmMountContext != "" {
|
||||
if err := c.checkCriuVersion(31600); err != nil {
|
||||
return errors.New("--lsm-mount-context requires at least CRIU 3.16")
|
||||
}
|
||||
req.Opts.LsmMountContext = proto.String(criuOpts.LsmMountContext)
|
||||
}
|
||||
|
||||
if criuOpts.WorkDirectory != "" {
|
||||
// Since a container can be C/R'ed multiple times,
|
||||
// the work directory may already exist.
|
||||
if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
workDir, err := os.Open(criuOpts.WorkDirectory)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer workDir.Close()
|
||||
req.Opts.WorkDirFd = proto.Int32(int32(workDir.Fd()))
|
||||
}
|
||||
c.handleCriuConfigurationFile(req.Opts)
|
||||
|
||||
if err := c.handleRestoringNamespaces(req.Opts, &extraFiles); err != nil {
|
||||
@@ -1432,7 +1505,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
|
||||
// append optional manage cgroups mode
|
||||
if criuOpts.ManageCgroupsMode != 0 {
|
||||
mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
|
||||
mode := criuOpts.ManageCgroupsMode
|
||||
req.Opts.ManageCgroupsMode = &mode
|
||||
}
|
||||
|
||||
@@ -1440,7 +1513,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
fds []string
|
||||
fdJSON []byte
|
||||
)
|
||||
if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil {
|
||||
if fdJSON, err = os.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -1477,7 +1550,7 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||
}
|
||||
|
||||
if err := c.cgroupManager.Set(c.config.Cgroups.Resources); err != nil {
|
||||
return newSystemError(err)
|
||||
return err
|
||||
}
|
||||
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
@@ -1835,7 +1908,7 @@ func (c *linuxContainer) updateState(process parentProcess) (*State, error) {
|
||||
}
|
||||
|
||||
func (c *linuxContainer) saveState(s *State) (retErr error) {
|
||||
tmpFile, err := ioutil.TempFile(c.root, "state-")
|
||||
tmpFile, err := os.CreateTemp(c.root, "state-")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1928,9 +2001,10 @@ func (c *linuxContainer) currentState() (*State, error) {
|
||||
startTime, _ = c.initProcess.startTime()
|
||||
externalDescriptors = c.initProcess.externalDescriptors()
|
||||
}
|
||||
intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
|
||||
if err != nil {
|
||||
intelRdtPath = ""
|
||||
|
||||
intelRdtPath := ""
|
||||
if c.intelRdtManager != nil {
|
||||
intelRdtPath = c.intelRdtManager.GetPath()
|
||||
}
|
||||
state := &State{
|
||||
BaseState: BaseState{
|
||||
@@ -1998,16 +2072,16 @@ func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceTyp
|
||||
if p, ok := namespaces[ns]; ok && p != "" {
|
||||
// check if the requested namespace is supported
|
||||
if !configs.IsNamespaceSupported(ns) {
|
||||
return nil, newSystemError(fmt.Errorf("namespace %s is not supported", ns))
|
||||
return nil, fmt.Errorf("namespace %s is not supported", ns)
|
||||
}
|
||||
// only set to join this namespace if it exists
|
||||
if _, err := os.Lstat(p); err != nil {
|
||||
return nil, newSystemErrorWithCausef(err, "running lstat on namespace path %q", p)
|
||||
return nil, fmt.Errorf("namespace path: %w", err)
|
||||
}
|
||||
// do not allow namespace path with comma as we use it to separate
|
||||
// the namespace paths
|
||||
if strings.ContainsRune(p, ',') {
|
||||
return nil, newSystemError(fmt.Errorf("invalid path %s", p))
|
||||
return nil, fmt.Errorf("invalid namespace path %s", p)
|
||||
}
|
||||
paths = append(paths, fmt.Sprintf("%s:%s", configs.NsName(ns), p))
|
||||
}
|
||||
@@ -2039,7 +2113,7 @@ type netlinkError struct{ error }
|
||||
// such as one that uses nsenter package to bootstrap the container's
|
||||
// init process correctly, i.e. with correct namespaces, uid/gid
|
||||
// mapping etc.
|
||||
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (_ io.Reader, Err error) {
|
||||
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, it initType) (_ io.Reader, Err error) {
|
||||
// create the netlink message
|
||||
r := nl.NewNetlinkRequest(int(InitMsg), 0)
|
||||
|
||||
@@ -2134,6 +2208,25 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
|
||||
Value: c.config.RootlessEUID,
|
||||
})
|
||||
|
||||
// Bind mount source to open.
|
||||
if it == initStandard && c.shouldSendMountSources() {
|
||||
var mounts []byte
|
||||
for _, m := range c.config.Mounts {
|
||||
if m.IsBind() {
|
||||
if strings.IndexByte(m.Source, 0) >= 0 {
|
||||
return nil, fmt.Errorf("mount source string contains null byte: %q", m.Source)
|
||||
}
|
||||
mounts = append(mounts, []byte(m.Source)...)
|
||||
}
|
||||
mounts = append(mounts, byte(0))
|
||||
}
|
||||
|
||||
r.AddData(&Bytemsg{
|
||||
Type: MountSourcesAttr,
|
||||
Value: mounts,
|
||||
})
|
||||
}
|
||||
|
||||
return bytes.NewReader(r.Serialize()), nil
|
||||
}
|
||||
|
||||
@@ -2144,7 +2237,7 @@ func ignoreTerminateErrors(err error) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
// terminate() might return an error from ether Kill or Wait.
|
||||
// terminate() might return an error from either Kill or Wait.
|
||||
// The (*Cmd).Wait documentation says: "If the command fails to run
|
||||
// or doesn't complete successfully, the error is of type *ExitError".
|
||||
// Filter out such errors (like "exit status 1" or "signal: killed").
|
||||
@@ -2152,13 +2245,11 @@ func ignoreTerminateErrors(err error) error {
|
||||
if errors.As(err, &exitErr) {
|
||||
return nil
|
||||
}
|
||||
// TODO: use errors.Is(err, os.ErrProcessDone) here and
|
||||
// remove "process already finished" string comparison below
|
||||
// once go 1.16 is minimally supported version.
|
||||
|
||||
if errors.Is(err, os.ErrProcessDone) {
|
||||
return nil
|
||||
}
|
||||
s := err.Error()
|
||||
if strings.Contains(s, "process already finished") ||
|
||||
strings.Contains(s, "Wait was already called") {
|
||||
if strings.Contains(s, "Wait was already called") {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
|
1
vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
generated
vendored
1
vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
generated
vendored
@@ -30,4 +30,5 @@ type CriuOpts struct {
|
||||
LazyPages bool // restore memory pages lazily using userfaultfd
|
||||
StatusFd int // fd for feedback when lazy server is ready
|
||||
LsmProfile string // LSM profile used to restore the container
|
||||
LsmMountContext string // LSM mount context value to use during restore
|
||||
}
|
||||
|
12
vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go
generated
vendored
12
vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go
generated
vendored
@@ -1,10 +1,10 @@
|
||||
//go:build !windows
|
||||
// +build !windows
|
||||
|
||||
package devices
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
@@ -16,8 +16,8 @@ var ErrNotADevice = errors.New("not a device node")
|
||||
|
||||
// Testing dependencies
|
||||
var (
|
||||
unixLstat = unix.Lstat
|
||||
ioutilReadDir = ioutil.ReadDir
|
||||
unixLstat = unix.Lstat
|
||||
osReadDir = os.ReadDir
|
||||
)
|
||||
|
||||
func mkDev(d *Rule) (uint64, error) {
|
||||
@@ -40,7 +40,7 @@ func DeviceFromPath(path, permissions string) (*Device, error) {
|
||||
var (
|
||||
devType Type
|
||||
mode = stat.Mode
|
||||
devNumber = uint64(stat.Rdev)
|
||||
devNumber = uint64(stat.Rdev) //nolint:unconvert // Rdev is uint32 on e.g. MIPS.
|
||||
major = unix.Major(devNumber)
|
||||
minor = unix.Minor(devNumber)
|
||||
)
|
||||
@@ -76,7 +76,7 @@ func HostDevices() ([]*Device, error) {
|
||||
// GetDevices recursively traverses a directory specified by path
|
||||
// and returns all devices found there.
|
||||
func GetDevices(path string) ([]*Device, error) {
|
||||
files, err := ioutilReadDir(path)
|
||||
files, err := osReadDir(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -103,7 +103,7 @@ func GetDevices(path string) ([]*Device, error) {
|
||||
}
|
||||
device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm")
|
||||
if err != nil {
|
||||
if err == ErrNotADevice {
|
||||
if errors.Is(err, ErrNotADevice) {
|
||||
continue
|
||||
}
|
||||
if os.IsNotExist(err) {
|
||||
|
75
vendor/github.com/opencontainers/runc/libcontainer/error.go
generated
vendored
75
vendor/github.com/opencontainers/runc/libcontainer/error.go
generated
vendored
@@ -1,70 +1,13 @@
|
||||
package libcontainer
|
||||
|
||||
import "io"
|
||||
import "errors"
|
||||
|
||||
// ErrorCode is the API error code type.
|
||||
type ErrorCode int
|
||||
|
||||
// API error codes.
|
||||
const (
|
||||
// Factory errors
|
||||
IdInUse ErrorCode = iota
|
||||
InvalidIdFormat
|
||||
|
||||
// Container errors
|
||||
ContainerNotExists
|
||||
ContainerPaused
|
||||
ContainerNotStopped
|
||||
ContainerNotRunning
|
||||
ContainerNotPaused
|
||||
|
||||
// Process errors
|
||||
NoProcessOps
|
||||
|
||||
// Common errors
|
||||
ConfigInvalid
|
||||
ConsoleExists
|
||||
SystemError
|
||||
var (
|
||||
ErrExist = errors.New("container with given ID already exists")
|
||||
ErrInvalidID = errors.New("invalid container ID format")
|
||||
ErrNotExist = errors.New("container does not exist")
|
||||
ErrPaused = errors.New("container paused")
|
||||
ErrRunning = errors.New("container still running")
|
||||
ErrNotRunning = errors.New("container not running")
|
||||
ErrNotPaused = errors.New("container not paused")
|
||||
)
|
||||
|
||||
func (c ErrorCode) String() string {
|
||||
switch c {
|
||||
case IdInUse:
|
||||
return "Id already in use"
|
||||
case InvalidIdFormat:
|
||||
return "Invalid format"
|
||||
case ContainerPaused:
|
||||
return "Container paused"
|
||||
case ConfigInvalid:
|
||||
return "Invalid configuration"
|
||||
case SystemError:
|
||||
return "System error"
|
||||
case ContainerNotExists:
|
||||
return "Container does not exist"
|
||||
case ContainerNotStopped:
|
||||
return "Container is not stopped"
|
||||
case ContainerNotRunning:
|
||||
return "Container is not running"
|
||||
case ConsoleExists:
|
||||
return "Console exists for process"
|
||||
case ContainerNotPaused:
|
||||
return "Container is not paused"
|
||||
case NoProcessOps:
|
||||
return "No process operations"
|
||||
default:
|
||||
return "Unknown error"
|
||||
}
|
||||
}
|
||||
|
||||
// Error is the API error type.
|
||||
type Error interface {
|
||||
error
|
||||
|
||||
// Returns an error if it failed to write the detail of the Error to w.
|
||||
// The detail of the Error may include the error message and a
|
||||
// representation of the stack trace.
|
||||
Detail(w io.Writer) error
|
||||
|
||||
// Returns the error code for this error.
|
||||
Code() ErrorCode
|
||||
}
|
||||
|
14
vendor/github.com/opencontainers/runc/libcontainer/factory.go
generated
vendored
14
vendor/github.com/opencontainers/runc/libcontainer/factory.go
generated
vendored
@@ -14,29 +14,15 @@ type Factory interface {
|
||||
//
|
||||
// Returns the new container with a running process.
|
||||
//
|
||||
// errors:
|
||||
// IdInUse - id is already in use by a container
|
||||
// InvalidIdFormat - id has incorrect format
|
||||
// ConfigInvalid - config is invalid
|
||||
// Systemerror - System error
|
||||
//
|
||||
// On error, any partially created container parts are cleaned up (the operation is atomic).
|
||||
Create(id string, config *configs.Config) (Container, error)
|
||||
|
||||
// Load takes an ID for an existing container and returns the container information
|
||||
// from the state. This presents a read only view of the container.
|
||||
//
|
||||
// errors:
|
||||
// Path does not exist
|
||||
// System error
|
||||
Load(id string) (Container, error)
|
||||
|
||||
// StartInitialization is an internal API to libcontainer used during the reexec of the
|
||||
// container.
|
||||
//
|
||||
// Errors:
|
||||
// Pipe connection error
|
||||
// System error
|
||||
StartInitialization() error
|
||||
|
||||
// Type returns info string about factory type (e.g. lxc, libcontainer...)
|
||||
|
252
vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
generated
vendored
252
vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
generated
vendored
@@ -1,9 +1,8 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -13,17 +12,14 @@ import (
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/manager"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -41,7 +37,9 @@ func InitArgs(args ...string) func(*LinuxFactory) error {
|
||||
// Resolve relative paths to ensure that its available
|
||||
// after directory changes.
|
||||
if args[0], err = filepath.Abs(args[0]); err != nil {
|
||||
return newGenericError(err, ConfigInvalid)
|
||||
// The only error returned from filepath.Abs is
|
||||
// the one from os.Getwd, i.e. a system error.
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,100 +48,6 @@ func InitArgs(args ...string) func(*LinuxFactory) error {
|
||||
}
|
||||
}
|
||||
|
||||
func getUnifiedPath(paths map[string]string) string {
|
||||
path := ""
|
||||
for k, v := range paths {
|
||||
if path == "" {
|
||||
path = v
|
||||
} else if v != path {
|
||||
panic(errors.Errorf("expected %q path to be unified path %q, got %q", k, path, v))
|
||||
}
|
||||
}
|
||||
// can be empty
|
||||
if path != "" {
|
||||
if filepath.Clean(path) != path || !filepath.IsAbs(path) {
|
||||
panic(errors.Errorf("invalid dir path %q", path))
|
||||
}
|
||||
}
|
||||
|
||||
return path
|
||||
}
|
||||
|
||||
func systemdCgroupV2(l *LinuxFactory, rootless bool) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return systemd.NewUnifiedManager(config, getUnifiedPath(paths), rootless)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SystemdCgroups is an options func to configure a LinuxFactory to return
|
||||
// containers that use systemd to create and manage cgroups.
|
||||
func SystemdCgroups(l *LinuxFactory) error {
|
||||
if !systemd.IsRunningSystemd() {
|
||||
return fmt.Errorf("systemd not running on this host, can't use systemd as cgroups manager")
|
||||
}
|
||||
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return systemdCgroupV2(l, false)
|
||||
}
|
||||
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return systemd.NewLegacyManager(config, paths)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RootlessSystemdCgroups is rootless version of SystemdCgroups.
|
||||
func RootlessSystemdCgroups(l *LinuxFactory) error {
|
||||
if !systemd.IsRunningSystemd() {
|
||||
return fmt.Errorf("systemd not running on this host, can't use systemd as cgroups manager")
|
||||
}
|
||||
|
||||
if !cgroups.IsCgroup2UnifiedMode() {
|
||||
return fmt.Errorf("cgroup v2 not enabled on this host, can't use systemd (rootless) as cgroups manager")
|
||||
}
|
||||
return systemdCgroupV2(l, true)
|
||||
}
|
||||
|
||||
func cgroupfs2(l *LinuxFactory, rootless bool) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
m, err := fs2.NewManager(config, getUnifiedPath(paths), rootless)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return m
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func cgroupfs(l *LinuxFactory, rootless bool) error {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return cgroupfs2(l, rootless)
|
||||
}
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return fs.NewManager(config, paths, rootless)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Cgroupfs is an options func to configure a LinuxFactory to return containers
|
||||
// that use the native cgroups filesystem implementation to create and manage
|
||||
// cgroups.
|
||||
func Cgroupfs(l *LinuxFactory) error {
|
||||
return cgroupfs(l, false)
|
||||
}
|
||||
|
||||
// RootlessCgroupfs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the native cgroups filesystem implementation to create
|
||||
// and manage cgroups. The difference between RootlessCgroupfs and Cgroupfs is
|
||||
// that RootlessCgroupfs can transparently handle permission errors that occur
|
||||
// during rootless container (including euid=0 in userns) setup (while still allowing cgroup usage if
|
||||
// they've been set up properly).
|
||||
func RootlessCgroupfs(l *LinuxFactory) error {
|
||||
return cgroupfs(l, true)
|
||||
}
|
||||
|
||||
// IntelRdtfs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the Intel RDT "resource control" filesystem to
|
||||
// create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth).
|
||||
@@ -165,7 +69,7 @@ func TmpfsRoot(l *LinuxFactory) error {
|
||||
return err
|
||||
}
|
||||
if !mounted {
|
||||
if err := unix.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
|
||||
if err := mount("tmpfs", l.Root, "", "tmpfs", 0, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -186,7 +90,7 @@ func CriuPath(criupath string) func(*LinuxFactory) error {
|
||||
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
|
||||
if root != "" {
|
||||
if err := os.MkdirAll(root, 0o700); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
l := &LinuxFactory{
|
||||
@@ -197,10 +101,6 @@ func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
|
||||
CriuPath: "criu",
|
||||
}
|
||||
|
||||
if err := Cgroupfs(l); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, opt := range options {
|
||||
if opt == nil {
|
||||
continue
|
||||
@@ -237,37 +137,69 @@ type LinuxFactory struct {
|
||||
// Validator provides validation to container configurations.
|
||||
Validator validate.Validator
|
||||
|
||||
// NewCgroupsManager returns an initialized cgroups manager for a single container.
|
||||
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
|
||||
|
||||
// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
|
||||
NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
|
||||
if l.Root == "" {
|
||||
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
|
||||
return nil, errors.New("root not set")
|
||||
}
|
||||
if err := l.validateID(id); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := l.Validator.Validate(config); err != nil {
|
||||
return nil, newGenericError(err, ConfigInvalid)
|
||||
return nil, err
|
||||
}
|
||||
containerRoot, err := securejoin.SecureJoin(l.Root, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err := os.Stat(containerRoot); err == nil {
|
||||
return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
|
||||
return nil, ErrExist
|
||||
} else if !os.IsNotExist(err) {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cm, err := manager.New(config.Cgroups)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Check that cgroup does not exist or empty (no processes).
|
||||
// Note for cgroup v1 this check is not thorough, as there are multiple
|
||||
// separate hierarchies, while both Exists() and GetAllPids() only use
|
||||
// one for "devices" controller (assuming others are the same, which is
|
||||
// probably true in almost all scenarios). Checking all the hierarchies
|
||||
// would be too expensive.
|
||||
if cm.Exists() {
|
||||
pids, err := cm.GetAllPids()
|
||||
// Reading PIDs can race with cgroups removal, so ignore ENOENT and ENODEV.
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.ENODEV) {
|
||||
return nil, fmt.Errorf("unable to get cgroup PIDs: %w", err)
|
||||
}
|
||||
if len(pids) != 0 {
|
||||
// TODO: return an error.
|
||||
logrus.Warnf("container's cgroup is not empty: %d process(es) found", len(pids))
|
||||
logrus.Warn("DEPRECATED: running container in a non-empty cgroup won't be supported in runc 1.2; https://github.com/opencontainers/runc/issues/3132")
|
||||
}
|
||||
}
|
||||
|
||||
// Check that cgroup is not frozen. Do not use Exists() here
|
||||
// since in cgroup v1 it only checks "devices" controller.
|
||||
st, err := cm.GetFreezerState()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get cgroup freezer state: %w", err)
|
||||
}
|
||||
if st == configs.Frozen {
|
||||
return nil, errors.New("container's cgroup unexpectedly frozen")
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(containerRoot, 0o711); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
return nil, err
|
||||
}
|
||||
if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
return nil, err
|
||||
}
|
||||
c := &linuxContainer{
|
||||
id: id,
|
||||
@@ -278,7 +210,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
||||
criuPath: l.CriuPath,
|
||||
newuidmapPath: l.NewuidmapPath,
|
||||
newgidmapPath: l.NewgidmapPath,
|
||||
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
|
||||
cgroupManager: cm,
|
||||
}
|
||||
if l.NewIntelRdtManager != nil {
|
||||
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
|
||||
@@ -289,7 +221,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
||||
|
||||
func (l *LinuxFactory) Load(id string) (Container, error) {
|
||||
if l.Root == "" {
|
||||
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
|
||||
return nil, errors.New("root not set")
|
||||
}
|
||||
// when load, we need to check id is valid or not.
|
||||
if err := l.validateID(id); err != nil {
|
||||
@@ -299,7 +231,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
state, err := l.loadState(containerRoot, id)
|
||||
state, err := l.loadState(containerRoot)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -308,6 +240,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
|
||||
processStartTime: state.InitProcessStartTime,
|
||||
fds: state.ExternalDescriptors,
|
||||
}
|
||||
cm, err := manager.NewWithPaths(state.Config.Cgroups, state.CgroupPaths)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c := &linuxContainer{
|
||||
initProcess: r,
|
||||
initProcessStartTime: state.InitProcessStartTime,
|
||||
@@ -318,7 +254,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
|
||||
criuPath: l.CriuPath,
|
||||
newuidmapPath: l.NewuidmapPath,
|
||||
newgidmapPath: l.NewgidmapPath,
|
||||
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
|
||||
cgroupManager: cm,
|
||||
root: containerRoot,
|
||||
created: state.Created,
|
||||
}
|
||||
@@ -343,11 +279,26 @@ func (l *LinuxFactory) StartInitialization() (err error) {
|
||||
envInitPipe := os.Getenv("_LIBCONTAINER_INITPIPE")
|
||||
pipefd, err := strconv.Atoi(envInitPipe)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
|
||||
err = fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE: %w", err)
|
||||
logrus.Error(err)
|
||||
return err
|
||||
}
|
||||
pipe := os.NewFile(uintptr(pipefd), "pipe")
|
||||
defer pipe.Close()
|
||||
|
||||
defer func() {
|
||||
// We have an error during the initialization of the container's init,
|
||||
// send it back to the parent process in the form of an initError.
|
||||
if werr := writeSync(pipe, procError); werr != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
return
|
||||
}
|
||||
if werr := utils.WriteJSON(pipe, &initError{Message: err.Error()}); werr != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
return
|
||||
}
|
||||
}()
|
||||
|
||||
// Only init processes have FIFOFD.
|
||||
fifofd := -1
|
||||
envInitType := os.Getenv("_LIBCONTAINER_INITTYPE")
|
||||
@@ -355,7 +306,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
|
||||
if it == initStandard {
|
||||
envFifoFd := os.Getenv("_LIBCONTAINER_FIFOFD")
|
||||
if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD=%s to int: %s", envFifoFd, err)
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -363,7 +314,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
|
||||
if envConsole := os.Getenv("_LIBCONTAINER_CONSOLE"); envConsole != "" {
|
||||
console, err := strconv.Atoi(envConsole)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err)
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE: %w", err)
|
||||
}
|
||||
consoleSocket = os.NewFile(uintptr(console), "console-socket")
|
||||
defer consoleSocket.Close()
|
||||
@@ -372,32 +323,26 @@ func (l *LinuxFactory) StartInitialization() (err error) {
|
||||
logPipeFdStr := os.Getenv("_LIBCONTAINER_LOGPIPE")
|
||||
logPipeFd, err := strconv.Atoi(logPipeFdStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE=%s to int: %s", logPipeFdStr, err)
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE: %w", err)
|
||||
}
|
||||
|
||||
// Get mount files (O_PATH).
|
||||
mountFds, err := parseMountFds()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// clear the current process's environment to clean any libcontainer
|
||||
// specific env vars.
|
||||
os.Clearenv()
|
||||
|
||||
defer func() {
|
||||
// We have an error during the initialization of the container's init,
|
||||
// send it back to the parent process in the form of an initError.
|
||||
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
return
|
||||
}
|
||||
if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
return
|
||||
}
|
||||
}()
|
||||
defer func() {
|
||||
if e := recover(); e != nil {
|
||||
err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
|
||||
err = fmt.Errorf("panic from initialization: %w, %v", e, string(debug.Stack()))
|
||||
}
|
||||
}()
|
||||
|
||||
i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd)
|
||||
i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd, mountFds)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -406,7 +351,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
|
||||
return i.Init()
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) loadState(root, id string) (*State, error) {
|
||||
func (l *LinuxFactory) loadState(root string) (*State, error) {
|
||||
stateFilePath, err := securejoin.SecureJoin(root, stateFilename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -414,21 +359,21 @@ func (l *LinuxFactory) loadState(root, id string) (*State, error) {
|
||||
f, err := os.Open(stateFilePath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists)
|
||||
return nil, ErrNotExist
|
||||
}
|
||||
return nil, newGenericError(err, SystemError)
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
var state *State
|
||||
if err := json.NewDecoder(f).Decode(&state); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
return nil, err
|
||||
}
|
||||
return state, nil
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) validateID(id string) error {
|
||||
if !idRegex.MatchString(id) || string(os.PathSeparator)+id != utils.CleanPath(string(os.PathSeparator)+id) {
|
||||
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
|
||||
return ErrInvalidID
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -451,3 +396,18 @@ func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func parseMountFds() ([]int, error) {
|
||||
fdsJson := os.Getenv("_LIBCONTAINER_MOUNT_FDS")
|
||||
if fdsJson == "" {
|
||||
// Always return the nil slice if no fd is present.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var mountFds []int
|
||||
if err := json.Unmarshal([]byte(fdsJson), &mountFds); err != nil {
|
||||
return nil, fmt.Errorf("Error unmarshalling _LIBCONTAINER_MOUNT_FDS: %w", err)
|
||||
}
|
||||
|
||||
return mountFds, nil
|
||||
}
|
||||
|
92
vendor/github.com/opencontainers/runc/libcontainer/generic_error.go
generated
vendored
92
vendor/github.com/opencontainers/runc/libcontainer/generic_error.go
generated
vendored
@@ -1,92 +0,0 @@
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/stacktrace"
|
||||
)
|
||||
|
||||
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
|
||||
Code: {{.ECode}}
|
||||
{{if .Message }}
|
||||
Message: {{.Message}}
|
||||
{{end}}
|
||||
Frames:{{range $i, $frame := .Stack.Frames}}
|
||||
---
|
||||
{{$i}}: {{$frame.Function}}
|
||||
Package: {{$frame.Package}}
|
||||
File: {{$frame.File}}@{{$frame.Line}}{{end}}
|
||||
`))
|
||||
|
||||
func newGenericError(err error, c ErrorCode) Error {
|
||||
if le, ok := err.(Error); ok {
|
||||
return le
|
||||
}
|
||||
gerr := &genericError{
|
||||
Timestamp: time.Now(),
|
||||
Err: err,
|
||||
ECode: c,
|
||||
Stack: stacktrace.Capture(1),
|
||||
}
|
||||
if err != nil {
|
||||
gerr.Message = err.Error()
|
||||
}
|
||||
return gerr
|
||||
}
|
||||
|
||||
func newSystemError(err error) Error {
|
||||
return createSystemError(err, "")
|
||||
}
|
||||
|
||||
func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error {
|
||||
return createSystemError(err, fmt.Sprintf(cause, v...))
|
||||
}
|
||||
|
||||
func newSystemErrorWithCause(err error, cause string) Error {
|
||||
return createSystemError(err, cause)
|
||||
}
|
||||
|
||||
// createSystemError creates the specified error with the correct number of
|
||||
// stack frames skipped. This is only to be called by the other functions for
|
||||
// formatting the error.
|
||||
func createSystemError(err error, cause string) Error {
|
||||
gerr := &genericError{
|
||||
Timestamp: time.Now(),
|
||||
Err: err,
|
||||
ECode: SystemError,
|
||||
Cause: cause,
|
||||
Stack: stacktrace.Capture(2),
|
||||
}
|
||||
if err != nil {
|
||||
gerr.Message = err.Error()
|
||||
}
|
||||
return gerr
|
||||
}
|
||||
|
||||
type genericError struct {
|
||||
Timestamp time.Time
|
||||
ECode ErrorCode
|
||||
Err error `json:"-"`
|
||||
Cause string
|
||||
Message string
|
||||
Stack stacktrace.Stacktrace
|
||||
}
|
||||
|
||||
func (e *genericError) Error() string {
|
||||
if e.Cause == "" {
|
||||
return e.Message
|
||||
}
|
||||
frame := e.Stack.Frames[0]
|
||||
return fmt.Sprintf("%s:%d: %s caused: %s", frame.File, frame.Line, e.Cause, e.Message)
|
||||
}
|
||||
|
||||
func (e *genericError) Code() ErrorCode {
|
||||
return e.ECode
|
||||
}
|
||||
|
||||
func (e *genericError) Detail(w io.Writer) error {
|
||||
return errorTemplate.Execute(w, e)
|
||||
}
|
110
vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
generated
vendored
110
vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
generated
vendored
@@ -1,30 +1,29 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"github.com/containerd/console"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/capabilities"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type initType string
|
||||
@@ -77,7 +76,7 @@ type initer interface {
|
||||
Init() error
|
||||
}
|
||||
|
||||
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int) (initer, error) {
|
||||
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, mountFds []int) (initer, error) {
|
||||
var config *initConfig
|
||||
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
|
||||
return nil, err
|
||||
@@ -87,6 +86,11 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd,
|
||||
}
|
||||
switch t {
|
||||
case initSetns:
|
||||
// mountFds must be nil in this case. We don't mount while doing runc exec.
|
||||
if mountFds != nil {
|
||||
return nil, errors.New("mountFds must be nil. Can't mount while doing runc exec.")
|
||||
}
|
||||
|
||||
return &linuxSetnsInit{
|
||||
pipe: pipe,
|
||||
consoleSocket: consoleSocket,
|
||||
@@ -101,6 +105,7 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd,
|
||||
config: config,
|
||||
fifoFd: fifoFd,
|
||||
logFd: logFd,
|
||||
mountFds: mountFds,
|
||||
}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unknown init type %q", t)
|
||||
@@ -139,7 +144,7 @@ func finalizeNamespace(config *initConfig) error {
|
||||
// inherited are marked close-on-exec so they stay out of the
|
||||
// container
|
||||
if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
|
||||
return errors.Wrap(err, "close exec fds")
|
||||
return fmt.Errorf("error closing exec fds: %w", err)
|
||||
}
|
||||
|
||||
// we only do chdir if it's specified
|
||||
@@ -158,7 +163,7 @@ func finalizeNamespace(config *initConfig) error {
|
||||
// to the directory, but the user running runc does not.
|
||||
// This is useful in cases where the cwd is also a volume that's been chowned to the container user.
|
||||
default:
|
||||
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
|
||||
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -174,26 +179,26 @@ func finalizeNamespace(config *initConfig) error {
|
||||
}
|
||||
// drop capabilities in bounding set before changing user
|
||||
if err := w.ApplyBoundingSet(); err != nil {
|
||||
return errors.Wrap(err, "apply bounding set")
|
||||
return fmt.Errorf("unable to apply bounding set: %w", err)
|
||||
}
|
||||
// preserve existing capabilities while we change users
|
||||
if err := system.SetKeepCaps(); err != nil {
|
||||
return errors.Wrap(err, "set keep caps")
|
||||
return fmt.Errorf("unable to set keep caps: %w", err)
|
||||
}
|
||||
if err := setupUser(config); err != nil {
|
||||
return errors.Wrap(err, "setup user")
|
||||
return fmt.Errorf("unable to setup user: %w", err)
|
||||
}
|
||||
// Change working directory AFTER the user has been set up, if we haven't done it yet.
|
||||
if doChdir {
|
||||
if err := unix.Chdir(config.Cwd); err != nil {
|
||||
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
|
||||
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err)
|
||||
}
|
||||
}
|
||||
if err := system.ClearKeepCaps(); err != nil {
|
||||
return errors.Wrap(err, "clear keep caps")
|
||||
return fmt.Errorf("unable to clear keep caps: %w", err)
|
||||
}
|
||||
if err := w.ApplyCaps(); err != nil {
|
||||
return errors.Wrap(err, "apply caps")
|
||||
return fmt.Errorf("unable to apply caps: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -272,6 +277,36 @@ func syncParentHooks(pipe io.ReadWriter) error {
|
||||
return readSync(pipe, procResume)
|
||||
}
|
||||
|
||||
// syncParentSeccomp sends to the given pipe a JSON payload which
|
||||
// indicates that the parent should pick up the seccomp fd with pidfd_getfd()
|
||||
// and send it to the seccomp agent over a unix socket. It then waits for
|
||||
// the parent to indicate that it is cleared to resume and closes the seccompFd.
|
||||
// If the seccompFd is -1, there isn't anything to sync with the parent, so it
|
||||
// returns no error.
|
||||
func syncParentSeccomp(pipe io.ReadWriter, seccompFd int) error {
|
||||
if seccompFd == -1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Tell parent.
|
||||
if err := writeSyncWithFd(pipe, procSeccomp, seccompFd); err != nil {
|
||||
unix.Close(seccompFd)
|
||||
return err
|
||||
}
|
||||
|
||||
// Wait for parent to give the all-clear.
|
||||
if err := readSync(pipe, procSeccompDone); err != nil {
|
||||
unix.Close(seccompFd)
|
||||
return fmt.Errorf("sync parent seccomp: %w", err)
|
||||
}
|
||||
|
||||
if err := unix.Close(seccompFd); err != nil {
|
||||
return fmt.Errorf("close seccomp fd: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupUser changes the groups, gid, and uid for the user inside the container
|
||||
func setupUser(config *initConfig) error {
|
||||
// Set up defaults.
|
||||
@@ -325,11 +360,11 @@ func setupUser(config *initConfig) error {
|
||||
|
||||
// Before we change to the container's user make sure that the processes
|
||||
// STDIO is correctly owned by the user that we are switching to.
|
||||
if err := fixStdioPermissions(config, execUser); err != nil {
|
||||
if err := fixStdioPermissions(execUser); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
setgroups, err := ioutil.ReadFile("/proc/self/setgroups")
|
||||
setgroups, err := os.ReadFile("/proc/self/setgroups")
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
@@ -343,7 +378,7 @@ func setupUser(config *initConfig) error {
|
||||
if allowSupGroups {
|
||||
suppGroups := append(execUser.Sgids, addGroups...)
|
||||
if err := unix.Setgroups(suppGroups); err != nil {
|
||||
return err
|
||||
return &os.SyscallError{Syscall: "setgroups", Err: err}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -366,10 +401,10 @@ func setupUser(config *initConfig) error {
|
||||
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
|
||||
// The ownership needs to match because it is created outside of the container and needs to be
|
||||
// localized.
|
||||
func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
|
||||
func fixStdioPermissions(u *user.ExecUser) error {
|
||||
var null unix.Stat_t
|
||||
if err := unix.Stat("/dev/null", &null); err != nil {
|
||||
return err
|
||||
return &os.PathError{Op: "stat", Path: "/dev/null", Err: err}
|
||||
}
|
||||
for _, fd := range []uintptr{
|
||||
os.Stdin.Fd(),
|
||||
@@ -378,7 +413,7 @@ func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
|
||||
} {
|
||||
var s unix.Stat_t
|
||||
if err := unix.Fstat(int(fd), &s); err != nil {
|
||||
return err
|
||||
return &os.PathError{Op: "fstat", Path: "fd " + strconv.Itoa(int(fd)), Err: err}
|
||||
}
|
||||
|
||||
// Skip chown of /dev/null if it was used as one of the STDIO fds.
|
||||
@@ -399,10 +434,12 @@ func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
|
||||
// privileged_wrt_inode_uidgid() has failed). In either case, we
|
||||
// are in a configuration where it's better for us to just not
|
||||
// touch the stdio rather than bail at this point.
|
||||
|
||||
// nolint:errorlint // unix errors are bare
|
||||
if err == unix.EINVAL || err == unix.EPERM {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
return &os.PathError{Op: "fchown", Path: "fd " + strconv.Itoa(int(fd)), Err: err}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -456,8 +493,8 @@ func setupRoute(config *configs.Config) error {
|
||||
|
||||
func setupRlimits(limits []configs.Rlimit, pid int) error {
|
||||
for _, rlimit := range limits {
|
||||
if err := system.Prlimit(pid, rlimit.Type, unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
|
||||
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
|
||||
if err := unix.Prlimit(pid, rlimit.Type, &unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}, nil); err != nil {
|
||||
return fmt.Errorf("error setting rlimit type %v: %w", rlimit.Type, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -482,27 +519,12 @@ func isWaitable(pid int) (bool, error) {
|
||||
si := &siginfo{}
|
||||
_, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0)
|
||||
if e != 0 {
|
||||
return false, os.NewSyscallError("waitid", e)
|
||||
return false, &os.SyscallError{Syscall: "waitid", Err: e}
|
||||
}
|
||||
|
||||
return si.si_pid != 0, nil
|
||||
}
|
||||
|
||||
// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD) false otherwise
|
||||
func isNoChildren(err error) bool {
|
||||
switch err := err.(type) {
|
||||
case unix.Errno:
|
||||
if err == unix.ECHILD {
|
||||
return true
|
||||
}
|
||||
case *os.SyscallError:
|
||||
if err.Err == unix.ECHILD {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// signalAllProcesses freezes then iterates over all the processes inside the
|
||||
// manager's cgroups sending the signal s to them.
|
||||
// If s is SIGKILL then it will wait for each process to exit.
|
||||
@@ -548,7 +570,7 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
|
||||
for _, p := range procs {
|
||||
if s != unix.SIGKILL {
|
||||
if ok, err := isWaitable(p.Pid); err != nil {
|
||||
if !isNoChildren(err) {
|
||||
if !errors.Is(err, unix.ECHILD) {
|
||||
logrus.Warn("signalAllProcesses: ", p.Pid, err)
|
||||
}
|
||||
continue
|
||||
@@ -565,7 +587,7 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
|
||||
// to retrieve its exit code.
|
||||
if subreaper == 0 {
|
||||
if _, err := p.Wait(); err != nil {
|
||||
if !isNoChildren(err) {
|
||||
if !errors.Is(err, unix.ECHILD) {
|
||||
logrus.Warn("wait: ", err)
|
||||
}
|
||||
}
|
||||
|
241
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
generated
vendored
241
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
generated
vendored
@@ -1,13 +1,11 @@
|
||||
// +build linux
|
||||
|
||||
package intelrdt
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
@@ -15,6 +13,7 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
@@ -70,7 +69,7 @@ import (
|
||||
* |-- ...
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
* |-- <container_id>
|
||||
* |-- <clos>
|
||||
* |-- ...
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
@@ -153,7 +152,7 @@ type Manager interface {
|
||||
// Returns statistics for Intel RDT
|
||||
GetStats() (*Stats, error)
|
||||
|
||||
// Destroys the Intel RDT 'container_id' group
|
||||
// Destroys the Intel RDT container-specific 'container_id' group
|
||||
Destroy() error
|
||||
|
||||
// Returns Intel RDT path to save in a state file and to be able to
|
||||
@@ -181,14 +180,10 @@ func NewManager(config *configs.Config, id string, path string) Manager {
|
||||
}
|
||||
|
||||
const (
|
||||
IntelRdtTasks = "tasks"
|
||||
intelRdtTasks = "tasks"
|
||||
)
|
||||
|
||||
var (
|
||||
// The absolute root path of the Intel RDT "resource control" filesystem
|
||||
intelRdtRoot string
|
||||
intelRdtRootLock sync.Mutex
|
||||
|
||||
// The flag to indicate if Intel RDT/CAT is enabled
|
||||
catEnabled bool
|
||||
// The flag to indicate if Intel RDT/MBA is enabled
|
||||
@@ -198,13 +193,9 @@ var (
|
||||
|
||||
// For Intel RDT initialization
|
||||
initOnce sync.Once
|
||||
)
|
||||
|
||||
type intelRdtData struct {
|
||||
root string
|
||||
config *configs.Config
|
||||
pid int
|
||||
}
|
||||
errNotFound = errors.New("Intel RDT resctrl mount point not found")
|
||||
)
|
||||
|
||||
// Check if Intel RDT sub-features are enabled in featuresInit()
|
||||
func featuresInit() {
|
||||
@@ -215,9 +206,10 @@ func featuresInit() {
|
||||
return
|
||||
}
|
||||
|
||||
// 2. Check if Intel RDT "resource control" filesystem is mounted
|
||||
// The user guarantees to mount the filesystem
|
||||
if !isIntelRdtMounted() {
|
||||
// 2. Check if Intel RDT "resource control" filesystem is available.
|
||||
// The user guarantees to mount the filesystem.
|
||||
root, err := Root()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -226,7 +218,7 @@ func featuresInit() {
|
||||
// selectively disabled or enabled by kernel command line
|
||||
// (e.g., rdt=!l3cat,mba) in 4.14 and newer kernel
|
||||
if flagsSet.CAT {
|
||||
if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "L3")); err == nil {
|
||||
if _, err := os.Stat(filepath.Join(root, "info", "L3")); err == nil {
|
||||
catEnabled = true
|
||||
}
|
||||
}
|
||||
@@ -236,15 +228,15 @@ func featuresInit() {
|
||||
// depends on MBA
|
||||
mbaEnabled = true
|
||||
} else if flagsSet.MBA {
|
||||
if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "MB")); err == nil {
|
||||
if _, err := os.Stat(filepath.Join(root, "info", "MB")); err == nil {
|
||||
mbaEnabled = true
|
||||
}
|
||||
}
|
||||
if flagsSet.MBMTotal || flagsSet.MBMLocal || flagsSet.CMT {
|
||||
if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "L3_MON")); err != nil {
|
||||
if _, err := os.Stat(filepath.Join(root, "info", "L3_MON")); err != nil {
|
||||
return
|
||||
}
|
||||
enabledMonFeatures, err = getMonFeatures(intelRdtRoot)
|
||||
enabledMonFeatures, err = getMonFeatures(root)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@@ -271,7 +263,7 @@ func findIntelRdtMountpointDir(f io.Reader) (string, error) {
|
||||
return "", err
|
||||
}
|
||||
if len(mi) < 1 {
|
||||
return "", NewNotFoundError("Intel RDT")
|
||||
return "", errNotFound
|
||||
}
|
||||
|
||||
// Check if MBA Software Controller is enabled through mount option "-o mba_MBps"
|
||||
@@ -282,10 +274,16 @@ func findIntelRdtMountpointDir(f io.Reader) (string, error) {
|
||||
return mi[0].Mountpoint, nil
|
||||
}
|
||||
|
||||
// Gets the root path of Intel RDT "resource control" filesystem
|
||||
func getIntelRdtRoot() (string, error) {
|
||||
intelRdtRootLock.Lock()
|
||||
defer intelRdtRootLock.Unlock()
|
||||
// For Root() use only.
|
||||
var (
|
||||
intelRdtRoot string
|
||||
rootMu sync.Mutex
|
||||
)
|
||||
|
||||
// Root returns the Intel RDT "resource control" filesystem mount point.
|
||||
func Root() (string, error) {
|
||||
rootMu.Lock()
|
||||
defer rootMu.Unlock()
|
||||
|
||||
if intelRdtRoot != "" {
|
||||
return intelRdtRoot, nil
|
||||
@@ -309,11 +307,6 @@ func getIntelRdtRoot() (string, error) {
|
||||
return intelRdtRoot, nil
|
||||
}
|
||||
|
||||
func isIntelRdtMounted() bool {
|
||||
_, err := getIntelRdtRoot()
|
||||
return err == nil
|
||||
}
|
||||
|
||||
type cpuInfoFlags struct {
|
||||
CAT bool // Cache Allocation Technology
|
||||
MBA bool // Memory Bandwidth Allocation
|
||||
@@ -366,33 +359,15 @@ func parseCpuInfoFile(path string) (cpuInfoFlags, error) {
|
||||
return infoFlags, nil
|
||||
}
|
||||
|
||||
func parseUint(s string, base, bitSize int) (uint64, error) {
|
||||
value, err := strconv.ParseUint(s, base, bitSize)
|
||||
if err != nil {
|
||||
intValue, intErr := strconv.ParseInt(s, base, bitSize)
|
||||
// 1. Handle negative values greater than MinInt64 (and)
|
||||
// 2. Handle negative values lesser than MinInt64
|
||||
if intErr == nil && intValue < 0 {
|
||||
return 0, nil
|
||||
} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return value, err
|
||||
}
|
||||
|
||||
return value, nil
|
||||
}
|
||||
|
||||
// Gets a single uint64 value from the specified file.
|
||||
func getIntelRdtParamUint(path, file string) (uint64, error) {
|
||||
fileName := filepath.Join(path, file)
|
||||
contents, err := ioutil.ReadFile(fileName)
|
||||
contents, err := os.ReadFile(fileName)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
res, err := parseUint(string(bytes.TrimSpace(contents)), 10, 64)
|
||||
res, err := fscommon.ParseUint(string(bytes.TrimSpace(contents)), 10, 64)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName)
|
||||
}
|
||||
@@ -401,7 +376,7 @@ func getIntelRdtParamUint(path, file string) (uint64, error) {
|
||||
|
||||
// Gets a string value from the specified file
|
||||
func getIntelRdtParamString(path, file string) (string, error) {
|
||||
contents, err := ioutil.ReadFile(filepath.Join(path, file))
|
||||
contents, err := os.ReadFile(filepath.Join(path, file))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -413,29 +388,17 @@ func writeFile(dir, file, data string) error {
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", file)
|
||||
}
|
||||
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0o600); err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
|
||||
if err := os.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0o600); err != nil {
|
||||
return newLastCmdError(fmt.Errorf("intelrdt: unable to write %v: %w", data, err))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) {
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &intelRdtData{
|
||||
root: rootPath,
|
||||
config: c,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Get the read-only L3 cache information
|
||||
func getL3CacheInfo() (*L3CacheInfo, error) {
|
||||
l3CacheInfo := &L3CacheInfo{}
|
||||
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
rootPath, err := Root()
|
||||
if err != nil {
|
||||
return l3CacheInfo, err
|
||||
}
|
||||
@@ -465,7 +428,7 @@ func getL3CacheInfo() (*L3CacheInfo, error) {
|
||||
func getMemBwInfo() (*MemBwInfo, error) {
|
||||
memBwInfo := &MemBwInfo{}
|
||||
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
rootPath, err := Root()
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
@@ -498,7 +461,7 @@ func getMemBwInfo() (*MemBwInfo, error) {
|
||||
|
||||
// Get diagnostics for last filesystem operation error from file info/last_cmd_status
|
||||
func getLastCmdStatus() (string, error) {
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
rootPath, err := Root()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -515,13 +478,13 @@ func getLastCmdStatus() (string, error) {
|
||||
// WriteIntelRdtTasks writes the specified pid into the "tasks" file
|
||||
func WriteIntelRdtTasks(dir string, pid int) error {
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", IntelRdtTasks)
|
||||
return fmt.Errorf("no such directory for %s", intelRdtTasks)
|
||||
}
|
||||
|
||||
// Don't attach any pid if -1 is specified as a pid
|
||||
if pid != -1 {
|
||||
if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0o600); err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
|
||||
if err := os.WriteFile(filepath.Join(dir, intelRdtTasks), []byte(strconv.Itoa(pid)), 0o600); err != nil {
|
||||
return newLastCmdError(fmt.Errorf("intelrdt: unable to add pid %d: %w", pid, err))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -545,15 +508,19 @@ func IsMBAScEnabled() bool {
|
||||
return mbaScEnabled
|
||||
}
|
||||
|
||||
// Get the 'container_id' path in Intel RDT "resource control" filesystem
|
||||
func GetIntelRdtPath(id string) (string, error) {
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
// Get the path of the clos group in "resource control" filesystem that the container belongs to
|
||||
func (m *intelRdtManager) getIntelRdtPath() (string, error) {
|
||||
rootPath, err := Root()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
path := filepath.Join(rootPath, id)
|
||||
return path, nil
|
||||
clos := m.id
|
||||
if m.config.IntelRdt != nil && m.config.IntelRdt.ClosID != "" {
|
||||
clos = m.config.IntelRdt.ClosID
|
||||
}
|
||||
|
||||
return filepath.Join(rootPath, clos), nil
|
||||
}
|
||||
|
||||
// Applies Intel RDT configuration to the process with the specified pid
|
||||
@@ -562,30 +529,48 @@ func (m *intelRdtManager) Apply(pid int) (err error) {
|
||||
if m.config.IntelRdt == nil {
|
||||
return nil
|
||||
}
|
||||
d, err := getIntelRdtData(m.config, pid)
|
||||
if err != nil && !IsNotFound(err) {
|
||||
|
||||
path, err := m.getIntelRdtPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
path, err := d.join(m.id)
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
if m.config.IntelRdt.ClosID != "" && m.config.IntelRdt.L3CacheSchema == "" && m.config.IntelRdt.MemBwSchema == "" {
|
||||
// Check that the CLOS exists, i.e. it has been pre-configured to
|
||||
// conform with the runtime spec
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
return fmt.Errorf("clos dir not accessible (must be pre-created when l3CacheSchema and memBwSchema are empty): %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return newLastCmdError(err)
|
||||
}
|
||||
|
||||
if err := WriteIntelRdtTasks(path, pid); err != nil {
|
||||
return newLastCmdError(err)
|
||||
}
|
||||
|
||||
m.path = path
|
||||
return nil
|
||||
}
|
||||
|
||||
// Destroys the Intel RDT 'container_id' group
|
||||
// Destroys the Intel RDT container-specific 'container_id' group
|
||||
func (m *intelRdtManager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if err := os.RemoveAll(m.GetPath()); err != nil {
|
||||
return err
|
||||
// Don't remove resctrl group if closid has been explicitly specified. The
|
||||
// group is likely externally managed, i.e. by some other entity than us.
|
||||
// There are probably other containers/tasks sharing the same group.
|
||||
if m.config.IntelRdt == nil || m.config.IntelRdt.ClosID == "" {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if err := os.RemoveAll(m.GetPath()); err != nil {
|
||||
return err
|
||||
}
|
||||
m.path = ""
|
||||
}
|
||||
m.path = ""
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -593,7 +578,7 @@ func (m *intelRdtManager) Destroy() error {
|
||||
// restore the object later
|
||||
func (m *intelRdtManager) GetPath() string {
|
||||
if m.path == "" {
|
||||
m.path, _ = GetIntelRdtPath(m.id)
|
||||
m.path, _ = m.getIntelRdtPath()
|
||||
}
|
||||
return m.path
|
||||
}
|
||||
@@ -607,9 +592,9 @@ func (m *intelRdtManager) GetStats() (*Stats, error) {
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := NewStats()
|
||||
stats := newStats()
|
||||
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
rootPath, err := Root()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -620,7 +605,7 @@ func (m *intelRdtManager) GetStats() (*Stats, error) {
|
||||
}
|
||||
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
|
||||
|
||||
// The L3 cache and memory bandwidth schemata in 'container_id' group
|
||||
// The L3 cache and memory bandwidth schemata in container's clos group
|
||||
containerPath := m.GetPath()
|
||||
tmpStrings, err := getIntelRdtParamString(containerPath, "schemata")
|
||||
if err != nil {
|
||||
@@ -643,7 +628,7 @@ func (m *intelRdtManager) GetStats() (*Stats, error) {
|
||||
}
|
||||
}
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
// The L3 cache schema in container's clos group
|
||||
for _, schema := range schemaStrings {
|
||||
if strings.Contains(schema, "L3") {
|
||||
stats.L3CacheSchema = strings.TrimSpace(schema)
|
||||
@@ -666,7 +651,7 @@ func (m *intelRdtManager) GetStats() (*Stats, error) {
|
||||
}
|
||||
}
|
||||
|
||||
// The memory bandwidth schema in 'container_id' group
|
||||
// The memory bandwidth schema in container's clos group
|
||||
for _, schema := range schemaStrings {
|
||||
if strings.Contains(schema, "MB") {
|
||||
stats.MemBwSchema = strings.TrimSpace(schema)
|
||||
@@ -736,24 +721,30 @@ func (m *intelRdtManager) Set(container *configs.Config) error {
|
||||
l3CacheSchema := container.IntelRdt.L3CacheSchema
|
||||
memBwSchema := container.IntelRdt.MemBwSchema
|
||||
|
||||
// TODO: verify that l3CacheSchema and/or memBwSchema match the
|
||||
// existing schemata if ClosID has been specified. This is a more
|
||||
// involved than reading the file and doing plain string comparison as
|
||||
// the value written in does not necessarily match what gets read out
|
||||
// (leading zeros, cache id ordering etc).
|
||||
|
||||
// Write a single joint schema string to schemata file
|
||||
if l3CacheSchema != "" && memBwSchema != "" {
|
||||
if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil {
|
||||
return NewLastCmdError(err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Write only L3 cache schema string to schemata file
|
||||
if l3CacheSchema != "" && memBwSchema == "" {
|
||||
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
|
||||
return NewLastCmdError(err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Write only memory bandwidth schema string to schemata file
|
||||
if l3CacheSchema == "" && memBwSchema != "" {
|
||||
if err := writeFile(path, "schemata", memBwSchema); err != nil {
|
||||
return NewLastCmdError(err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -761,56 +752,10 @@ func (m *intelRdtManager) Set(container *configs.Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (raw *intelRdtData) join(id string) (string, error) {
|
||||
path := filepath.Join(raw.root, id)
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return "", NewLastCmdError(err)
|
||||
}
|
||||
|
||||
if err := WriteIntelRdtTasks(path, raw.pid); err != nil {
|
||||
return "", NewLastCmdError(err)
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
type NotFoundError struct {
|
||||
ResourceControl string
|
||||
}
|
||||
|
||||
func (e *NotFoundError) Error() string {
|
||||
return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl)
|
||||
}
|
||||
|
||||
func NewNotFoundError(res string) error {
|
||||
return &NotFoundError{
|
||||
ResourceControl: res,
|
||||
}
|
||||
}
|
||||
|
||||
func IsNotFound(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
_, ok := err.(*NotFoundError)
|
||||
return ok
|
||||
}
|
||||
|
||||
type LastCmdError struct {
|
||||
LastCmdStatus string
|
||||
Err error
|
||||
}
|
||||
|
||||
func (e *LastCmdError) Error() string {
|
||||
return e.Err.Error() + ", last_cmd_status: " + e.LastCmdStatus
|
||||
}
|
||||
|
||||
func NewLastCmdError(err error) error {
|
||||
lastCmdStatus, err1 := getLastCmdStatus()
|
||||
func newLastCmdError(err error) error {
|
||||
status, err1 := getLastCmdStatus()
|
||||
if err1 == nil {
|
||||
return &LastCmdError{
|
||||
LastCmdStatus: lastCmdStatus,
|
||||
Err: err,
|
||||
}
|
||||
return fmt.Errorf("%w, last_cmd_status: %s", err, status)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
2
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package intelrdt
|
||||
|
||||
// The flag to indicate if Intel RDT/MBM is enabled
|
||||
|
3
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go
generated
vendored
3
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go
generated
vendored
@@ -3,7 +3,6 @@ package intelrdt
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
@@ -49,7 +48,7 @@ func parseMonFeatures(reader io.Reader) (monFeatures, error) {
|
||||
}
|
||||
|
||||
func getMonitoringStats(containerPath string, stats *Stats) error {
|
||||
numaFiles, err := ioutil.ReadDir(filepath.Join(containerPath, "mon_data"))
|
||||
numaFiles, err := os.ReadDir(filepath.Join(containerPath, "mon_data"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
4
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
generated
vendored
4
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package intelrdt
|
||||
|
||||
type L3CacheInfo struct {
|
||||
@@ -54,6 +52,6 @@ type Stats struct {
|
||||
CMTStats *[]CMTNumaNodeStats `json:"cmt_stats,omitempty"`
|
||||
}
|
||||
|
||||
func NewStats() *Stats {
|
||||
func newStats() *Stats {
|
||||
return &Stats{}
|
||||
}
|
||||
|
8
vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
generated
vendored
8
vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
generated
vendored
@@ -1,13 +1,11 @@
|
||||
// +build linux
|
||||
|
||||
package keys
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
@@ -16,7 +14,7 @@ type KeySerial uint32
|
||||
func JoinSessionKeyring(name string) (KeySerial, error) {
|
||||
sessKeyID, err := unix.KeyctlJoinSessionKeyring(name)
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "create session key")
|
||||
return 0, fmt.Errorf("unable to create session key: %w", err)
|
||||
}
|
||||
return KeySerial(sessKeyID), nil
|
||||
}
|
||||
|
80
vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go
generated
vendored
80
vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go
generated
vendored
@@ -3,37 +3,28 @@ package logs
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var (
|
||||
configureMutex sync.Mutex
|
||||
// loggingConfigured will be set once logging has been configured via invoking `ConfigureLogging`.
|
||||
// Subsequent invocations of `ConfigureLogging` would be no-op
|
||||
loggingConfigured = false
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
LogLevel logrus.Level
|
||||
LogFormat string
|
||||
LogFilePath string
|
||||
LogPipeFd int
|
||||
LogCaller bool
|
||||
}
|
||||
|
||||
func ForwardLogs(logPipe io.ReadCloser) chan error {
|
||||
done := make(chan error, 1)
|
||||
s := bufio.NewScanner(logPipe)
|
||||
|
||||
logger := logrus.StandardLogger()
|
||||
if logger.ReportCaller {
|
||||
// Need a copy of the standard logger, but with ReportCaller
|
||||
// turned off, as the logs are merely forwarded and their
|
||||
// true source is not this file/line/function.
|
||||
logNoCaller := *logrus.StandardLogger()
|
||||
logNoCaller.ReportCaller = false
|
||||
logger = &logNoCaller
|
||||
}
|
||||
|
||||
go func() {
|
||||
for s.Scan() {
|
||||
processEntry(s.Bytes())
|
||||
processEntry(s.Bytes(), logger)
|
||||
}
|
||||
if err := logPipe.Close(); err != nil {
|
||||
logrus.Errorf("error closing log source: %v", err)
|
||||
@@ -47,60 +38,19 @@ func ForwardLogs(logPipe io.ReadCloser) chan error {
|
||||
return done
|
||||
}
|
||||
|
||||
func processEntry(text []byte) {
|
||||
func processEntry(text []byte, logger *logrus.Logger) {
|
||||
if len(text) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
var jl struct {
|
||||
Level string `json:"level"`
|
||||
Msg string `json:"msg"`
|
||||
Level logrus.Level `json:"level"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
if err := json.Unmarshal(text, &jl); err != nil {
|
||||
logrus.Errorf("failed to decode %q to json: %v", text, err)
|
||||
return
|
||||
}
|
||||
|
||||
lvl, err := logrus.ParseLevel(jl.Level)
|
||||
if err != nil {
|
||||
logrus.Errorf("failed to parse log level %q: %v", jl.Level, err)
|
||||
return
|
||||
}
|
||||
logrus.StandardLogger().Logf(lvl, jl.Msg)
|
||||
}
|
||||
|
||||
func ConfigureLogging(config Config) error {
|
||||
configureMutex.Lock()
|
||||
defer configureMutex.Unlock()
|
||||
|
||||
if loggingConfigured {
|
||||
return errors.New("logging has already been configured")
|
||||
}
|
||||
|
||||
logrus.SetLevel(config.LogLevel)
|
||||
logrus.SetReportCaller(config.LogCaller)
|
||||
|
||||
// XXX: while 0 is a valid fd (usually stdin), here we assume
|
||||
// that we never deliberately set LogPipeFd to 0.
|
||||
if config.LogPipeFd > 0 {
|
||||
logrus.SetOutput(os.NewFile(uintptr(config.LogPipeFd), "logpipe"))
|
||||
} else if config.LogFilePath != "" {
|
||||
f, err := os.OpenFile(config.LogFilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0o644)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
logrus.SetOutput(f)
|
||||
}
|
||||
|
||||
switch config.LogFormat {
|
||||
case "text":
|
||||
// retain logrus's default.
|
||||
case "json":
|
||||
logrus.SetFormatter(new(logrus.JSONFormatter))
|
||||
default:
|
||||
return fmt.Errorf("unknown log-format %q", config.LogFormat)
|
||||
}
|
||||
|
||||
loggingConfigured = true
|
||||
return nil
|
||||
logger.Log(jl.Level, jl.Msg)
|
||||
}
|
||||
|
3
vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
generated
vendored
3
vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
@@ -23,6 +21,7 @@ const (
|
||||
RootlessEUIDAttr uint16 = 27287
|
||||
UidmapPathAttr uint16 = 27288
|
||||
GidmapPathAttr uint16 = 27289
|
||||
MountSourcesAttr uint16 = 27290
|
||||
)
|
||||
|
||||
type Int32msg struct {
|
||||
|
83
vendor/github.com/opencontainers/runc/libcontainer/mount_linux.go
generated
vendored
Normal file
83
vendor/github.com/opencontainers/runc/libcontainer/mount_linux.go
generated
vendored
Normal file
@@ -0,0 +1,83 @@
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// mountError holds an error from a failed mount or unmount operation.
|
||||
type mountError struct {
|
||||
op string
|
||||
source string
|
||||
target string
|
||||
procfd string
|
||||
flags uintptr
|
||||
data string
|
||||
err error
|
||||
}
|
||||
|
||||
// Error provides a string error representation.
|
||||
func (e *mountError) Error() string {
|
||||
out := e.op + " "
|
||||
|
||||
if e.source != "" {
|
||||
out += e.source + ":" + e.target
|
||||
} else {
|
||||
out += e.target
|
||||
}
|
||||
if e.procfd != "" {
|
||||
out += " (via " + e.procfd + ")"
|
||||
}
|
||||
|
||||
if e.flags != uintptr(0) {
|
||||
out += ", flags: 0x" + strconv.FormatUint(uint64(e.flags), 16)
|
||||
}
|
||||
if e.data != "" {
|
||||
out += ", data: " + e.data
|
||||
}
|
||||
|
||||
out += ": " + e.err.Error()
|
||||
return out
|
||||
}
|
||||
|
||||
// Unwrap returns the underlying error.
|
||||
// This is a convention used by Go 1.13+ standard library.
|
||||
func (e *mountError) Unwrap() error {
|
||||
return e.err
|
||||
}
|
||||
|
||||
// mount is a simple unix.Mount wrapper. If procfd is not empty, it is used
|
||||
// instead of target (and the target is only used to add context to an error).
|
||||
func mount(source, target, procfd, fstype string, flags uintptr, data string) error {
|
||||
dst := target
|
||||
if procfd != "" {
|
||||
dst = procfd
|
||||
}
|
||||
if err := unix.Mount(source, dst, fstype, flags, data); err != nil {
|
||||
return &mountError{
|
||||
op: "mount",
|
||||
source: source,
|
||||
target: target,
|
||||
procfd: procfd,
|
||||
flags: flags,
|
||||
data: data,
|
||||
err: err,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// unmount is a simple unix.Unmount wrapper.
|
||||
func unmount(target string, flags int) error {
|
||||
err := unix.Unmount(target, flags)
|
||||
if err != nil {
|
||||
return &mountError{
|
||||
op: "unmount",
|
||||
target: target,
|
||||
flags: uintptr(flags),
|
||||
err: err,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
6
vendor/github.com/opencontainers/runc/libcontainer/network_linux.go
generated
vendored
6
vendor/github.com/opencontainers/runc/libcontainer/network_linux.go
generated
vendored
@@ -1,11 +1,9 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
@@ -75,7 +73,7 @@ func getNetworkInterfaceStats(interfaceName string) (*types.NetworkInterface, er
|
||||
|
||||
// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics
|
||||
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
|
||||
data, err := os.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
5
vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go
generated
vendored
5
vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go
generated
vendored
@@ -1,11 +1,8 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
@@ -35,7 +32,7 @@ func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct
|
||||
|
||||
eventControlPath := filepath.Join(cgDir, "cgroup.event_control")
|
||||
data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg)
|
||||
if err := ioutil.WriteFile(eventControlPath, []byte(data), 0o700); err != nil {
|
||||
if err := os.WriteFile(eventControlPath, []byte(data), 0o700); err != nil {
|
||||
eventfd.Close()
|
||||
evFile.Close()
|
||||
return nil, err
|
||||
|
@@ -1,13 +1,11 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"unsafe"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
@@ -15,19 +13,19 @@ import (
|
||||
func registerMemoryEventV2(cgDir, evName, cgEvName string) (<-chan struct{}, error) {
|
||||
fd, err := unix.InotifyInit()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "unable to init inotify")
|
||||
return nil, fmt.Errorf("unable to init inotify: %w", err)
|
||||
}
|
||||
// watching oom kill
|
||||
evFd, err := unix.InotifyAddWatch(fd, filepath.Join(cgDir, evName), unix.IN_MODIFY)
|
||||
if err != nil {
|
||||
unix.Close(fd)
|
||||
return nil, errors.Wrap(err, "unable to add inotify watch")
|
||||
return nil, fmt.Errorf("unable to add inotify watch: %w", err)
|
||||
}
|
||||
// Because no `unix.IN_DELETE|unix.IN_DELETE_SELF` event for cgroup file system, so watching all process exited
|
||||
cgFd, err := unix.InotifyAddWatch(fd, filepath.Join(cgDir, cgEvName), unix.IN_MODIFY)
|
||||
if err != nil {
|
||||
unix.Close(fd)
|
||||
return nil, errors.Wrap(err, "unable to add inotify watch")
|
||||
return nil, fmt.Errorf("unable to add inotify watch: %w", err)
|
||||
}
|
||||
ch := make(chan struct{})
|
||||
go func() {
|
||||
@@ -53,7 +51,7 @@ func registerMemoryEventV2(cgDir, evName, cgEvName string) (<-chan struct{}, err
|
||||
offset = 0
|
||||
for offset <= uint32(n-unix.SizeofInotifyEvent) {
|
||||
rawEvent := (*unix.InotifyEvent)(unsafe.Pointer(&buffer[offset]))
|
||||
offset += unix.SizeofInotifyEvent + uint32(rawEvent.Len)
|
||||
offset += unix.SizeofInotifyEvent + rawEvent.Len
|
||||
if rawEvent.Mask&unix.IN_MODIFY != unix.IN_MODIFY {
|
||||
continue
|
||||
}
|
19
vendor/github.com/opencontainers/runc/libcontainer/process.go
generated
vendored
19
vendor/github.com/opencontainers/runc/libcontainer/process.go
generated
vendored
@@ -1,7 +1,7 @@
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"errors"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
@@ -9,6 +9,8 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var errInvalidProcess = errors.New("invalid process")
|
||||
|
||||
type processOperations interface {
|
||||
wait() (*os.ProcessState, error)
|
||||
signal(sig os.Signal) error
|
||||
@@ -78,13 +80,22 @@ type Process struct {
|
||||
ops processOperations
|
||||
|
||||
LogLevel string
|
||||
|
||||
// SubCgroupPaths specifies sub-cgroups to run the process in.
|
||||
// Map keys are controller names, map values are paths (relative to
|
||||
// container's top-level cgroup).
|
||||
//
|
||||
// If empty, the default top-level container's cgroup is used.
|
||||
//
|
||||
// For cgroup v2, the only key allowed is "".
|
||||
SubCgroupPaths map[string]string
|
||||
}
|
||||
|
||||
// Wait waits for the process to exit.
|
||||
// Wait releases any resources associated with the Process
|
||||
func (p Process) Wait() (*os.ProcessState, error) {
|
||||
if p.ops == nil {
|
||||
return nil, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
return nil, errInvalidProcess
|
||||
}
|
||||
return p.ops.wait()
|
||||
}
|
||||
@@ -94,7 +105,7 @@ func (p Process) Pid() (int, error) {
|
||||
// math.MinInt32 is returned here, because it's invalid value
|
||||
// for the kill() system call.
|
||||
if p.ops == nil {
|
||||
return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
return math.MinInt32, errInvalidProcess
|
||||
}
|
||||
return p.ops.pid(), nil
|
||||
}
|
||||
@@ -102,7 +113,7 @@ func (p Process) Pid() (int, error) {
|
||||
// Signal sends a signal to the Process.
|
||||
func (p Process) Signal(sig os.Signal) error {
|
||||
if p.ops == nil {
|
||||
return newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
return errInvalidProcess
|
||||
}
|
||||
return p.ops.signal(sig)
|
||||
}
|
||||
|
215
vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
generated
vendored
215
vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
@@ -7,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
@@ -25,10 +24,6 @@ import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// Synchronisation value for cgroup namespace setup.
|
||||
// The same constant is defined in nsexec.c as "CREATECGROUPNS".
|
||||
const createCgroupns = 0x80
|
||||
|
||||
type parentProcess interface {
|
||||
// pid returns the pid for the running process.
|
||||
pid() int
|
||||
@@ -96,7 +91,7 @@ func (p *setnsProcess) start() (retErr error) {
|
||||
p.messageSockPair.child.Close()
|
||||
p.logFilePair.child.Close()
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "starting setns process")
|
||||
return fmt.Errorf("error starting setns process: %w", err)
|
||||
}
|
||||
|
||||
waitInit := initWaiter(p.messageSockPair.parent)
|
||||
@@ -104,7 +99,7 @@ func (p *setnsProcess) start() (retErr error) {
|
||||
if retErr != nil {
|
||||
if newOom, err := p.manager.OOMKillCount(); err == nil && newOom != oom {
|
||||
// Someone in this cgroup was killed, this _might_ be us.
|
||||
retErr = newSystemErrorWithCause(retErr, "possibly OOM-killed")
|
||||
retErr = fmt.Errorf("%w (possibly OOM-killed)", retErr)
|
||||
}
|
||||
werr := <-waitInit
|
||||
if werr != nil {
|
||||
@@ -119,7 +114,7 @@ func (p *setnsProcess) start() (retErr error) {
|
||||
|
||||
if p.bootstrapData != nil {
|
||||
if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
|
||||
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||
return fmt.Errorf("error copying bootstrap data to pipe: %w", err)
|
||||
}
|
||||
}
|
||||
err = <-waitInit
|
||||
@@ -127,14 +122,14 @@ func (p *setnsProcess) start() (retErr error) {
|
||||
return err
|
||||
}
|
||||
if err := p.execSetns(); err != nil {
|
||||
return newSystemErrorWithCause(err, "executing setns process")
|
||||
return fmt.Errorf("error executing setns process: %w", err)
|
||||
}
|
||||
if len(p.cgroupPaths) > 0 {
|
||||
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil && !p.rootlessCgroups {
|
||||
// On cgroup v2 + nesting + domain controllers, EnterPid may fail with EBUSY.
|
||||
for _, path := range p.cgroupPaths {
|
||||
if err := cgroups.WriteCgroupProc(path, p.pid()); err != nil && !p.rootlessCgroups {
|
||||
// On cgroup v2 + nesting + domain controllers, WriteCgroupProc may fail with EBUSY.
|
||||
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
|
||||
// Try to join the cgroup of InitProcessPid.
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
if cgroups.IsCgroup2UnifiedMode() && p.initProcessPid != 0 {
|
||||
initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
|
||||
initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
|
||||
if initCgErr == nil {
|
||||
@@ -148,7 +143,7 @@ func (p *setnsProcess) start() (retErr error) {
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
|
||||
return fmt.Errorf("error adding pid %d to cgroups: %w", p.pid(), err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -157,17 +152,17 @@ func (p *setnsProcess) start() (retErr error) {
|
||||
_, err := os.Stat(p.intelRdtPath)
|
||||
if err == nil {
|
||||
if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil {
|
||||
return newSystemErrorWithCausef(err, "adding pid %d to Intel RDT resource control filesystem", p.pid())
|
||||
return fmt.Errorf("error adding pid %d to Intel RDT: %w", p.pid(), err)
|
||||
}
|
||||
}
|
||||
}
|
||||
// set rlimits, this has to be done here because we lose permissions
|
||||
// to raise the limits once we enter a user-namespace
|
||||
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting rlimits for process")
|
||||
return fmt.Errorf("error setting rlimits for process: %w", err)
|
||||
}
|
||||
if err := utils.WriteJSON(p.messageSockPair.parent, p.config); err != nil {
|
||||
return newSystemErrorWithCause(err, "writing config to pipe")
|
||||
return fmt.Errorf("error writing config to pipe: %w", err)
|
||||
}
|
||||
|
||||
ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error {
|
||||
@@ -178,13 +173,49 @@ func (p *setnsProcess) start() (retErr error) {
|
||||
case procHooks:
|
||||
// This shouldn't happen.
|
||||
panic("unexpected procHooks in setns")
|
||||
case procSeccomp:
|
||||
if p.config.Config.Seccomp.ListenerPath == "" {
|
||||
return errors.New("listenerPath is not set")
|
||||
}
|
||||
|
||||
seccompFd, err := recvSeccompFd(uintptr(p.pid()), uintptr(sync.Fd))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer unix.Close(seccompFd)
|
||||
|
||||
bundle, annotations := utils.Annotations(p.config.Config.Labels)
|
||||
containerProcessState := &specs.ContainerProcessState{
|
||||
Version: specs.Version,
|
||||
Fds: []string{specs.SeccompFdName},
|
||||
Pid: p.cmd.Process.Pid,
|
||||
Metadata: p.config.Config.Seccomp.ListenerMetadata,
|
||||
State: specs.State{
|
||||
Version: specs.Version,
|
||||
ID: p.config.ContainerId,
|
||||
Status: specs.StateRunning,
|
||||
Pid: p.initProcessPid,
|
||||
Bundle: bundle,
|
||||
Annotations: annotations,
|
||||
},
|
||||
}
|
||||
if err := sendContainerProcessState(p.config.Config.Seccomp.ListenerPath,
|
||||
containerProcessState, seccompFd); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Sync with child.
|
||||
if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
default:
|
||||
return newSystemError(errors.New("invalid JSON payload from child"))
|
||||
return errors.New("invalid JSON payload from child")
|
||||
}
|
||||
})
|
||||
|
||||
if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
|
||||
return newSystemErrorWithCause(err, "calling shutdown on init pipe")
|
||||
return &os.PathError{Op: "shutdown", Path: "(init pipe)", Err: err}
|
||||
}
|
||||
// Must be done after Shutdown so the child will exit and we can wait for it.
|
||||
if ierr != nil {
|
||||
@@ -202,16 +233,16 @@ func (p *setnsProcess) execSetns() error {
|
||||
status, err := p.cmd.Process.Wait()
|
||||
if err != nil {
|
||||
_ = p.cmd.Wait()
|
||||
return newSystemErrorWithCause(err, "waiting on setns process to finish")
|
||||
return fmt.Errorf("error waiting on setns process to finish: %w", err)
|
||||
}
|
||||
if !status.Success() {
|
||||
_ = p.cmd.Wait()
|
||||
return newSystemError(&exec.ExitError{ProcessState: status})
|
||||
return &exec.ExitError{ProcessState: status}
|
||||
}
|
||||
var pid *pid
|
||||
if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil {
|
||||
_ = p.cmd.Wait()
|
||||
return newSystemErrorWithCause(err, "reading pid from init pipe")
|
||||
return fmt.Errorf("error reading pid from init pipe: %w", err)
|
||||
}
|
||||
|
||||
// Clean up the zombie parent process
|
||||
@@ -335,7 +366,7 @@ func (p *initProcess) start() (retErr error) {
|
||||
_ = p.logFilePair.child.Close()
|
||||
if err != nil {
|
||||
p.process.ops = nil
|
||||
return newSystemErrorWithCause(err, "starting init process command")
|
||||
return fmt.Errorf("unable to start init: %w", err)
|
||||
}
|
||||
|
||||
waitInit := initWaiter(p.messageSockPair.parent)
|
||||
@@ -355,9 +386,9 @@ func (p *initProcess) start() (retErr error) {
|
||||
if logrus.GetLevel() >= logrus.DebugLevel {
|
||||
// Only show the original error if debug is set,
|
||||
// as it is not generally very useful.
|
||||
retErr = newSystemErrorWithCause(retErr, oomError)
|
||||
retErr = fmt.Errorf(oomError+": %w", retErr)
|
||||
} else {
|
||||
retErr = newSystemError(errors.New(oomError))
|
||||
retErr = errors.New(oomError)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -382,15 +413,15 @@ func (p *initProcess) start() (retErr error) {
|
||||
// cgroup. We don't need to worry about not doing this and not being root
|
||||
// because we'd be using the rootless cgroup manager in that case.
|
||||
if err := p.manager.Apply(p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
|
||||
return fmt.Errorf("unable to apply cgroup configuration: %w", err)
|
||||
}
|
||||
if p.intelRdtManager != nil {
|
||||
if err := p.intelRdtManager.Apply(p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
|
||||
return fmt.Errorf("unable to apply Intel RDT configuration: %w", err)
|
||||
}
|
||||
}
|
||||
if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
|
||||
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||
return fmt.Errorf("can't copy bootstrap data to pipe: %w", err)
|
||||
}
|
||||
err = <-waitInit
|
||||
if err != nil {
|
||||
@@ -399,7 +430,7 @@ func (p *initProcess) start() (retErr error) {
|
||||
|
||||
childPid, err := p.getChildPid()
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "getting the final child's pid from pipe")
|
||||
return fmt.Errorf("can't get final child's PID from pipe: %w", err)
|
||||
}
|
||||
|
||||
// Save the standard descriptor names before the container process
|
||||
@@ -407,30 +438,23 @@ func (p *initProcess) start() (retErr error) {
|
||||
// we won't know at checkpoint time which file descriptor to look up.
|
||||
fds, err := getPipeFds(childPid)
|
||||
if err != nil {
|
||||
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", childPid)
|
||||
return fmt.Errorf("error getting pipe fds for pid %d: %w", childPid, err)
|
||||
}
|
||||
p.setExternalDescriptors(fds)
|
||||
|
||||
// Now it's time to setup cgroup namesapce
|
||||
if p.config.Config.Namespaces.Contains(configs.NEWCGROUP) && p.config.Config.Namespaces.PathOf(configs.NEWCGROUP) == "" {
|
||||
if _, err := p.messageSockPair.parent.Write([]byte{createCgroupns}); err != nil {
|
||||
return newSystemErrorWithCause(err, "sending synchronization value to init process")
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for our first child to exit
|
||||
if err := p.waitForChildExit(childPid); err != nil {
|
||||
return newSystemErrorWithCause(err, "waiting for our first child to exit")
|
||||
return fmt.Errorf("error waiting for our first child to exit: %w", err)
|
||||
}
|
||||
|
||||
if err := p.createNetworkInterfaces(); err != nil {
|
||||
return newSystemErrorWithCause(err, "creating network interfaces")
|
||||
return fmt.Errorf("error creating network interfaces: %w", err)
|
||||
}
|
||||
if err := p.updateSpecState(); err != nil {
|
||||
return newSystemErrorWithCause(err, "updating the spec state")
|
||||
return fmt.Errorf("error updating spec state: %w", err)
|
||||
}
|
||||
if err := p.sendConfig(); err != nil {
|
||||
return newSystemErrorWithCause(err, "sending config to init process")
|
||||
return fmt.Errorf("error sending config to init process: %w", err)
|
||||
}
|
||||
var (
|
||||
sentRun bool
|
||||
@@ -439,25 +463,60 @@ func (p *initProcess) start() (retErr error) {
|
||||
|
||||
ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error {
|
||||
switch sync.Type {
|
||||
case procSeccomp:
|
||||
if p.config.Config.Seccomp.ListenerPath == "" {
|
||||
return errors.New("listenerPath is not set")
|
||||
}
|
||||
|
||||
seccompFd, err := recvSeccompFd(uintptr(childPid), uintptr(sync.Fd))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer unix.Close(seccompFd)
|
||||
|
||||
s, err := p.container.currentOCIState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// initProcessStartTime hasn't been set yet.
|
||||
s.Pid = p.cmd.Process.Pid
|
||||
s.Status = specs.StateCreating
|
||||
containerProcessState := &specs.ContainerProcessState{
|
||||
Version: specs.Version,
|
||||
Fds: []string{specs.SeccompFdName},
|
||||
Pid: s.Pid,
|
||||
Metadata: p.config.Config.Seccomp.ListenerMetadata,
|
||||
State: *s,
|
||||
}
|
||||
if err := sendContainerProcessState(p.config.Config.Seccomp.ListenerPath,
|
||||
containerProcessState, seccompFd); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Sync with child.
|
||||
if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
|
||||
return err
|
||||
}
|
||||
case procReady:
|
||||
// set rlimits, this has to be done here because we lose permissions
|
||||
// to raise the limits once we enter a user-namespace
|
||||
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting rlimits for ready process")
|
||||
return fmt.Errorf("error setting rlimits for ready process: %w", err)
|
||||
}
|
||||
// call prestart and CreateRuntime hooks
|
||||
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||
// Setup cgroup before the hook, so that the prestart and CreateRuntime hook could apply cgroup permissions.
|
||||
if err := p.manager.Set(p.config.Config.Cgroups.Resources); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
|
||||
return fmt.Errorf("error setting cgroup config for ready process: %w", err)
|
||||
}
|
||||
if p.intelRdtManager != nil {
|
||||
if err := p.intelRdtManager.Set(p.config.Config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting Intel RDT config for ready process")
|
||||
return fmt.Errorf("error setting Intel RDT config for ready process: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if p.config.Config.Hooks != nil {
|
||||
if len(p.config.Config.Hooks) != 0 {
|
||||
s, err := p.container.currentOCIState()
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -493,26 +552,26 @@ func (p *initProcess) start() (retErr error) {
|
||||
// procRun sync.
|
||||
state, uerr := p.container.updateState(p)
|
||||
if uerr != nil {
|
||||
return newSystemErrorWithCause(err, "store init state")
|
||||
return fmt.Errorf("unable to store init state: %w", err)
|
||||
}
|
||||
p.container.initProcessStartTime = state.InitProcessStartTime
|
||||
|
||||
// Sync with child.
|
||||
if err := writeSync(p.messageSockPair.parent, procRun); err != nil {
|
||||
return newSystemErrorWithCause(err, "writing syncT 'run'")
|
||||
return err
|
||||
}
|
||||
sentRun = true
|
||||
case procHooks:
|
||||
// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
|
||||
if err := p.manager.Set(p.config.Config.Cgroups.Resources); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting cgroup config for procHooks process")
|
||||
return fmt.Errorf("error setting cgroup config for procHooks process: %w", err)
|
||||
}
|
||||
if p.intelRdtManager != nil {
|
||||
if err := p.intelRdtManager.Set(p.config.Config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting Intel RDT config for procHooks process")
|
||||
return fmt.Errorf("error setting Intel RDT config for procHooks process: %w", err)
|
||||
}
|
||||
}
|
||||
if p.config.Config.Hooks != nil {
|
||||
if len(p.config.Config.Hooks) != 0 {
|
||||
s, err := p.container.currentOCIState()
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -531,24 +590,24 @@ func (p *initProcess) start() (retErr error) {
|
||||
}
|
||||
// Sync with child.
|
||||
if err := writeSync(p.messageSockPair.parent, procResume); err != nil {
|
||||
return newSystemErrorWithCause(err, "writing syncT 'resume'")
|
||||
return err
|
||||
}
|
||||
sentResume = true
|
||||
default:
|
||||
return newSystemError(errors.New("invalid JSON payload from child"))
|
||||
return errors.New("invalid JSON payload from child")
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if !sentRun {
|
||||
return newSystemErrorWithCause(ierr, "container init")
|
||||
return fmt.Errorf("error during container init: %w", ierr)
|
||||
}
|
||||
if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
|
||||
return newSystemError(errors.New("could not synchronise after executing prestart and CreateRuntime hooks with container process"))
|
||||
return errors.New("could not synchronise after executing prestart and CreateRuntime hooks with container process")
|
||||
}
|
||||
if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
|
||||
return newSystemErrorWithCause(err, "shutting down init pipe")
|
||||
return &os.PathError{Op: "shutdown", Path: "(init pipe)", Err: err}
|
||||
}
|
||||
|
||||
// Must be done after Shutdown so the child will exit and we can wait for it.
|
||||
@@ -634,6 +693,46 @@ func (p *initProcess) forwardChildLogs() chan error {
|
||||
return logs.ForwardLogs(p.logFilePair.parent)
|
||||
}
|
||||
|
||||
func recvSeccompFd(childPid, childFd uintptr) (int, error) {
|
||||
pidfd, _, errno := unix.Syscall(unix.SYS_PIDFD_OPEN, childPid, 0, 0)
|
||||
if errno != 0 {
|
||||
return -1, fmt.Errorf("performing SYS_PIDFD_OPEN syscall: %w", errno)
|
||||
}
|
||||
defer unix.Close(int(pidfd))
|
||||
|
||||
seccompFd, _, errno := unix.Syscall(unix.SYS_PIDFD_GETFD, pidfd, childFd, 0)
|
||||
if errno != 0 {
|
||||
return -1, fmt.Errorf("performing SYS_PIDFD_GETFD syscall: %w", errno)
|
||||
}
|
||||
|
||||
return int(seccompFd), nil
|
||||
}
|
||||
|
||||
func sendContainerProcessState(listenerPath string, state *specs.ContainerProcessState, fd int) error {
|
||||
conn, err := net.Dial("unix", listenerPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to connect with seccomp agent specified in the seccomp profile: %w", err)
|
||||
}
|
||||
|
||||
socket, err := conn.(*net.UnixConn).File()
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot get seccomp socket: %w", err)
|
||||
}
|
||||
defer socket.Close()
|
||||
|
||||
b, err := json.Marshal(state)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot marshall seccomp state: %w", err)
|
||||
}
|
||||
|
||||
err = utils.SendFds(socket, b, fd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot send seccomp fd to %s: %w", listenerPath, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getPipeFds(pid int) ([]string, error) {
|
||||
fds := make([]string, 3)
|
||||
|
||||
@@ -694,7 +793,7 @@ func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
|
||||
// change ownership of the pipes in case we are in a user namespace
|
||||
for _, fd := range fds {
|
||||
if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil {
|
||||
return nil, err
|
||||
return nil, &os.PathError{Op: "fchown", Path: "fd " + strconv.Itoa(int(fd)), Err: err}
|
||||
}
|
||||
}
|
||||
return i, nil
|
||||
@@ -719,7 +818,7 @@ func initWaiter(r io.Reader) chan error {
|
||||
return
|
||||
}
|
||||
}
|
||||
ch <- newSystemErrorWithCause(err, "waiting for init preliminary setup")
|
||||
ch <- fmt.Errorf("waiting for init preliminary setup: %w", err)
|
||||
}()
|
||||
|
||||
return ch
|
||||
|
15
vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
generated
vendored
15
vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
generated
vendored
@@ -1,9 +1,7 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"errors"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
||||
@@ -31,7 +29,7 @@ type restoredProcess struct {
|
||||
}
|
||||
|
||||
func (p *restoredProcess) start() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
|
||||
return errors.New("restored process cannot be started")
|
||||
}
|
||||
|
||||
func (p *restoredProcess) pid() int {
|
||||
@@ -51,7 +49,8 @@ func (p *restoredProcess) wait() (*os.ProcessState, error) {
|
||||
// maybe use --exec-cmd in criu
|
||||
err := p.cmd.Wait()
|
||||
if err != nil {
|
||||
if _, ok := err.(*exec.ExitError); !ok {
|
||||
var exitErr *exec.ExitError
|
||||
if !errors.As(err, &exitErr) {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
@@ -89,7 +88,7 @@ type nonChildProcess struct {
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) start() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
|
||||
return errors.New("restored process cannot be started")
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) pid() int {
|
||||
@@ -97,11 +96,11 @@ func (p *nonChildProcess) pid() int {
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) terminate() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
|
||||
return errors.New("restored process cannot be terminated")
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) wait() (*os.ProcessState, error) {
|
||||
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
|
||||
return nil, errors.New("restored process cannot be waited on")
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) startTime() (uint64, error) {
|
||||
|
250
vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
generated
vendored
250
vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
generated
vendored
@@ -1,15 +1,14 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -36,6 +35,7 @@ type mountConfig struct {
|
||||
cgroup2Path string
|
||||
rootlessCgroups bool
|
||||
cgroupns bool
|
||||
fd *int
|
||||
}
|
||||
|
||||
// needsSetupDev returns true if /dev needs to be set up.
|
||||
@@ -51,10 +51,14 @@ func needsSetupDev(config *configs.Config) bool {
|
||||
// prepareRootfs sets up the devices, mount points, and filesystems for use
|
||||
// inside a new mount namespace. It doesn't set anything as ro. You must call
|
||||
// finalizeRootfs after this function to finish setting up the rootfs.
|
||||
func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||||
func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig, mountFds []int) (err error) {
|
||||
config := iConfig.Config
|
||||
if err := prepareRoot(config); err != nil {
|
||||
return newSystemErrorWithCause(err, "preparing rootfs")
|
||||
return fmt.Errorf("error preparing rootfs: %w", err)
|
||||
}
|
||||
|
||||
if mountFds != nil && len(mountFds) != len(config.Mounts) {
|
||||
return fmt.Errorf("malformed mountFds slice. Expected size: %v, got: %v. Slice: %v", len(config.Mounts), len(mountFds), mountFds)
|
||||
}
|
||||
|
||||
mountConfig := &mountConfig{
|
||||
@@ -65,32 +69,39 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||||
cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
|
||||
}
|
||||
setupDev := needsSetupDev(config)
|
||||
for _, m := range config.Mounts {
|
||||
for i, m := range config.Mounts {
|
||||
for _, precmd := range m.PremountCmds {
|
||||
if err := mountCmd(precmd); err != nil {
|
||||
return newSystemErrorWithCause(err, "running premount command")
|
||||
return fmt.Errorf("error running premount command: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Just before the loop we checked that if not empty, len(mountFds) == len(config.Mounts).
|
||||
// Therefore, we can access mountFds[i] without any concerns.
|
||||
if mountFds != nil && mountFds[i] != -1 {
|
||||
mountConfig.fd = &mountFds[i]
|
||||
}
|
||||
|
||||
if err := mountToRootfs(m, mountConfig); err != nil {
|
||||
return newSystemErrorWithCausef(err, "mounting %q to rootfs at %q", m.Source, m.Destination)
|
||||
return fmt.Errorf("error mounting %q to rootfs at %q: %w", m.Source, m.Destination, err)
|
||||
}
|
||||
|
||||
for _, postcmd := range m.PostmountCmds {
|
||||
if err := mountCmd(postcmd); err != nil {
|
||||
return newSystemErrorWithCause(err, "running postmount command")
|
||||
return fmt.Errorf("error running postmount command: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if setupDev {
|
||||
if err := createDevices(config); err != nil {
|
||||
return newSystemErrorWithCause(err, "creating device nodes")
|
||||
return fmt.Errorf("error creating device nodes: %w", err)
|
||||
}
|
||||
if err := setupPtmx(config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting up ptmx")
|
||||
return fmt.Errorf("error setting up ptmx: %w", err)
|
||||
}
|
||||
if err := setupDevSymlinks(config.Rootfs); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting up /dev symlinks")
|
||||
return fmt.Errorf("error setting up /dev symlinks: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,7 +123,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||||
// operation not being perfectly split).
|
||||
|
||||
if err := unix.Chdir(config.Rootfs); err != nil {
|
||||
return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
|
||||
return &os.PathError{Op: "chdir", Path: config.Rootfs, Err: err}
|
||||
}
|
||||
|
||||
s := iConfig.SpecState
|
||||
@@ -130,12 +141,12 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||||
err = chroot()
|
||||
}
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "jailing process inside rootfs")
|
||||
return fmt.Errorf("error jailing process inside rootfs: %w", err)
|
||||
}
|
||||
|
||||
if setupDev {
|
||||
if err := reOpenDevNull(); err != nil {
|
||||
return newSystemErrorWithCause(err, "reopening /dev/null inside container")
|
||||
return fmt.Errorf("error reopening /dev/null inside container: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -161,7 +172,7 @@ func finalizeRootfs(config *configs.Config) (err error) {
|
||||
}
|
||||
if m.Device == "tmpfs" || utils.CleanPath(m.Destination) == "/dev" {
|
||||
if err := remountReadonly(m); err != nil {
|
||||
return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -169,7 +180,7 @@ func finalizeRootfs(config *configs.Config) (err error) {
|
||||
// set rootfs ( / ) as readonly
|
||||
if config.Readonlyfs {
|
||||
if err := setReadonly(); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting rootfs as readonly")
|
||||
return fmt.Errorf("error setting rootfs as readonly: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -183,14 +194,14 @@ func finalizeRootfs(config *configs.Config) (err error) {
|
||||
|
||||
// /tmp has to be mounted as private to allow MS_MOVE to work in all situations
|
||||
func prepareTmp(topTmpDir string) (string, error) {
|
||||
tmpdir, err := ioutil.TempDir(topTmpDir, "runctop")
|
||||
tmpdir, err := os.MkdirTemp(topTmpDir, "runctop")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := unix.Mount(tmpdir, tmpdir, "bind", unix.MS_BIND, ""); err != nil {
|
||||
if err := mount(tmpdir, tmpdir, "", "bind", unix.MS_BIND, ""); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := unix.Mount("", tmpdir, "", uintptr(unix.MS_PRIVATE), ""); err != nil {
|
||||
if err := mount("", tmpdir, "", "", uintptr(unix.MS_PRIVATE), ""); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return tmpdir, nil
|
||||
@@ -206,13 +217,18 @@ func mountCmd(cmd configs.Command) error {
|
||||
command.Env = cmd.Env
|
||||
command.Dir = cmd.Dir
|
||||
if out, err := command.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err)
|
||||
return fmt.Errorf("%#v failed: %s: %w", cmd, string(out), err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func prepareBindMount(m *configs.Mount, rootfs string) error {
|
||||
stat, err := os.Stat(m.Source)
|
||||
func prepareBindMount(m *configs.Mount, rootfs string, mountFd *int) error {
|
||||
source := m.Source
|
||||
if mountFd != nil {
|
||||
source = "/proc/self/fd/" + strconv.Itoa(*mountFd)
|
||||
}
|
||||
|
||||
stat, err := os.Stat(source)
|
||||
if err != nil {
|
||||
// error out if the source of a bind mount does not exist as we will be
|
||||
// unable to bind anything to it.
|
||||
@@ -226,7 +242,7 @@ func prepareBindMount(m *configs.Mount, rootfs string) error {
|
||||
if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
|
||||
if err := checkProcMount(rootfs, dest, source); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
||||
@@ -256,9 +272,11 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
|
||||
Data: "mode=755",
|
||||
PropagationFlags: m.PropagationFlags,
|
||||
}
|
||||
|
||||
if err := mountToRootfs(tmpfs, c); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, b := range binds {
|
||||
if c.cgroupns {
|
||||
subsystemPath := filepath.Join(c.root, b.Destination)
|
||||
@@ -278,7 +296,7 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
|
||||
data = cgroups.CgroupNamePrefix + data
|
||||
source = "systemd"
|
||||
}
|
||||
return unix.Mount(source, procfd, "cgroup", uintptr(flags), data)
|
||||
return mount(source, b.Destination, procfd, "cgroup", uintptr(flags), data)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -310,9 +328,9 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
|
||||
return err
|
||||
}
|
||||
return utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
|
||||
if err := unix.Mount(m.Source, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
||||
if err := mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
||||
// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
||||
if err == unix.EPERM || err == unix.EBUSY {
|
||||
if errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY) {
|
||||
src := fs2.UnifiedMountpoint
|
||||
if c.cgroupns && c.cgroup2Path != "" {
|
||||
// Emulate cgroupns by bind-mounting
|
||||
@@ -320,8 +338,8 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
|
||||
// the whole /sys/fs/cgroup.
|
||||
src = c.cgroup2Path
|
||||
}
|
||||
err = unix.Mount(src, procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
||||
if err == unix.ENOENT && c.rootlessCgroups {
|
||||
err = mount(src, m.Destination, procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
||||
if c.rootlessCgroups && errors.Is(err, unix.ENOENT) {
|
||||
err = nil
|
||||
}
|
||||
}
|
||||
@@ -335,12 +353,12 @@ func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
|
||||
// Set up a scratch dir for the tmpfs on the host.
|
||||
tmpdir, err := prepareTmp("/tmp")
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
|
||||
return fmt.Errorf("tmpcopyup: failed to setup tmpdir: %w", err)
|
||||
}
|
||||
defer cleanupTmp(tmpdir)
|
||||
tmpDir, err := ioutil.TempDir(tmpdir, "runctmpdir")
|
||||
tmpDir, err := os.MkdirTemp(tmpdir, "runctmpdir")
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
||||
return fmt.Errorf("tmpcopyup: failed to create tmpdir: %w", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
@@ -348,15 +366,15 @@ func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
|
||||
// m.Destination since we are going to mount *on the host*.
|
||||
oldDest := m.Destination
|
||||
m.Destination = tmpDir
|
||||
err = mountPropagate(m, "/", mountLabel)
|
||||
err = mountPropagate(m, "/", mountLabel, nil)
|
||||
m.Destination = oldDest
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
if err := unix.Unmount(tmpDir, unix.MNT_DETACH); err != nil {
|
||||
logrus.Warnf("tmpcopyup: failed to unmount tmpdir on error: %v", err)
|
||||
if err := unmount(tmpDir, unix.MNT_DETACH); err != nil {
|
||||
logrus.Warnf("tmpcopyup: %v", err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
@@ -369,8 +387,8 @@ func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
|
||||
return fmt.Errorf("tmpcopyup: failed to copy %s to %s (%s): %w", m.Destination, procfd, tmpDir, err)
|
||||
}
|
||||
// Now move the mount into the container.
|
||||
if err := unix.Mount(tmpDir, procfd, "", unix.MS_MOVE, ""); err != nil {
|
||||
return fmt.Errorf("tmpcopyup: failed to move mount %s to %s (%s): %w", tmpDir, procfd, m.Destination, err)
|
||||
if err := mount(tmpDir, m.Destination, procfd, "", unix.MS_MOVE, ""); err != nil {
|
||||
return fmt.Errorf("tmpcopyup: failed to move mount: %w", err)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
@@ -379,6 +397,7 @@ func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
|
||||
func mountToRootfs(m *configs.Mount, c *mountConfig) error {
|
||||
rootfs := c.root
|
||||
mountLabel := c.label
|
||||
mountFd := c.fd
|
||||
dest, err := securejoin.SecureJoin(rootfs, m.Destination)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -402,12 +421,12 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
|
||||
return err
|
||||
}
|
||||
// Selinux kernels do not support labeling of /proc or /sys
|
||||
return mountPropagate(m, rootfs, "")
|
||||
return mountPropagate(m, rootfs, "", nil)
|
||||
case "mqueue":
|
||||
if err := os.MkdirAll(dest, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := mountPropagate(m, rootfs, ""); err != nil {
|
||||
if err := mountPropagate(m, rootfs, "", nil); err != nil {
|
||||
return err
|
||||
}
|
||||
return label.SetFileLabel(dest, mountLabel)
|
||||
@@ -422,11 +441,13 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
|
||||
if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP {
|
||||
err = doTmpfsCopyUp(m, rootfs, mountLabel)
|
||||
} else {
|
||||
err = mountPropagate(m, rootfs, mountLabel)
|
||||
err = mountPropagate(m, rootfs, mountLabel, nil)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if stat != nil {
|
||||
if err = os.Chmod(dest, stat.Mode()); err != nil {
|
||||
return err
|
||||
@@ -434,17 +455,17 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
|
||||
}
|
||||
return nil
|
||||
case "bind":
|
||||
if err := prepareBindMount(m, rootfs); err != nil {
|
||||
if err := prepareBindMount(m, rootfs, mountFd); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||
if err := mountPropagate(m, rootfs, mountLabel, mountFd); err != nil {
|
||||
return err
|
||||
}
|
||||
// bind mount won't change mount options, we need remount to make mount options effective.
|
||||
// first check that we have non-default options required before attempting a remount
|
||||
if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 {
|
||||
// only remount if unique mount options are set
|
||||
if err := remount(m, rootfs); err != nil {
|
||||
if err := remount(m, rootfs, mountFd); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -470,7 +491,10 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
|
||||
if err := os.MkdirAll(dest, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
return mountPropagate(m, rootfs, mountLabel)
|
||||
return mountPropagate(m, rootfs, mountLabel, mountFd)
|
||||
}
|
||||
if err := setRecAttr(m, rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -570,7 +594,7 @@ func checkProcMount(rootfs, dest, source string) error {
|
||||
func isProc(path string) (bool, error) {
|
||||
var s unix.Statfs_t
|
||||
if err := unix.Statfs(path, &s); err != nil {
|
||||
return false, err
|
||||
return false, &os.PathError{Op: "statfs", Path: path, Err: err}
|
||||
}
|
||||
return s.Type == unix.PROC_SUPER_MAGIC, nil
|
||||
}
|
||||
@@ -593,7 +617,7 @@ func setupDevSymlinks(rootfs string) error {
|
||||
dst = filepath.Join(rootfs, link[1])
|
||||
)
|
||||
if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("symlink %s %s %s", src, dst, err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -607,20 +631,24 @@ func reOpenDevNull() error {
|
||||
var stat, devNullStat unix.Stat_t
|
||||
file, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to open /dev/null - %s", err)
|
||||
return err
|
||||
}
|
||||
defer file.Close() //nolint: errcheck
|
||||
if err := unix.Fstat(int(file.Fd()), &devNullStat); err != nil {
|
||||
return err
|
||||
return &os.PathError{Op: "fstat", Path: file.Name(), Err: err}
|
||||
}
|
||||
for fd := 0; fd < 3; fd++ {
|
||||
if err := unix.Fstat(fd, &stat); err != nil {
|
||||
return err
|
||||
return &os.PathError{Op: "fstat", Path: "fd " + strconv.Itoa(fd), Err: err}
|
||||
}
|
||||
if stat.Rdev == devNullStat.Rdev {
|
||||
// Close and re-open the fd.
|
||||
if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil {
|
||||
return err
|
||||
return &os.PathError{
|
||||
Op: "dup3",
|
||||
Path: "fd " + strconv.Itoa(int(file.Fd())),
|
||||
Err: err,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -658,7 +686,7 @@ func bindMountDeviceNode(rootfs, dest string, node *devices.Device) error {
|
||||
_ = f.Close()
|
||||
}
|
||||
return utils.WithProcfd(rootfs, dest, func(procfd string) error {
|
||||
return unix.Mount(node.Path, procfd, "bind", unix.MS_BIND, "")
|
||||
return mount(node.Path, dest, procfd, "bind", unix.MS_BIND, "")
|
||||
})
|
||||
}
|
||||
|
||||
@@ -679,9 +707,9 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
|
||||
return bindMountDeviceNode(rootfs, dest, node)
|
||||
}
|
||||
if err := mknodDevice(dest, node); err != nil {
|
||||
if os.IsExist(err) {
|
||||
if errors.Is(err, os.ErrExist) {
|
||||
return nil
|
||||
} else if os.IsPermission(err) {
|
||||
} else if errors.Is(err, os.ErrPermission) {
|
||||
return bindMountDeviceNode(rootfs, dest, node)
|
||||
}
|
||||
return err
|
||||
@@ -706,9 +734,9 @@ func mknodDevice(dest string, node *devices.Device) error {
|
||||
return err
|
||||
}
|
||||
if err := unix.Mknod(dest, uint32(fileMode), int(dev)); err != nil {
|
||||
return err
|
||||
return &os.PathError{Op: "mknod", Path: dest, Err: err}
|
||||
}
|
||||
return unix.Chown(dest, int(node.Uid), int(node.Gid))
|
||||
return os.Chown(dest, int(node.Uid), int(node.Gid))
|
||||
}
|
||||
|
||||
// Get the parent mount point of directory passed in as argument. Also return
|
||||
@@ -755,7 +783,7 @@ func rootfsParentMountPrivate(rootfs string) error {
|
||||
// shared. Secondly when we bind mount rootfs it will propagate to
|
||||
// parent namespace and we don't want that to happen.
|
||||
if sharedMount {
|
||||
return unix.Mount("", parentMount, "", unix.MS_PRIVATE, "")
|
||||
return mount("", parentMount, "", "", unix.MS_PRIVATE, "")
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -766,7 +794,7 @@ func prepareRoot(config *configs.Config) error {
|
||||
if config.RootPropagation != 0 {
|
||||
flag = config.RootPropagation
|
||||
}
|
||||
if err := unix.Mount("", "/", "", uintptr(flag), ""); err != nil {
|
||||
if err := mount("", "/", "", "", uintptr(flag), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -777,13 +805,13 @@ func prepareRoot(config *configs.Config) error {
|
||||
return err
|
||||
}
|
||||
|
||||
return unix.Mount(config.Rootfs, config.Rootfs, "bind", unix.MS_BIND|unix.MS_REC, "")
|
||||
return mount(config.Rootfs, config.Rootfs, "", "bind", unix.MS_BIND|unix.MS_REC, "")
|
||||
}
|
||||
|
||||
func setReadonly() error {
|
||||
flags := uintptr(unix.MS_BIND | unix.MS_REMOUNT | unix.MS_RDONLY)
|
||||
|
||||
err := unix.Mount("", "/", "", flags, "")
|
||||
err := mount("", "/", "", "", flags, "")
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
@@ -792,7 +820,7 @@ func setReadonly() error {
|
||||
return &os.PathError{Op: "statfs", Path: "/", Err: err}
|
||||
}
|
||||
flags |= uintptr(s.Flags)
|
||||
return unix.Mount("", "/", "", flags, "")
|
||||
return mount("", "/", "", "", flags, "")
|
||||
}
|
||||
|
||||
func setupPtmx(config *configs.Config) error {
|
||||
@@ -801,7 +829,7 @@ func setupPtmx(config *configs.Config) error {
|
||||
return err
|
||||
}
|
||||
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
|
||||
return fmt.Errorf("symlink dev ptmx %s", err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -817,23 +845,23 @@ func pivotRoot(rootfs string) error {
|
||||
|
||||
oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
return &os.PathError{Op: "open", Path: "/", Err: err}
|
||||
}
|
||||
defer unix.Close(oldroot) //nolint: errcheck
|
||||
|
||||
newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
return &os.PathError{Op: "open", Path: rootfs, Err: err}
|
||||
}
|
||||
defer unix.Close(newroot) //nolint: errcheck
|
||||
|
||||
// Change to the new root so that the pivot_root actually acts on it.
|
||||
if err := unix.Fchdir(newroot); err != nil {
|
||||
return err
|
||||
return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(newroot), Err: err}
|
||||
}
|
||||
|
||||
if err := unix.PivotRoot(".", "."); err != nil {
|
||||
return fmt.Errorf("pivot_root %s", err)
|
||||
return &os.PathError{Op: "pivot_root", Path: ".", Err: err}
|
||||
}
|
||||
|
||||
// Currently our "." is oldroot (according to the current kernel code).
|
||||
@@ -842,7 +870,7 @@ func pivotRoot(rootfs string) error {
|
||||
// pivot_root(2).
|
||||
|
||||
if err := unix.Fchdir(oldroot); err != nil {
|
||||
return err
|
||||
return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(oldroot), Err: err}
|
||||
}
|
||||
|
||||
// Make oldroot rslave to make sure our unmounts don't propagate to the
|
||||
@@ -850,17 +878,17 @@ func pivotRoot(rootfs string) error {
|
||||
// known to cause issues due to races where we still have a reference to a
|
||||
// mount while a process in the host namespace are trying to operate on
|
||||
// something they think has no mounts (devicemapper in particular).
|
||||
if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
|
||||
if err := mount("", ".", "", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
// Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
|
||||
if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
|
||||
// Perform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
|
||||
if err := unmount(".", unix.MNT_DETACH); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Switch back to our shiny new root.
|
||||
if err := unix.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("chdir / %s", err)
|
||||
return &os.PathError{Op: "chdir", Path: "/", Err: err}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -899,8 +927,8 @@ func msMoveRoot(rootfs string) error {
|
||||
for _, info := range mountinfos {
|
||||
p := info.Mountpoint
|
||||
// Be sure umount events are not propagated to the host.
|
||||
if err := unix.Mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
|
||||
if err == unix.ENOENT {
|
||||
if err := mount("", p, "", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
|
||||
if errors.Is(err, unix.ENOENT) {
|
||||
// If the mountpoint doesn't exist that means that we've
|
||||
// already blasted away some parent directory of the mountpoint
|
||||
// and so we don't care about this error.
|
||||
@@ -908,13 +936,13 @@ func msMoveRoot(rootfs string) error {
|
||||
}
|
||||
return err
|
||||
}
|
||||
if err := unix.Unmount(p, unix.MNT_DETACH); err != nil {
|
||||
if err != unix.EINVAL && err != unix.EPERM {
|
||||
if err := unmount(p, unix.MNT_DETACH); err != nil {
|
||||
if !errors.Is(err, unix.EINVAL) && !errors.Is(err, unix.EPERM) {
|
||||
return err
|
||||
} else {
|
||||
// If we have not privileges for umounting (e.g. rootless), then
|
||||
// cover the path.
|
||||
if err := unix.Mount("tmpfs", p, "tmpfs", 0, ""); err != nil {
|
||||
if err := mount("tmpfs", p, "", "tmpfs", 0, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -922,7 +950,7 @@ func msMoveRoot(rootfs string) error {
|
||||
}
|
||||
|
||||
// Move the rootfs on top of "/" in our mount namespace.
|
||||
if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
|
||||
if err := mount(rootfs, "/", "", "", unix.MS_MOVE, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return chroot()
|
||||
@@ -930,9 +958,12 @@ func msMoveRoot(rootfs string) error {
|
||||
|
||||
func chroot() error {
|
||||
if err := unix.Chroot("."); err != nil {
|
||||
return err
|
||||
return &os.PathError{Op: "chroot", Path: ".", Err: err}
|
||||
}
|
||||
return unix.Chdir("/")
|
||||
if err := unix.Chdir("/"); err != nil {
|
||||
return &os.PathError{Op: "chdir", Path: "/", Err: err}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// createIfNotExists creates a file or a directory only if it does not already exist.
|
||||
@@ -957,11 +988,11 @@ func createIfNotExists(path string, isDir bool) error {
|
||||
|
||||
// readonlyPath will make a path read only.
|
||||
func readonlyPath(path string) error {
|
||||
if err := unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
if err := mount(path, path, "", "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return nil
|
||||
}
|
||||
return &os.PathError{Op: "bind-mount", Path: path, Err: err}
|
||||
return err
|
||||
}
|
||||
|
||||
var s unix.Statfs_t
|
||||
@@ -970,8 +1001,8 @@ func readonlyPath(path string) error {
|
||||
}
|
||||
flags := uintptr(s.Flags) & (unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC)
|
||||
|
||||
if err := unix.Mount(path, path, "", flags|unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY, ""); err != nil {
|
||||
return &os.PathError{Op: "bind-mount-ro", Path: path, Err: err}
|
||||
if err := mount(path, path, "", "", flags|unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -991,14 +1022,12 @@ func remountReadonly(m *configs.Mount) error {
|
||||
// nosuid, etc.). So, let's use that case so that we can do
|
||||
// this re-mount without failing in a userns.
|
||||
flags |= unix.MS_REMOUNT | unix.MS_BIND | unix.MS_RDONLY
|
||||
if err := unix.Mount("", dest, "", uintptr(flags), ""); err != nil {
|
||||
switch err {
|
||||
case unix.EBUSY:
|
||||
if err := mount("", dest, "", "", uintptr(flags), ""); err != nil {
|
||||
if errors.Is(err, unix.EBUSY) {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
continue
|
||||
default:
|
||||
return err
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -1011,9 +1040,9 @@ func remountReadonly(m *configs.Mount) error {
|
||||
// For files, maskPath bind mounts /dev/null over the top of the specified path.
|
||||
// For directories, maskPath mounts read-only tmpfs over the top of the specified path.
|
||||
func maskPath(path string, mountLabel string) error {
|
||||
if err := unix.Mount("/dev/null", path, "", unix.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
|
||||
if err == unix.ENOTDIR {
|
||||
return unix.Mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel))
|
||||
if err := mount("/dev/null", path, "", "", unix.MS_BIND, ""); err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
if errors.Is(err, unix.ENOTDIR) {
|
||||
return mount("tmpfs", path, "", "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel))
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -1024,33 +1053,38 @@ func maskPath(path string, mountLabel string) error {
|
||||
// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward.
|
||||
func writeSystemProperty(key, value string) error {
|
||||
keyPath := strings.Replace(key, ".", "/", -1)
|
||||
return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0o644)
|
||||
return os.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0o644)
|
||||
}
|
||||
|
||||
func remount(m *configs.Mount, rootfs string) error {
|
||||
func remount(m *configs.Mount, rootfs string, mountFd *int) error {
|
||||
source := m.Source
|
||||
if mountFd != nil {
|
||||
source = "/proc/self/fd/" + strconv.Itoa(*mountFd)
|
||||
}
|
||||
|
||||
return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||
flags := uintptr(m.Flags | unix.MS_REMOUNT)
|
||||
err := unix.Mount(m.Source, procfd, m.Device, flags, "")
|
||||
err := mount(source, m.Destination, procfd, m.Device, flags, "")
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
// Check if the source has ro flag...
|
||||
var s unix.Statfs_t
|
||||
if err := unix.Statfs(m.Source, &s); err != nil {
|
||||
return &os.PathError{Op: "statfs", Path: m.Source, Err: err}
|
||||
if err := unix.Statfs(source, &s); err != nil {
|
||||
return &os.PathError{Op: "statfs", Path: source, Err: err}
|
||||
}
|
||||
if s.Flags&unix.MS_RDONLY != unix.MS_RDONLY {
|
||||
return err
|
||||
}
|
||||
// ... and retry the mount with ro flag set.
|
||||
flags |= unix.MS_RDONLY
|
||||
return unix.Mount(m.Source, procfd, m.Device, flags, "")
|
||||
return mount(source, m.Destination, procfd, m.Device, flags, "")
|
||||
})
|
||||
}
|
||||
|
||||
// Do the mount operation followed by additional mounts required to take care
|
||||
// of propagation flags. This will always be scoped inside the container rootfs.
|
||||
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
||||
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string, mountFd *int) error {
|
||||
var (
|
||||
data = label.FormatMountLabel(m.Data, mountLabel)
|
||||
flags = m.Flags
|
||||
@@ -1067,17 +1101,22 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
||||
// mutating underneath us, we verify that we are actually going to mount
|
||||
// inside the container with WithProcfd() -- mounting through a procfd
|
||||
// mounts on the target.
|
||||
source := m.Source
|
||||
if mountFd != nil {
|
||||
source = "/proc/self/fd/" + strconv.Itoa(*mountFd)
|
||||
}
|
||||
|
||||
if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||
return unix.Mount(m.Source, procfd, m.Device, uintptr(flags), data)
|
||||
return mount(source, m.Destination, procfd, m.Device, uintptr(flags), data)
|
||||
}); err != nil {
|
||||
return fmt.Errorf("mount through procfd: %w", err)
|
||||
return err
|
||||
}
|
||||
// We have to apply mount propagation flags in a separate WithProcfd() call
|
||||
// because the previous call invalidates the passed procfd -- the mount
|
||||
// target needs to be re-opened.
|
||||
if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||
for _, pflag := range m.PropagationFlags {
|
||||
if err := unix.Mount("", procfd, "", uintptr(pflag), ""); err != nil {
|
||||
if err := mount("", m.Destination, procfd, "", uintptr(pflag), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -1087,3 +1126,12 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setRecAttr(m *configs.Mount, rootfs string) error {
|
||||
if m.RecAttr == nil {
|
||||
return nil
|
||||
}
|
||||
return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||
return unix.MountSetattr(-1, procfd, unix.AT_RECURSIVE, m.RecAttr)
|
||||
})
|
||||
}
|
||||
|
51
vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go
generated
vendored
51
vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go
generated
vendored
@@ -2,6 +2,7 @@ package seccomp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
@@ -16,13 +17,36 @@ var operators = map[string]configs.Operator{
|
||||
"SCMP_CMP_MASKED_EQ": configs.MaskEqualTo,
|
||||
}
|
||||
|
||||
// KnownOperators returns the list of the known operations.
|
||||
// Used by `runc features`.
|
||||
func KnownOperators() []string {
|
||||
var res []string
|
||||
for k := range operators {
|
||||
res = append(res, k)
|
||||
}
|
||||
sort.Strings(res)
|
||||
return res
|
||||
}
|
||||
|
||||
var actions = map[string]configs.Action{
|
||||
"SCMP_ACT_KILL": configs.Kill,
|
||||
"SCMP_ACT_ERRNO": configs.Errno,
|
||||
"SCMP_ACT_TRAP": configs.Trap,
|
||||
"SCMP_ACT_ALLOW": configs.Allow,
|
||||
"SCMP_ACT_TRACE": configs.Trace,
|
||||
"SCMP_ACT_LOG": configs.Log,
|
||||
"SCMP_ACT_KILL": configs.Kill,
|
||||
"SCMP_ACT_ERRNO": configs.Errno,
|
||||
"SCMP_ACT_TRAP": configs.Trap,
|
||||
"SCMP_ACT_ALLOW": configs.Allow,
|
||||
"SCMP_ACT_TRACE": configs.Trace,
|
||||
"SCMP_ACT_LOG": configs.Log,
|
||||
"SCMP_ACT_NOTIFY": configs.Notify,
|
||||
}
|
||||
|
||||
// KnownActions returns the list of the known actions.
|
||||
// Used by `runc features`.
|
||||
func KnownActions() []string {
|
||||
var res []string
|
||||
for k := range actions {
|
||||
res = append(res, k)
|
||||
}
|
||||
sort.Strings(res)
|
||||
return res
|
||||
}
|
||||
|
||||
var archs = map[string]string{
|
||||
@@ -44,6 +68,17 @@ var archs = map[string]string{
|
||||
"SCMP_ARCH_S390X": "s390x",
|
||||
}
|
||||
|
||||
// KnownArchs returns the list of the known archs.
|
||||
// Used by `runc features`.
|
||||
func KnownArchs() []string {
|
||||
var res []string
|
||||
for k := range archs {
|
||||
res = append(res, k)
|
||||
}
|
||||
sort.Strings(res)
|
||||
return res
|
||||
}
|
||||
|
||||
// ConvertStringToOperator converts a string into a Seccomp comparison operator.
|
||||
// Comparison operators use the names they are assigned by Libseccomp's header.
|
||||
// Attempting to convert a string that is not a valid operator results in an
|
||||
@@ -56,9 +91,7 @@ func ConvertStringToOperator(in string) (configs.Operator, error) {
|
||||
}
|
||||
|
||||
// ConvertStringToAction converts a string into a Seccomp rule match action.
|
||||
// Actions use the names they are assigned in Libseccomp's header, though some
|
||||
// (notable, SCMP_ACT_TRACE) are not available in this implementation and will
|
||||
// return errors.
|
||||
// Actions use the names they are assigned in Libseccomp's header.
|
||||
// Attempting to convert a string that is not a valid action results in an
|
||||
// error.
|
||||
func ConvertStringToAction(in string) (configs.Action, error) {
|
||||
|
116
vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go
generated
vendored
116
vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go
generated
vendored
@@ -1,23 +1,25 @@
|
||||
// +build linux,cgo,seccomp
|
||||
//go:build cgo && seccomp
|
||||
// +build cgo,seccomp
|
||||
|
||||
package patchbpf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
libseccomp "github.com/seccomp/libseccomp-golang"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/net/bpf"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
// #cgo pkg-config: libseccomp
|
||||
@@ -41,6 +43,11 @@ const uintptr_t C_SET_MODE_FILTER = SECCOMP_SET_MODE_FILTER;
|
||||
#endif
|
||||
const uintptr_t C_FILTER_FLAG_LOG = SECCOMP_FILTER_FLAG_LOG;
|
||||
|
||||
#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
|
||||
# define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
|
||||
#endif
|
||||
const uintptr_t C_FILTER_FLAG_NEW_LISTENER = SECCOMP_FILTER_FLAG_NEW_LISTENER;
|
||||
|
||||
// We use the AUDIT_ARCH_* values because those are the ones used by the kernel
|
||||
// and SCMP_ARCH_* sometimes has fake values (such as SCMP_ARCH_X32). But we
|
||||
// use <seccomp.h> so we get libseccomp's fallback definitions of AUDIT_ARCH_*.
|
||||
@@ -85,17 +92,16 @@ loop:
|
||||
// seccomp_export_bpf outputs the program in *host* endian-ness.
|
||||
var insn unix.SockFilter
|
||||
if err := binary.Read(rdr, utils.NativeEndian, &insn); err != nil {
|
||||
switch err {
|
||||
case io.EOF:
|
||||
if errors.Is(err, io.EOF) {
|
||||
// Parsing complete.
|
||||
break loop
|
||||
case io.ErrUnexpectedEOF:
|
||||
// Parsing stopped mid-instruction.
|
||||
return nil, errors.Wrap(err, "program parsing halted mid-instruction")
|
||||
default:
|
||||
// All other errors.
|
||||
return nil, errors.Wrap(err, "parsing instructions")
|
||||
}
|
||||
if errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// Parsing stopped mid-instruction.
|
||||
return nil, fmt.Errorf("program parsing halted mid-instruction: %w", err)
|
||||
}
|
||||
// All other errors.
|
||||
return nil, fmt.Errorf("error parsing instructions: %w", err)
|
||||
}
|
||||
program = append(program, bpf.RawInstruction{
|
||||
Op: insn.Code,
|
||||
@@ -110,7 +116,7 @@ loop:
|
||||
func disassembleFilter(filter *libseccomp.ScmpFilter) ([]bpf.Instruction, error) {
|
||||
rdr, wtr, err := os.Pipe()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "creating scratch pipe")
|
||||
return nil, fmt.Errorf("error creating scratch pipe: %w", err)
|
||||
}
|
||||
defer wtr.Close()
|
||||
defer rdr.Close()
|
||||
@@ -124,23 +130,23 @@ func disassembleFilter(filter *libseccomp.ScmpFilter) ([]bpf.Instruction, error)
|
||||
}()
|
||||
|
||||
if err := filter.ExportBPF(wtr); err != nil {
|
||||
return nil, errors.Wrap(err, "exporting BPF")
|
||||
return nil, fmt.Errorf("error exporting BPF: %w", err)
|
||||
}
|
||||
// Close so that the reader actually gets EOF.
|
||||
_ = wtr.Close()
|
||||
|
||||
if copyErr := <-errChan; copyErr != nil {
|
||||
return nil, errors.Wrap(copyErr, "reading from ExportBPF pipe")
|
||||
return nil, fmt.Errorf("error reading from ExportBPF pipe: %w", copyErr)
|
||||
}
|
||||
|
||||
// Parse the instructions.
|
||||
rawProgram, err := parseProgram(readerBuffer)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "parsing generated BPF filter")
|
||||
return nil, fmt.Errorf("parsing generated BPF filter: %w", err)
|
||||
}
|
||||
program, ok := bpf.Disassemble(rawProgram)
|
||||
if !ok {
|
||||
return nil, errors.Errorf("could not disassemble entire BPF filter")
|
||||
return nil, errors.New("could not disassemble entire BPF filter")
|
||||
}
|
||||
return program, nil
|
||||
}
|
||||
@@ -155,7 +161,7 @@ func archToNative(arch libseccomp.ScmpArch) (nativeArch, error) {
|
||||
// Convert to actual native architecture.
|
||||
arch, err := libseccomp.GetNativeArch()
|
||||
if err != nil {
|
||||
return invalidArch, errors.Wrap(err, "get native arch")
|
||||
return invalidArch, fmt.Errorf("unable to get native arch: %w", err)
|
||||
}
|
||||
return archToNative(arch)
|
||||
case libseccomp.ArchX86:
|
||||
@@ -192,7 +198,7 @@ func archToNative(arch libseccomp.ScmpArch) (nativeArch, error) {
|
||||
case libseccomp.ArchS390X:
|
||||
return nativeArch(C.C_AUDIT_ARCH_S390X), nil
|
||||
default:
|
||||
return invalidArch, errors.Errorf("unknown architecture: %v", arch)
|
||||
return invalidArch, fmt.Errorf("unknown architecture: %v", arch)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -209,7 +215,7 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
|
||||
for _, ociArch := range config.Architectures {
|
||||
arch, err := libseccomp.GetArchFromString(ociArch)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "validating seccomp architecture")
|
||||
return nil, fmt.Errorf("unable to validate seccomp architecture: %w", err)
|
||||
}
|
||||
|
||||
// Map native architecture to a real architecture value to avoid
|
||||
@@ -217,7 +223,7 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
|
||||
if arch == libseccomp.ArchNative {
|
||||
nativeArch, err := libseccomp.GetNativeArch()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "get native arch")
|
||||
return nil, fmt.Errorf("unable to get native architecture: %w", err)
|
||||
}
|
||||
arch = nativeArch
|
||||
}
|
||||
@@ -225,7 +231,7 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
|
||||
// Figure out native architecture representation of the architecture.
|
||||
nativeArch, err := archToNative(arch)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "cannot map architecture %v to AUDIT_ARCH_ constant", arch)
|
||||
return nil, fmt.Errorf("cannot map architecture %v to AUDIT_ARCH_ constant: %w", arch, err)
|
||||
}
|
||||
|
||||
if _, ok := lastSyscalls[nativeArch]; !ok {
|
||||
@@ -370,7 +376,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||
},
|
||||
}, sectionTail...)
|
||||
default:
|
||||
return nil, errors.Errorf("unknown amd64 native architecture %#x", scmpArch)
|
||||
return nil, fmt.Errorf("unknown amd64 native architecture %#x", scmpArch)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -378,16 +384,16 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||
case 2:
|
||||
// x32 and x86_64 are a unique case, we can't handle any others.
|
||||
if uint32(nativeArch) != uint32(C.C_AUDIT_ARCH_X86_64) {
|
||||
return nil, errors.Errorf("unknown architecture overlap on native arch %#x", nativeArch)
|
||||
return nil, fmt.Errorf("unknown architecture overlap on native arch %#x", nativeArch)
|
||||
}
|
||||
|
||||
x32sysno, ok := maxSyscalls[libseccomp.ArchX32]
|
||||
if !ok {
|
||||
return nil, errors.Errorf("missing %v in overlapping x86_64 arch: %v", libseccomp.ArchX32, maxSyscalls)
|
||||
return nil, fmt.Errorf("missing %v in overlapping x86_64 arch: %v", libseccomp.ArchX32, maxSyscalls)
|
||||
}
|
||||
x86sysno, ok := maxSyscalls[libseccomp.ArchAMD64]
|
||||
if !ok {
|
||||
return nil, errors.Errorf("missing %v in overlapping x86_64 arch: %v", libseccomp.ArchAMD64, maxSyscalls)
|
||||
return nil, fmt.Errorf("missing %v in overlapping x86_64 arch: %v", libseccomp.ArchAMD64, maxSyscalls)
|
||||
}
|
||||
|
||||
// The x32 ABI indicates that a syscall is being made by an x32
|
||||
@@ -448,7 +454,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||
}...)
|
||||
}
|
||||
default:
|
||||
return nil, errors.Errorf("invalid number of architecture overlaps: %v", len(maxSyscalls))
|
||||
return nil, fmt.Errorf("invalid number of architecture overlaps: %v", len(maxSyscalls))
|
||||
}
|
||||
|
||||
// Prepend this section to the tail.
|
||||
@@ -517,7 +523,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
|
||||
func assemble(program []bpf.Instruction) ([]unix.SockFilter, error) {
|
||||
rawProgram, err := bpf.Assemble(program)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "assembling program")
|
||||
return nil, fmt.Errorf("error assembling program: %w", err)
|
||||
}
|
||||
|
||||
// Convert to []unix.SockFilter for unix.SockFilter.
|
||||
@@ -547,11 +553,11 @@ func generatePatch(config *configs.Seccomp) ([]bpf.Instruction, error) {
|
||||
|
||||
lastSyscalls, err := findLastSyscalls(config)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "finding last syscalls for -ENOSYS stub")
|
||||
return nil, fmt.Errorf("error finding last syscalls for -ENOSYS stub: %w", err)
|
||||
}
|
||||
stubProgram, err := generateEnosysStub(lastSyscalls)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "generating -ENOSYS stub")
|
||||
return nil, fmt.Errorf("error generating -ENOSYS stub: %w", err)
|
||||
}
|
||||
return stubProgram, nil
|
||||
}
|
||||
@@ -559,12 +565,12 @@ func generatePatch(config *configs.Seccomp) ([]bpf.Instruction, error) {
|
||||
func enosysPatchFilter(config *configs.Seccomp, filter *libseccomp.ScmpFilter) ([]unix.SockFilter, error) {
|
||||
program, err := disassembleFilter(filter)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "disassembling original filter")
|
||||
return nil, fmt.Errorf("error disassembling original filter: %w", err)
|
||||
}
|
||||
|
||||
patch, err := generatePatch(config)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "generating patch for filter")
|
||||
return nil, fmt.Errorf("error generating patch for filter: %w", err)
|
||||
}
|
||||
fullProgram := append(patch, program...)
|
||||
|
||||
@@ -576,49 +582,61 @@ func enosysPatchFilter(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (
|
||||
|
||||
fprog, err := assemble(fullProgram)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "assembling modified filter")
|
||||
return nil, fmt.Errorf("error assembling modified filter: %w", err)
|
||||
}
|
||||
return fprog, nil
|
||||
}
|
||||
|
||||
func filterFlags(filter *libseccomp.ScmpFilter) (flags uint, noNewPrivs bool, err error) {
|
||||
func filterFlags(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (flags uint, noNewPrivs bool, err error) {
|
||||
// Ignore the error since pre-2.4 libseccomp is treated as API level 0.
|
||||
apiLevel, _ := libseccomp.GetApi()
|
||||
apiLevel, _ := libseccomp.GetAPI()
|
||||
|
||||
noNewPrivs, err = filter.GetNoNewPrivsBit()
|
||||
if err != nil {
|
||||
return 0, false, errors.Wrap(err, "fetch no_new_privs filter bit")
|
||||
return 0, false, fmt.Errorf("unable to fetch no_new_privs filter bit: %w", err)
|
||||
}
|
||||
|
||||
if apiLevel >= 3 {
|
||||
if logBit, err := filter.GetLogBit(); err != nil {
|
||||
return 0, false, errors.Wrap(err, "fetch SECCOMP_FILTER_FLAG_LOG bit")
|
||||
return 0, false, fmt.Errorf("unable to fetch SECCOMP_FILTER_FLAG_LOG bit: %w", err)
|
||||
} else if logBit {
|
||||
flags |= uint(C.C_FILTER_FLAG_LOG)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Support seccomp flags not yet added to libseccomp-golang...
|
||||
|
||||
for _, call := range config.Syscalls {
|
||||
if call.Action == configs.Notify {
|
||||
flags |= uint(C.C_FILTER_FLAG_NEW_LISTENER)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (err error) {
|
||||
func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (fd int, err error) {
|
||||
fprog := unix.SockFprog{
|
||||
Len: uint16(len(filter)),
|
||||
Filter: &filter[0],
|
||||
}
|
||||
fd = -1 // only return a valid fd when C_FILTER_FLAG_NEW_LISTENER is set
|
||||
// If no seccomp flags were requested we can use the old-school prctl(2).
|
||||
if flags == 0 {
|
||||
err = unix.Prctl(unix.PR_SET_SECCOMP,
|
||||
unix.SECCOMP_MODE_FILTER,
|
||||
uintptr(unsafe.Pointer(&fprog)), 0, 0)
|
||||
} else {
|
||||
_, _, errno := unix.RawSyscall(unix.SYS_SECCOMP,
|
||||
fdptr, _, errno := unix.RawSyscall(unix.SYS_SECCOMP,
|
||||
uintptr(C.C_SET_MODE_FILTER),
|
||||
uintptr(flags), uintptr(unsafe.Pointer(&fprog)))
|
||||
if errno != 0 {
|
||||
err = errno
|
||||
}
|
||||
if flags&uint(C.C_FILTER_FLAG_NEW_LISTENER) != 0 {
|
||||
fd = int(fdptr)
|
||||
}
|
||||
}
|
||||
runtime.KeepAlive(filter)
|
||||
runtime.KeepAlive(fprog)
|
||||
@@ -630,17 +648,17 @@ func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (err error) {
|
||||
// patches said filter to handle -ENOSYS in a much nicer manner than the
|
||||
// default libseccomp default action behaviour, and loads the patched filter
|
||||
// into the kernel for the current process.
|
||||
func PatchAndLoad(config *configs.Seccomp, filter *libseccomp.ScmpFilter) error {
|
||||
func PatchAndLoad(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (int, error) {
|
||||
// Generate a patched filter.
|
||||
fprog, err := enosysPatchFilter(config, filter)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "patching filter")
|
||||
return -1, fmt.Errorf("error patching filter: %w", err)
|
||||
}
|
||||
|
||||
// Get the set of libseccomp flags set.
|
||||
seccompFlags, noNewPrivs, err := filterFlags(filter)
|
||||
seccompFlags, noNewPrivs, err := filterFlags(config, filter)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "fetch seccomp filter flags")
|
||||
return -1, fmt.Errorf("unable to fetch seccomp filter flags: %w", err)
|
||||
}
|
||||
|
||||
// Set no_new_privs if it was requested, though in runc we handle
|
||||
@@ -648,13 +666,15 @@ func PatchAndLoad(config *configs.Seccomp, filter *libseccomp.ScmpFilter) error
|
||||
if noNewPrivs {
|
||||
logrus.Warnf("potentially misconfigured filter -- setting no_new_privs in seccomp path")
|
||||
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
|
||||
return errors.Wrap(err, "enable no_new_privs bit")
|
||||
return -1, fmt.Errorf("error enabling no_new_privs bit: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, load the filter.
|
||||
if err := sysSeccompSetFilter(seccompFlags, fprog); err != nil {
|
||||
return errors.Wrap(err, "loading seccomp filter")
|
||||
fd, err := sysSeccompSetFilter(seccompFlags, fprog)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("error loading seccomp filter: %w", err)
|
||||
}
|
||||
return nil
|
||||
|
||||
return fd, nil
|
||||
}
|
||||
|
@@ -1,3 +1,4 @@
|
||||
//go:build !linux || !cgo || !seccomp
|
||||
// +build !linux !cgo !seccomp
|
||||
|
||||
package patchbpf
|
||||
|
111
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
generated
vendored
111
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
generated
vendored
@@ -1,4 +1,5 @@
|
||||
// +build linux,cgo,seccomp
|
||||
//go:build cgo && seccomp
|
||||
// +build cgo,seccomp
|
||||
|
||||
package seccomp
|
||||
|
||||
@@ -6,19 +7,16 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
libseccomp "github.com/seccomp/libseccomp-golang"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp/patchbpf"
|
||||
|
||||
libseccomp "github.com/seccomp/libseccomp-golang"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
actAllow = libseccomp.ActAllow
|
||||
actTrap = libseccomp.ActTrap
|
||||
actKill = libseccomp.ActKill
|
||||
actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM))
|
||||
actLog = libseccomp.ActLog
|
||||
actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM))
|
||||
)
|
||||
|
||||
@@ -27,77 +25,118 @@ const (
|
||||
syscallMaxArguments int = 6
|
||||
)
|
||||
|
||||
// Filters given syscalls in a container, preventing them from being used
|
||||
// Started in the container init process, and carried over to all child processes
|
||||
// Setns calls, however, require a separate invocation, as they are not children
|
||||
// of the init until they join the namespace
|
||||
func InitSeccomp(config *configs.Seccomp) error {
|
||||
// InitSeccomp installs the seccomp filters to be used in the container as
|
||||
// specified in config.
|
||||
// Returns the seccomp file descriptor if any of the filters include a
|
||||
// SCMP_ACT_NOTIFY action, otherwise returns -1.
|
||||
func InitSeccomp(config *configs.Seccomp) (int, error) {
|
||||
if config == nil {
|
||||
return errors.New("cannot initialize Seccomp - nil config passed")
|
||||
return -1, errors.New("cannot initialize Seccomp - nil config passed")
|
||||
}
|
||||
|
||||
defaultAction, err := getAction(config.DefaultAction, config.DefaultErrnoRet)
|
||||
if err != nil {
|
||||
return errors.New("error initializing seccomp - invalid default action")
|
||||
return -1, errors.New("error initializing seccomp - invalid default action")
|
||||
}
|
||||
|
||||
// Ignore the error since pre-2.4 libseccomp is treated as API level 0.
|
||||
apiLevel, _ := libseccomp.GetAPI()
|
||||
for _, call := range config.Syscalls {
|
||||
if call.Action == configs.Notify {
|
||||
if apiLevel < 6 {
|
||||
return -1, fmt.Errorf("seccomp notify unsupported: API level: got %d, want at least 6. Please try with libseccomp >= 2.5.0 and Linux >= 5.7", apiLevel)
|
||||
}
|
||||
|
||||
// We can't allow the write syscall to notify to the seccomp agent.
|
||||
// After InitSeccomp() is called, we need to syncParentSeccomp() to write the seccomp fd plain
|
||||
// number, so the parent sends it to the seccomp agent. If we use SCMP_ACT_NOTIFY on write, we
|
||||
// never can write the seccomp fd to the parent and therefore the seccomp agent never receives
|
||||
// the seccomp fd and runc is hang during initialization.
|
||||
//
|
||||
// Note that read()/close(), that are also used in syncParentSeccomp(), _can_ use SCMP_ACT_NOTIFY.
|
||||
// Because we write the seccomp fd on the pipe to the parent, the parent is able to proceed and
|
||||
// send the seccomp fd to the agent (it is another process and not subject to the seccomp
|
||||
// filter). We will be blocked on read()/close() inside syncParentSeccomp() but if the seccomp
|
||||
// agent allows those syscalls to proceed, initialization works just fine and the agent can
|
||||
// handle future read()/close() syscalls as it wanted.
|
||||
if call.Name == "write" {
|
||||
return -1, errors.New("SCMP_ACT_NOTIFY cannot be used for the write syscall")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// See comment on why write is not allowed. The same reason applies, as this can mean handling write too.
|
||||
if defaultAction == libseccomp.ActNotify {
|
||||
return -1, errors.New("SCMP_ACT_NOTIFY cannot be used as default action")
|
||||
}
|
||||
|
||||
filter, err := libseccomp.NewFilter(defaultAction)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating filter: %s", err)
|
||||
return -1, fmt.Errorf("error creating filter: %w", err)
|
||||
}
|
||||
|
||||
// Add extra architectures
|
||||
for _, arch := range config.Architectures {
|
||||
scmpArch, err := libseccomp.GetArchFromString(arch)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error validating Seccomp architecture: %s", err)
|
||||
return -1, fmt.Errorf("error validating Seccomp architecture: %w", err)
|
||||
}
|
||||
if err := filter.AddArch(scmpArch); err != nil {
|
||||
return fmt.Errorf("error adding architecture to seccomp filter: %s", err)
|
||||
return -1, fmt.Errorf("error adding architecture to seccomp filter: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Unset no new privs bit
|
||||
if err := filter.SetNoNewPrivsBit(false); err != nil {
|
||||
return fmt.Errorf("error setting no new privileges: %s", err)
|
||||
return -1, fmt.Errorf("error setting no new privileges: %w", err)
|
||||
}
|
||||
|
||||
// Add a rule for each syscall
|
||||
for _, call := range config.Syscalls {
|
||||
if call == nil {
|
||||
return errors.New("encountered nil syscall while initializing Seccomp")
|
||||
return -1, errors.New("encountered nil syscall while initializing Seccomp")
|
||||
}
|
||||
|
||||
if err := matchCall(filter, call, defaultAction); err != nil {
|
||||
return err
|
||||
return -1, err
|
||||
}
|
||||
}
|
||||
if err := patchbpf.PatchAndLoad(config, filter); err != nil {
|
||||
return fmt.Errorf("error loading seccomp filter into kernel: %s", err)
|
||||
|
||||
seccompFd, err := patchbpf.PatchAndLoad(config, filter)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("error loading seccomp filter into kernel: %w", err)
|
||||
}
|
||||
return nil
|
||||
|
||||
return seccompFd, nil
|
||||
}
|
||||
|
||||
// Convert Libcontainer Action to Libseccomp ScmpAction
|
||||
func getAction(act configs.Action, errnoRet *uint) (libseccomp.ScmpAction, error) {
|
||||
switch act {
|
||||
case configs.Kill:
|
||||
return actKill, nil
|
||||
return libseccomp.ActKill, nil
|
||||
case configs.Errno:
|
||||
if errnoRet != nil {
|
||||
return libseccomp.ActErrno.SetReturnCode(int16(*errnoRet)), nil
|
||||
}
|
||||
return actErrno, nil
|
||||
case configs.Trap:
|
||||
return actTrap, nil
|
||||
return libseccomp.ActTrap, nil
|
||||
case configs.Allow:
|
||||
return actAllow, nil
|
||||
return libseccomp.ActAllow, nil
|
||||
case configs.Trace:
|
||||
if errnoRet != nil {
|
||||
return libseccomp.ActTrace.SetReturnCode(int16(*errnoRet)), nil
|
||||
}
|
||||
return actTrace, nil
|
||||
case configs.Log:
|
||||
return actLog, nil
|
||||
return libseccomp.ActLog, nil
|
||||
case configs.Notify:
|
||||
return libseccomp.ActNotify, nil
|
||||
case configs.KillThread:
|
||||
return libseccomp.ActKillThread, nil
|
||||
case configs.KillProcess:
|
||||
return libseccomp.ActKillProcess, nil
|
||||
default:
|
||||
return libseccomp.ActInvalid, errors.New("invalid action, cannot use in rule")
|
||||
}
|
||||
@@ -162,17 +201,18 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libs
|
||||
return nil
|
||||
}
|
||||
|
||||
// If we can't resolve the syscall, assume it's not supported on this kernel
|
||||
// Ignore it, don't error out
|
||||
// If we can't resolve the syscall, assume it is not supported
|
||||
// by this kernel. Warn about it, don't error out.
|
||||
callNum, err := libseccomp.GetSyscallFromName(call.Name)
|
||||
if err != nil {
|
||||
logrus.Debugf("unknown seccomp syscall %q ignored", call.Name)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Unconditional match - just add the rule
|
||||
if len(call.Args) == 0 {
|
||||
if err := filter.AddRule(callNum, callAct); err != nil {
|
||||
return fmt.Errorf("error adding seccomp filter rule for syscall %s: %s", call.Name, err)
|
||||
return fmt.Errorf("error adding seccomp filter rule for syscall %s: %w", call.Name, err)
|
||||
}
|
||||
} else {
|
||||
// If two or more arguments have the same condition,
|
||||
@@ -183,7 +223,7 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libs
|
||||
for _, cond := range call.Args {
|
||||
newCond, err := getCondition(cond)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %s", call.Name, err)
|
||||
return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %w", call.Name, err)
|
||||
}
|
||||
|
||||
argCounts[cond.Index] += 1
|
||||
@@ -206,14 +246,14 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libs
|
||||
condArr := []libseccomp.ScmpCondition{cond}
|
||||
|
||||
if err := filter.AddRuleConditional(callNum, callAct, condArr); err != nil {
|
||||
return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err)
|
||||
return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No conditions share same argument
|
||||
// Use new, proper behavior
|
||||
if err := filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
|
||||
return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err)
|
||||
return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -225,3 +265,6 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libs
|
||||
func Version() (uint, uint, uint) {
|
||||
return libseccomp.GetLibraryVersion()
|
||||
}
|
||||
|
||||
// Enabled is true if seccomp support is compiled in.
|
||||
const Enabled = true
|
||||
|
10
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go
generated
vendored
10
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go
generated
vendored
@@ -1,3 +1,4 @@
|
||||
//go:build !linux || !cgo || !seccomp
|
||||
// +build !linux !cgo !seccomp
|
||||
|
||||
package seccomp
|
||||
@@ -11,14 +12,17 @@ import (
|
||||
var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported")
|
||||
|
||||
// InitSeccomp does nothing because seccomp is not supported.
|
||||
func InitSeccomp(config *configs.Seccomp) error {
|
||||
func InitSeccomp(config *configs.Seccomp) (int, error) {
|
||||
if config != nil {
|
||||
return ErrSeccompNotEnabled
|
||||
return -1, ErrSeccompNotEnabled
|
||||
}
|
||||
return nil
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
// Version returns major, minor, and micro.
|
||||
func Version() (uint, uint, uint) {
|
||||
return 0, 0, 0
|
||||
}
|
||||
|
||||
// Enabled is true if seccomp support is compiled in.
|
||||
const Enabled = false
|
||||
|
39
vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
generated
vendored
39
vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
generated
vendored
@@ -1,19 +1,19 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/selinux/go-selinux"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/selinux/go-selinux"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// linuxSetnsInit performs the container's initialization for running a new process
|
||||
@@ -30,9 +30,6 @@ func (l *linuxSetnsInit) getSessionRingName() string {
|
||||
}
|
||||
|
||||
func (l *linuxSetnsInit) Init() error {
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
|
||||
if !l.config.Config.NoNewKeyring {
|
||||
if err := selinux.SetKeyLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
@@ -44,8 +41,8 @@ func (l *linuxSetnsInit) Init() error {
|
||||
// don't bail on ENOSYS.
|
||||
//
|
||||
// TODO(cyphar): And we should have logging here too.
|
||||
if errors.Cause(err) != unix.ENOSYS {
|
||||
return errors.Wrap(err, "join session keyring")
|
||||
if !errors.Is(err, unix.ENOSYS) {
|
||||
return fmt.Errorf("unable to join session keyring: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -70,7 +67,12 @@ func (l *linuxSetnsInit) Init() error {
|
||||
// do this before dropping capabilities; otherwise do it as late as possible
|
||||
// just before execve so as few syscalls take place after it as possible.
|
||||
if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -84,14 +86,19 @@ func (l *linuxSetnsInit) Init() error {
|
||||
// place afterward (reducing the amount of syscalls that users need to
|
||||
// enable in their seccomp profiles).
|
||||
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
return newSystemErrorWithCause(err, "init seccomp")
|
||||
seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to init seccomp: %w", err)
|
||||
}
|
||||
|
||||
if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
logrus.Debugf("setns_init: about to exec")
|
||||
// Close the log pipe fd so the parent's ForwardLogs can exit.
|
||||
if err := unix.Close(l.logFd); err != nil {
|
||||
return newSystemErrorWithCause(err, "closing log pipe fd")
|
||||
return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err}
|
||||
}
|
||||
|
||||
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
||||
|
27
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go
generated
vendored
27
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go
generated
vendored
@@ -1,27 +0,0 @@
|
||||
package stacktrace
|
||||
|
||||
import "runtime"
|
||||
|
||||
// Capture captures a stacktrace for the current calling go program
|
||||
//
|
||||
// skip is the number of frames to skip
|
||||
func Capture(userSkip int) Stacktrace {
|
||||
var (
|
||||
skip = userSkip + 1 // add one for our own function
|
||||
frames []Frame
|
||||
prevPc uintptr
|
||||
)
|
||||
for i := skip; ; i++ {
|
||||
pc, file, line, ok := runtime.Caller(i)
|
||||
// detect if caller is repeated to avoid loop, gccgo
|
||||
// currently runs into a loop without this check
|
||||
if !ok || pc == prevPc {
|
||||
break
|
||||
}
|
||||
frames = append(frames, NewFrame(pc, file, line))
|
||||
prevPc = pc
|
||||
}
|
||||
return Stacktrace{
|
||||
Frames: frames,
|
||||
}
|
||||
}
|
38
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go
generated
vendored
38
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go
generated
vendored
@@ -1,38 +0,0 @@
|
||||
package stacktrace
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// NewFrame returns a new stack frame for the provided information
|
||||
func NewFrame(pc uintptr, file string, line int) Frame {
|
||||
fn := runtime.FuncForPC(pc)
|
||||
if fn == nil {
|
||||
return Frame{}
|
||||
}
|
||||
pack, name := parseFunctionName(fn.Name())
|
||||
return Frame{
|
||||
Line: line,
|
||||
File: filepath.Base(file),
|
||||
Package: pack,
|
||||
Function: name,
|
||||
}
|
||||
}
|
||||
|
||||
func parseFunctionName(name string) (string, string) {
|
||||
i := strings.LastIndex(name, ".")
|
||||
if i == -1 {
|
||||
return "", name
|
||||
}
|
||||
return name[:i], name[i+1:]
|
||||
}
|
||||
|
||||
// Frame contains all the information for a stack frame within a go program
|
||||
type Frame struct {
|
||||
File string
|
||||
Function string
|
||||
Package string
|
||||
Line int
|
||||
}
|
5
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go
generated
vendored
5
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go
generated
vendored
@@ -1,5 +0,0 @@
|
||||
package stacktrace
|
||||
|
||||
type Stacktrace struct {
|
||||
Frames []Frame
|
||||
}
|
109
vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
generated
vendored
109
vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
generated
vendored
@@ -1,23 +1,22 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/opencontainers/selinux/go-selinux"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/opencontainers/selinux/go-selinux"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type linuxStandardInit struct {
|
||||
@@ -26,6 +25,7 @@ type linuxStandardInit struct {
|
||||
parentPid int
|
||||
fifoFd int
|
||||
logFd int
|
||||
mountFds []int
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
@@ -46,8 +46,6 @@ func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
|
||||
}
|
||||
|
||||
func (l *linuxStandardInit) Init() error {
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
if !l.config.Config.NoNewKeyring {
|
||||
if err := selinux.SetKeyLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
@@ -65,15 +63,15 @@ func (l *linuxStandardInit) Init() error {
|
||||
//
|
||||
// TODO(cyphar): Log this so people know what's going on, once we
|
||||
// have proper logging in 'runc init'.
|
||||
if errors.Cause(err) != unix.ENOSYS {
|
||||
return errors.Wrap(err, "join session keyring")
|
||||
if !errors.Is(err, unix.ENOSYS) {
|
||||
return fmt.Errorf("unable to join session keyring: %w", err)
|
||||
}
|
||||
} else {
|
||||
// Make session keyring searcheable. If we've gotten this far we
|
||||
// Make session keyring searchable. If we've gotten this far we
|
||||
// bail on any error -- we don't want to have a keyring with bad
|
||||
// permissions.
|
||||
if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
|
||||
return errors.Wrap(err, "mod keyring permissions")
|
||||
return fmt.Errorf("unable to mod keyring permissions: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -87,9 +85,23 @@ func (l *linuxStandardInit) Init() error {
|
||||
|
||||
// initialises the labeling system
|
||||
selinux.GetEnabled()
|
||||
if err := prepareRootfs(l.pipe, l.config); err != nil {
|
||||
|
||||
// We don't need the mountFds after prepareRootfs() nor if it fails.
|
||||
err := prepareRootfs(l.pipe, l.config, l.mountFds)
|
||||
for _, m := range l.mountFds {
|
||||
if m == -1 {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := unix.Close(m); err != nil {
|
||||
return fmt.Errorf("Unable to close mountFds fds: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Set up the console. This has to be done *before* we finalize the rootfs,
|
||||
// but *after* we've given the user the chance to set up all of the mounts
|
||||
// they wanted.
|
||||
@@ -98,7 +110,7 @@ func (l *linuxStandardInit) Init() error {
|
||||
return err
|
||||
}
|
||||
if err := system.Setctty(); err != nil {
|
||||
return errors.Wrap(err, "setctty")
|
||||
return &os.SyscallError{Syscall: "ioctl(setctty)", Err: err}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -111,52 +123,57 @@ func (l *linuxStandardInit) Init() error {
|
||||
|
||||
if hostname := l.config.Config.Hostname; hostname != "" {
|
||||
if err := unix.Sethostname([]byte(hostname)); err != nil {
|
||||
return errors.Wrap(err, "sethostname")
|
||||
return &os.SyscallError{Syscall: "sethostname", Err: err}
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
||||
return errors.Wrap(err, "apply apparmor profile")
|
||||
return fmt.Errorf("unable to apply apparmor profile: %w", err)
|
||||
}
|
||||
|
||||
for key, value := range l.config.Config.Sysctl {
|
||||
if err := writeSystemProperty(key, value); err != nil {
|
||||
return errors.Wrapf(err, "write sysctl key %s", key)
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, path := range l.config.Config.ReadonlyPaths {
|
||||
if err := readonlyPath(path); err != nil {
|
||||
return errors.Wrapf(err, "readonly path %s", path)
|
||||
return fmt.Errorf("can't make %q read-only: %w", path, err)
|
||||
}
|
||||
}
|
||||
for _, path := range l.config.Config.MaskPaths {
|
||||
if err := maskPath(path, l.config.Config.MountLabel); err != nil {
|
||||
return errors.Wrapf(err, "mask path %s", path)
|
||||
return fmt.Errorf("can't mask path %s: %w", path, err)
|
||||
}
|
||||
}
|
||||
pdeath, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "get pdeath signal")
|
||||
return fmt.Errorf("can't get pdeath signal: %w", err)
|
||||
}
|
||||
if l.config.NoNewPrivileges {
|
||||
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
|
||||
return errors.Wrap(err, "set nonewprivileges")
|
||||
return &os.SyscallError{Syscall: "prctl(SET_NO_NEW_PRIVS)", Err: err}
|
||||
}
|
||||
}
|
||||
// Tell our parent that we're ready to Execv. This must be done before the
|
||||
// Seccomp rules have been applied, because we need to be able to read and
|
||||
// write to a socket.
|
||||
if err := syncParentReady(l.pipe); err != nil {
|
||||
return errors.Wrap(err, "sync ready")
|
||||
return fmt.Errorf("sync ready: %w", err)
|
||||
}
|
||||
if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil {
|
||||
return errors.Wrap(err, "set process label")
|
||||
return fmt.Errorf("can't set process label: %w", err)
|
||||
}
|
||||
defer selinux.SetExecLabel("") //nolint: errcheck
|
||||
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
|
||||
// do this before dropping capabilities; otherwise do it as late as possible
|
||||
// just before execve so as few syscalls take place after it as possible.
|
||||
if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -166,7 +183,7 @@ func (l *linuxStandardInit) Init() error {
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := pdeath.Restore(); err != nil {
|
||||
return errors.Wrap(err, "restore pdeath signal")
|
||||
return fmt.Errorf("can't restore pdeath signal: %w", err)
|
||||
}
|
||||
// Compare the parent from the initial start of the init process and make
|
||||
// sure that it did not change. if the parent changes that means it died
|
||||
@@ -181,26 +198,43 @@ func (l *linuxStandardInit) Init() error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Set seccomp as close to execve as possible, so as few syscalls take
|
||||
// place afterward (reducing the amount of syscalls that users need to
|
||||
// enable in their seccomp profiles). However, this needs to be done
|
||||
// before closing the pipe since we need it to pass the seccompFd to
|
||||
// the parent.
|
||||
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
|
||||
seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to init seccomp: %w", err)
|
||||
}
|
||||
|
||||
if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// Close the pipe to signal that we have completed our init.
|
||||
logrus.Debugf("init: closing the pipe to signal completion")
|
||||
_ = l.pipe.Close()
|
||||
|
||||
// Close the log pipe fd so the parent's ForwardLogs can exit.
|
||||
if err := unix.Close(l.logFd); err != nil {
|
||||
return newSystemErrorWithCause(err, "closing log pipe fd")
|
||||
return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err}
|
||||
}
|
||||
|
||||
// Wait for the FIFO to be opened on the other side before exec-ing the
|
||||
// user process. We open it through /proc/self/fd/$fd, because the fd that
|
||||
// was given to us was an O_PATH fd to the fifo itself. Linux allows us to
|
||||
// re-open an O_PATH fd through /proc.
|
||||
fd, err := unix.Open("/proc/self/fd/"+strconv.Itoa(l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0)
|
||||
fifoPath := "/proc/self/fd/" + strconv.Itoa(l.fifoFd)
|
||||
fd, err := unix.Open(fifoPath, unix.O_WRONLY|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "open exec fifo")
|
||||
return &os.PathError{Op: "open exec fifo", Path: fifoPath, Err: err}
|
||||
}
|
||||
if _, err := unix.Write(fd, []byte("0")); err != nil {
|
||||
return newSystemErrorWithCause(err, "write 0 exec fifo")
|
||||
return &os.PathError{Op: "write exec fifo", Path: fifoPath, Err: err}
|
||||
}
|
||||
|
||||
// Close the O_PATH fifofd fd before exec because the kernel resets
|
||||
// dumpable in the wrong order. This has been fixed in newer kernels, but
|
||||
// we keep this to ensure CVE-2016-9962 doesn't re-emerge on older kernels.
|
||||
@@ -208,14 +242,6 @@ func (l *linuxStandardInit) Init() error {
|
||||
// since been resolved.
|
||||
// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
|
||||
_ = unix.Close(l.fifoFd)
|
||||
// Set seccomp as close to execve as possible, so as few syscalls take
|
||||
// place afterward (reducing the amount of syscalls that users need to
|
||||
// enable in their seccomp profiles).
|
||||
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
return newSystemErrorWithCause(err, "init seccomp")
|
||||
}
|
||||
}
|
||||
|
||||
s := l.config.SpecState
|
||||
s.Pid = unix.Getpid()
|
||||
@@ -224,8 +250,5 @@ func (l *linuxStandardInit) Init() error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := system.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
|
||||
return newSystemErrorWithCause(err, "exec user process")
|
||||
}
|
||||
return nil
|
||||
return system.Exec(name, l.config.Args[0:], os.Environ())
|
||||
}
|
||||
|
8
vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
generated
vendored
8
vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
generated
vendored
@@ -1,5 +1,3 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
@@ -117,7 +115,7 @@ func (r *runningState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *stoppedState:
|
||||
if r.c.runType() == Running {
|
||||
return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped)
|
||||
return ErrRunning
|
||||
}
|
||||
r.c.state = s
|
||||
return nil
|
||||
@@ -132,7 +130,7 @@ func (r *runningState) transition(s containerState) error {
|
||||
|
||||
func (r *runningState) destroy() error {
|
||||
if r.c.runType() == Running {
|
||||
return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
|
||||
return ErrRunning
|
||||
}
|
||||
return destroy(r.c)
|
||||
}
|
||||
@@ -190,7 +188,7 @@ func (p *pausedState) destroy() error {
|
||||
}
|
||||
return destroy(p.c)
|
||||
}
|
||||
return newGenericError(fmt.Errorf("container is paused"), ContainerPaused)
|
||||
return ErrPaused
|
||||
}
|
||||
|
||||
// restoredState is the same as the running state but also has associated checkpoint
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user