vendor: bump runc to rc95
runc rc95 contains a fix for CVE-2021-30465. runc rc94 provides fixes and improvements. One notable change is that the cgroup manager's Set now accepts Resources rather than Cgroup (see https://github.com/opencontainers/runc/pull/2906). Modify the code accordingly. Also update runc dependencies (as hinted by hack/lint-dependencies.sh): github.com/cilium/ebpf v0.5.0 github.com/containerd/console v1.0.2 github.com/coreos/go-systemd/v22 v22.3.1 github.com/godbus/dbus/v5 v5.0.4 github.com/moby/sys/mountinfo v0.4.1 golang.org/x/sys v0.0.0-20210426230700-d19ff857e887 github.com/google/go-cmp v0.5.4 github.com/kr/pretty v0.2.1 github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
8
vendor/github.com/opencontainers/runc/libcontainer/README.md
generated
vendored
8
vendor/github.com/opencontainers/runc/libcontainer/README.md
generated
vendored
@@ -57,6 +57,10 @@ struct describing how the container is to be created. A sample would look simila
|
||||
|
||||
```go
|
||||
defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
||||
var devices []*configs.DeviceRule
|
||||
for _, device := range specconv.AllowedDevices {
|
||||
devices = append(devices, &device.Rule)
|
||||
}
|
||||
config := &configs.Config{
|
||||
Rootfs: "/your/path/to/rootfs",
|
||||
Capabilities: &configs.Capabilities{
|
||||
@@ -155,7 +159,7 @@ config := &configs.Config{
|
||||
Parent: "system",
|
||||
Resources: &configs.Resources{
|
||||
MemorySwappiness: nil,
|
||||
Devices: specconv.AllowedDevices,
|
||||
Devices: devices,
|
||||
},
|
||||
},
|
||||
MaskPaths: []string{
|
||||
@@ -313,7 +317,7 @@ state, err := container.State()
|
||||
#### Checkpoint & Restore
|
||||
|
||||
libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
|
||||
This let's you save the state of a process running inside a container to disk, and then restore
|
||||
This lets you save the state of a process running inside a container to disk, and then restore
|
||||
that state into a new process, on the same machine or on another machine.
|
||||
|
||||
`criu` version 1.5.2 or higher is required to use checkpoint and restore.
|
||||
|
28
vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go
generated
vendored
28
vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go
generated
vendored
@@ -1,27 +1,41 @@
|
||||
package apparmor
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
var (
|
||||
appArmorEnabled bool
|
||||
checkAppArmor sync.Once
|
||||
)
|
||||
|
||||
// IsEnabled returns true if apparmor is enabled for the host.
|
||||
func IsEnabled() bool {
|
||||
if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil {
|
||||
buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
|
||||
return err == nil && bytes.HasPrefix(buf, []byte("Y"))
|
||||
}
|
||||
return false
|
||||
checkAppArmor.Do(func() {
|
||||
if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil {
|
||||
buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
|
||||
appArmorEnabled = err == nil && len(buf) > 1 && buf[0] == 'Y'
|
||||
}
|
||||
})
|
||||
return appArmorEnabled
|
||||
}
|
||||
|
||||
func setProcAttr(attr, value string) error {
|
||||
// Under AppArmor you can only change your own attr, so use /proc/self/
|
||||
// instead of /proc/<tid>/ like libapparmor does
|
||||
f, err := os.OpenFile("/proc/self/attr/"+attr, os.O_WRONLY, 0)
|
||||
attrPath := "/proc/self/attr/apparmor/" + attr
|
||||
if _, err := os.Stat(attrPath); errors.Is(err, os.ErrNotExist) {
|
||||
// fall back to the old convention
|
||||
attrPath = "/proc/self/attr/" + attr
|
||||
}
|
||||
|
||||
f, err := os.OpenFile(attrPath, os.O_WRONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
105
vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go
generated
vendored
105
vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go
generated
vendored
@@ -3,16 +3,26 @@
|
||||
package capabilities
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
)
|
||||
|
||||
const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS
|
||||
const allCapabilityTypes = capability.CAPS | capability.BOUNDING | capability.AMBIENT
|
||||
|
||||
var capabilityMap map[string]capability.Cap
|
||||
var (
|
||||
capabilityMap map[string]capability.Cap
|
||||
capTypes = []capability.CapType{
|
||||
capability.BOUNDING,
|
||||
capability.PERMITTED,
|
||||
capability.INHERITABLE,
|
||||
capability.EFFECTIVE,
|
||||
capability.AMBIENT,
|
||||
}
|
||||
)
|
||||
|
||||
func init() {
|
||||
capabilityMap = make(map[string]capability.Cap, capability.CAP_LAST_CAP+1)
|
||||
@@ -24,73 +34,78 @@ func init() {
|
||||
}
|
||||
}
|
||||
|
||||
// New creates a new Caps from the given Capabilities config.
|
||||
// New creates a new Caps from the given Capabilities config. Unknown Capabilities
|
||||
// or Capabilities that are unavailable in the current environment are ignored,
|
||||
// printing a warning instead.
|
||||
func New(capConfig *configs.Capabilities) (*Caps, error) {
|
||||
var (
|
||||
err error
|
||||
caps Caps
|
||||
err error
|
||||
c Caps
|
||||
)
|
||||
|
||||
if caps.bounding, err = capSlice(capConfig.Bounding); err != nil {
|
||||
unknownCaps := make(map[string]struct{})
|
||||
c.caps = map[capability.CapType][]capability.Cap{
|
||||
capability.BOUNDING: capSlice(capConfig.Bounding, unknownCaps),
|
||||
capability.EFFECTIVE: capSlice(capConfig.Effective, unknownCaps),
|
||||
capability.INHERITABLE: capSlice(capConfig.Inheritable, unknownCaps),
|
||||
capability.PERMITTED: capSlice(capConfig.Permitted, unknownCaps),
|
||||
capability.AMBIENT: capSlice(capConfig.Ambient, unknownCaps),
|
||||
}
|
||||
if c.pid, err = capability.NewPid2(0); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if caps.effective, err = capSlice(capConfig.Effective); err != nil {
|
||||
if err = c.pid.Load(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if caps.inheritable, err = capSlice(capConfig.Inheritable); err != nil {
|
||||
return nil, err
|
||||
if len(unknownCaps) > 0 {
|
||||
logrus.Warn("ignoring unknown or unavailable capabilities: ", mapKeys(unknownCaps))
|
||||
}
|
||||
if caps.permitted, err = capSlice(capConfig.Permitted); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if caps.ambient, err = capSlice(capConfig.Ambient); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if caps.pid, err = capability.NewPid2(0); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err = caps.pid.Load(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &caps, nil
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
func capSlice(caps []string) ([]capability.Cap, error) {
|
||||
out := make([]capability.Cap, len(caps))
|
||||
for i, c := range caps {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
// capSlice converts the slice of capability names in caps, to their numeric
|
||||
// equivalent, and returns them as a slice. Unknown or unavailable capabilities
|
||||
// are not returned, but appended to unknownCaps.
|
||||
func capSlice(caps []string, unknownCaps map[string]struct{}) []capability.Cap {
|
||||
var out []capability.Cap
|
||||
for _, c := range caps {
|
||||
if v, ok := capabilityMap[c]; !ok {
|
||||
unknownCaps[c] = struct{}{}
|
||||
} else {
|
||||
out = append(out, v)
|
||||
}
|
||||
out[i] = v
|
||||
}
|
||||
return out, nil
|
||||
return out
|
||||
}
|
||||
|
||||
// mapKeys returns the keys of input in sorted order
|
||||
func mapKeys(input map[string]struct{}) []string {
|
||||
var keys []string
|
||||
for c := range input {
|
||||
keys = append(keys, c)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
return keys
|
||||
}
|
||||
|
||||
// Caps holds the capabilities for a container.
|
||||
type Caps struct {
|
||||
pid capability.Capabilities
|
||||
bounding []capability.Cap
|
||||
effective []capability.Cap
|
||||
inheritable []capability.Cap
|
||||
permitted []capability.Cap
|
||||
ambient []capability.Cap
|
||||
pid capability.Capabilities
|
||||
caps map[capability.CapType][]capability.Cap
|
||||
}
|
||||
|
||||
// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist.
|
||||
func (c *Caps) ApplyBoundingSet() error {
|
||||
c.pid.Clear(capability.BOUNDS)
|
||||
c.pid.Set(capability.BOUNDS, c.bounding...)
|
||||
return c.pid.Apply(capability.BOUNDS)
|
||||
c.pid.Clear(capability.BOUNDING)
|
||||
c.pid.Set(capability.BOUNDING, c.caps[capability.BOUNDING]...)
|
||||
return c.pid.Apply(capability.BOUNDING)
|
||||
}
|
||||
|
||||
// ApplyCaps sets all the capabilities for the current process in the config.
|
||||
func (c *Caps) ApplyCaps() error {
|
||||
c.pid.Clear(allCapabilityTypes)
|
||||
c.pid.Set(capability.BOUNDS, c.bounding...)
|
||||
c.pid.Set(capability.PERMITTED, c.permitted...)
|
||||
c.pid.Set(capability.INHERITABLE, c.inheritable...)
|
||||
c.pid.Set(capability.EFFECTIVE, c.effective...)
|
||||
c.pid.Set(capability.AMBIENT, c.ambient...)
|
||||
for _, g := range capTypes {
|
||||
c.pid.Set(g, c.caps[g]...)
|
||||
}
|
||||
return c.pid.Apply(allCapabilityTypes)
|
||||
}
|
||||
|
36
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
36
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
@@ -7,37 +7,44 @@ import (
|
||||
)
|
||||
|
||||
type Manager interface {
|
||||
// Applies cgroup configuration to the process with the specified pid
|
||||
// Apply creates a cgroup, if not yet created, and adds a process
|
||||
// with the specified pid into that cgroup. A special value of -1
|
||||
// can be used to merely create a cgroup.
|
||||
Apply(pid int) error
|
||||
|
||||
// Returns the PIDs inside the cgroup set
|
||||
// GetPids returns the PIDs of all processes inside the cgroup.
|
||||
GetPids() ([]int, error)
|
||||
|
||||
// Returns the PIDs inside the cgroup set & all sub-cgroups
|
||||
// GetAllPids returns the PIDs of all processes inside the cgroup
|
||||
// and all its sub-cgroups.
|
||||
GetAllPids() ([]int, error)
|
||||
|
||||
// Returns statistics for the cgroup set
|
||||
// GetStats returns cgroups statistics.
|
||||
GetStats() (*Stats, error)
|
||||
|
||||
// Toggles the freezer cgroup according with specified state
|
||||
// Freeze sets the freezer cgroup to the specified state.
|
||||
Freeze(state configs.FreezerState) error
|
||||
|
||||
// Destroys the cgroup set
|
||||
// Destroy removes cgroup.
|
||||
Destroy() error
|
||||
|
||||
// Path returns a cgroup path to the specified controller/subsystem.
|
||||
// For cgroupv2, the argument is unused and can be empty.
|
||||
Path(string) string
|
||||
|
||||
// Sets the cgroup as configured.
|
||||
Set(container *configs.Config) error
|
||||
// Set sets cgroup resources parameters/limits. If the argument is nil,
|
||||
// the resources specified during Manager creation (or the previous call
|
||||
// to Set) are used.
|
||||
Set(r *configs.Resources) error
|
||||
|
||||
// GetPaths returns cgroup path(s) to save in a state file in order to restore later.
|
||||
// GetPaths returns cgroup path(s) to save in a state file in order to
|
||||
// restore later.
|
||||
//
|
||||
// For cgroup v1, a key is cgroup subsystem name, and the value is the path
|
||||
// to the cgroup for this subsystem.
|
||||
// For cgroup v1, a key is cgroup subsystem name, and the value is the
|
||||
// path to the cgroup for this subsystem.
|
||||
//
|
||||
// For cgroup v2 unified hierarchy, a key is "", and the value is the unified path.
|
||||
// For cgroup v2 unified hierarchy, a key is "", and the value is the
|
||||
// unified path.
|
||||
GetPaths() map[string]string
|
||||
|
||||
// GetCgroups returns the cgroup data as configured.
|
||||
@@ -46,6 +53,9 @@ type Manager interface {
|
||||
// GetFreezerState retrieves the current FreezerState of the cgroup.
|
||||
GetFreezerState() (configs.FreezerState, error)
|
||||
|
||||
// Whether the cgroup path exists or not
|
||||
// Exists returns whether the cgroup path exists or not.
|
||||
Exists() bool
|
||||
|
||||
// OOMKillCount reports OOM kill count for the cgroup.
|
||||
OOMKillCount() (uint64, error)
|
||||
}
|
||||
|
@@ -127,10 +127,10 @@ func (p *program) appendDevice(dev *devices.Rule) error {
|
||||
}
|
||||
if hasAccess {
|
||||
p.insts = append(p.insts,
|
||||
// if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next
|
||||
// if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next
|
||||
asm.Mov.Reg32(asm.R1, asm.R3),
|
||||
asm.And.Imm32(asm.R1, bpfAccess),
|
||||
asm.JEq.Imm(asm.R1, 0, nextBlockSym),
|
||||
asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMajor {
|
||||
|
18
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf.go
generated
vendored
18
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf.go
generated
vendored
@@ -3,6 +3,7 @@ package ebpf
|
||||
import (
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/link"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
@@ -32,12 +33,23 @@ func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
if err := prog.Attach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
||||
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||
Target: dirFD,
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
Flags: unix.BPF_F_ALLOW_MULTI,
|
||||
})
|
||||
if err != nil {
|
||||
return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
|
||||
}
|
||||
closer := func() error {
|
||||
if err := prog.Detach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
||||
return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
|
||||
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||
Target: dirFD,
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
})
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE)")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
20
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
generated
vendored
20
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
generated
vendored
@@ -25,19 +25,19 @@ func (s *BlkioGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.BlkioWeight != 0 {
|
||||
if err := fscommon.WriteFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.BlkioWeight != 0 {
|
||||
if err := fscommon.WriteFile(path, "blkio.weight", strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.BlkioLeafWeight != 0 {
|
||||
if err := fscommon.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil {
|
||||
if r.BlkioLeafWeight != 0 {
|
||||
if err := fscommon.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, wd := range cgroup.Resources.BlkioWeightDevice {
|
||||
for _, wd := range r.BlkioWeightDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -45,22 +45,22 @@ func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
|
||||
for _, td := range r.BlkioThrottleReadBpsDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
|
||||
for _, td := range r.BlkioThrottleWriteBpsDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
|
||||
for _, td := range r.BlkioThrottleReadIOPSDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
|
||||
for _, td := range r.BlkioThrottleWriteIOPSDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
30
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
30
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
@@ -32,7 +32,7 @@ func (s *CpuGroup) Apply(path string, d *cgroupData) error {
|
||||
// We should set the real-Time group scheduling settings before moving
|
||||
// in the process because if the process is already in SCHED_RR mode
|
||||
// and no RT bandwidth is set, adding it will fail.
|
||||
if err := s.SetRtSched(path, d.config); err != nil {
|
||||
if err := s.SetRtSched(path, d.config.Resources); err != nil {
|
||||
return err
|
||||
}
|
||||
// Since we are not using join(), we need to place the pid
|
||||
@@ -40,23 +40,23 @@ func (s *CpuGroup) Apply(path string, d *cgroupData) error {
|
||||
return cgroups.WriteCgroupProc(path, d.pid)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuRtPeriod != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
|
||||
func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
|
||||
if r.CpuRtPeriod != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuRtRuntime != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
|
||||
if r.CpuRtRuntime != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuShares != 0 {
|
||||
shares := cgroup.Resources.CpuShares
|
||||
func (s *CpuGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.CpuShares != 0 {
|
||||
shares := r.CpuShares
|
||||
if err := fscommon.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -72,17 +72,17 @@ func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead)
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuPeriod != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
|
||||
if r.CpuPeriod != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(r.CpuPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuQuota != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil {
|
||||
if r.CpuQuota != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return s.SetRtSched(path, cgroup)
|
||||
return s.SetRtSched(path, r)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
@@ -97,7 +97,7 @@ func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
@@ -43,7 +43,7 @@ func (s *CpuacctGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
func (s *CpuacctGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
20
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
20
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
@@ -24,17 +24,17 @@ func (s *CpusetGroup) Name() string {
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Apply(path string, d *cgroupData) error {
|
||||
return s.ApplyDir(path, d.config, d.pid)
|
||||
return s.ApplyDir(path, d.config.Resources, d.pid)
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpusetCpus != "" {
|
||||
if err := fscommon.WriteFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.CpusetCpus != "" {
|
||||
if err := fscommon.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpusetMems != "" {
|
||||
if err := fscommon.WriteFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
||||
if r.CpusetMems != "" {
|
||||
if err := fscommon.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -144,7 +144,7 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error {
|
||||
func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error {
|
||||
// This might happen if we have no cpuset cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if dir == "" {
|
||||
@@ -166,7 +166,7 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
|
||||
// specified configs, otherwise, inherit from parent. This makes
|
||||
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
|
||||
// keep backward compatibility.
|
||||
if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
|
||||
if err := s.ensureCpusAndMems(dir, r); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -241,8 +241,8 @@ func isEmptyCpuset(str string) bool {
|
||||
return str == "" || str == "\n"
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error {
|
||||
if err := s.Set(path, cgroup); err != nil {
|
||||
func (s *CpusetGroup) ensureCpusAndMems(path string, r *configs.Resources) error {
|
||||
if err := s.Set(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
return cpusetCopyIfNeeded(path, filepath.Dir(path))
|
||||
|
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
@@ -12,7 +12,7 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/userns"
|
||||
)
|
||||
|
||||
type DevicesGroup struct {
|
||||
@@ -54,8 +54,8 @@ func buildEmulator(rules []*devices.Rule) (*cgroupdevices.Emulator, error) {
|
||||
return emu, nil
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if system.RunningInUserNS() || cgroup.SkipDevices {
|
||||
func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
|
||||
if userns.RunningInUserNS() || r.SkipDevices {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -65,7 +65,7 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
target, err := buildEmulator(cgroup.Resources.Devices)
|
||||
target, err := buildEmulator(r.Devices)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
58
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
58
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
@@ -12,6 +12,7 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
@@ -26,29 +27,62 @@ func (s *FreezerGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
switch cgroup.Resources.Freezer {
|
||||
func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
|
||||
switch r.Freezer {
|
||||
case configs.Frozen:
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
// Freezing failed, and it is bad and dangerous
|
||||
// to leave the cgroup in FROZEN or FREEZING
|
||||
// state, so (try to) thaw it back.
|
||||
_ = fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
}
|
||||
}()
|
||||
|
||||
// As per older kernel docs (freezer-subsystem.txt before
|
||||
// kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
|
||||
// userspace should either retry or thaw. While current
|
||||
// kernel cgroup v1 docs no longer mention a need to retry,
|
||||
// the kernel (tested on v5.4, Ubuntu 20.04) can't reliably
|
||||
// freeze a cgroup while new processes keep appearing in it
|
||||
// even a recent kernel (v5.4, Ubuntu 20.04) can't reliably
|
||||
// freeze a cgroup v1 while new processes keep appearing in it
|
||||
// (either via fork/clone or by writing new PIDs to
|
||||
// cgroup.procs).
|
||||
//
|
||||
// The number of retries below is chosen to have a decent
|
||||
// chance to succeed even in the worst case scenario (runc
|
||||
// pause/unpause with parallel runc exec).
|
||||
// The numbers below are empirically chosen to have a decent
|
||||
// chance to succeed in various scenarios ("runc pause/unpause
|
||||
// with parallel runc exec" and "bare freeze/unfreeze on a very
|
||||
// slow system"), tested on RHEL7 and Ubuntu 20.04 kernels.
|
||||
//
|
||||
// Adding any amount of sleep in between retries did not
|
||||
// increase the chances of successful freeze.
|
||||
// increase the chances of successful freeze in "pause/unpause
|
||||
// with parallel exec" reproducer. OTOH, adding an occasional
|
||||
// sleep helped for the case where the system is extremely slow
|
||||
// (CentOS 7 VM on GHA CI).
|
||||
//
|
||||
// Alas, this is still a game of chances, since the real fix
|
||||
// belongs in the kernel (cgroup v2 does not have this bug).
|
||||
|
||||
for i := 0; i < 1000; i++ {
|
||||
if i%50 == 49 {
|
||||
// Occasional thaw and sleep improves
|
||||
// the chances to succeed in freezing
|
||||
// in case new processes keep appearing
|
||||
// in the cgroup.
|
||||
_ = fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
|
||||
if err := fscommon.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if i%25 == 24 {
|
||||
// Occasional short sleep before reading
|
||||
// the state back also improves the chances to
|
||||
// succeed in freezing in case of a very slow
|
||||
// system.
|
||||
time.Sleep(10 * time.Microsecond)
|
||||
}
|
||||
state, err := fscommon.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -58,6 +92,9 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
case "FREEZING":
|
||||
continue
|
||||
case string(configs.Frozen):
|
||||
if i > 1 {
|
||||
logrus.Debugf("frozen after %d retries", i)
|
||||
}
|
||||
return nil
|
||||
default:
|
||||
// should never happen
|
||||
@@ -65,16 +102,13 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
}
|
||||
}
|
||||
// Despite our best efforts, it got stuck in FREEZING.
|
||||
// Leaving it in this state is bad and dangerous, so
|
||||
// let's (try to) thaw it back and error out.
|
||||
_ = fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
return errors.New("unable to freeze")
|
||||
case configs.Thawed:
|
||||
return fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer))
|
||||
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer))
|
||||
}
|
||||
}
|
||||
|
||||
|
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
generated
vendored
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
generated
vendored
@@ -9,6 +9,7 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
@@ -43,8 +44,8 @@ type subsystem interface {
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Creates and joins the cgroup represented by 'cgroupData'.
|
||||
Apply(path string, c *cgroupData) error
|
||||
// Set the cgroup represented by cgroup.
|
||||
Set(path string, cgroup *configs.Cgroup) error
|
||||
// Set sets the cgroup resources.
|
||||
Set(path string, r *configs.Resources) error
|
||||
}
|
||||
|
||||
type manager struct {
|
||||
@@ -273,8 +274,8 @@ func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *manager) Set(container *configs.Config) error {
|
||||
if container.Cgroups == nil {
|
||||
func (m *manager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -283,7 +284,7 @@ func (m *manager) Set(container *configs.Config) error {
|
||||
if m.cgroups != nil && m.cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
if container.Cgroups.Resources.Unified != nil {
|
||||
if r.Unified != nil {
|
||||
return cgroups.ErrV1NoUnified
|
||||
}
|
||||
|
||||
@@ -291,11 +292,11 @@ func (m *manager) Set(container *configs.Config) error {
|
||||
defer m.mu.Unlock()
|
||||
for _, sys := range subsystems {
|
||||
path := m.paths[sys.Name()]
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
if err := sys.Set(path, r); err != nil {
|
||||
if m.rootless && sys.Name() == "devices" {
|
||||
continue
|
||||
}
|
||||
// When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// When m.rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if path == "" {
|
||||
@@ -321,7 +322,7 @@ func (m *manager) Freeze(state configs.FreezerState) error {
|
||||
prevState := m.cgroups.Resources.Freezer
|
||||
m.cgroups.Resources.Freezer = state
|
||||
freezer := &FreezerGroup{}
|
||||
if err := freezer.Set(path, m.cgroups); err != nil {
|
||||
if err := freezer.Set(path, m.cgroups.Resources); err != nil {
|
||||
m.cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
@@ -421,3 +422,17 @@ func (m *manager) GetFreezerState() (configs.FreezerState, error) {
|
||||
func (m *manager) Exists() bool {
|
||||
return cgroups.PathExists(m.Path("devices"))
|
||||
}
|
||||
|
||||
func OOMKillCount(path string) (uint64, error) {
|
||||
return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill")
|
||||
}
|
||||
|
||||
func (m *manager) OOMKillCount() (uint64, error) {
|
||||
c, err := OOMKillCount(m.Path("memory"))
|
||||
// Ignore ENOENT when rootless as it couldn't create cgroup.
|
||||
if err != nil && m.rootless && os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
|
||||
return c, err
|
||||
}
|
||||
|
4
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
generated
vendored
4
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
generated
vendored
@@ -22,8 +22,8 @@ func (s *HugetlbGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
for _, hugetlb := range cgroup.Resources.HugetlbLimit {
|
||||
func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
|
||||
for _, hugetlb := range r.HugetlbLimit {
|
||||
if err := fscommon.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
56
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem.go
generated
vendored
56
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem.go
generated
vendored
@@ -1,56 +0,0 @@
|
||||
// +build linux,!nokmem
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
|
||||
|
||||
func EnableKernelMemoryAccounting(path string) error {
|
||||
// Ensure that kernel memory is available in this kernel build. If it
|
||||
// isn't, we just ignore it because EnableKernelMemoryAccounting is
|
||||
// automatically called for all memory limits.
|
||||
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
|
||||
return nil
|
||||
}
|
||||
// We have to limit the kernel memory here as it won't be accounted at all
|
||||
// until a limit is set on the cgroup and limit cannot be set once the
|
||||
// cgroup has children, or if there are already tasks in the cgroup.
|
||||
for _, i := range []int64{1, -1} {
|
||||
if err := setKernelMemory(path, i); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
||||
if path == "" {
|
||||
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
|
||||
}
|
||||
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
|
||||
// We have specifically been asked to set a kmem limit. If the kernel
|
||||
// doesn't support it we *must* error out.
|
||||
return errors.New("kernel memory accounting not supported by this kernel")
|
||||
}
|
||||
if err := fscommon.WriteFile(path, cgroupKernelMemoryLimit, strconv.FormatInt(kernelMemoryLimit, 10)); err != nil {
|
||||
// Check if the error number returned by the syscall is "EBUSY"
|
||||
// The EBUSY signal is returned on attempts to write to the
|
||||
// memory.kmem.limit_in_bytes file if the cgroup has children or
|
||||
// once tasks have been attached to the cgroup
|
||||
if errors.Is(err, unix.EBUSY) {
|
||||
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
15
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem_disabled.go
generated
vendored
15
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem_disabled.go
generated
vendored
@@ -1,15 +0,0 @@
|
||||
// +build linux,nokmem
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
func EnableKernelMemoryAccounting(path string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
||||
return errors.New("kernel memory accounting disabled in this runc build")
|
||||
}
|
151
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
151
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
@@ -14,11 +14,15 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
||||
cgroupMemoryLimit = "memory.limit_in_bytes"
|
||||
cgroupMemoryUsage = "memory.usage_in_bytes"
|
||||
cgroupMemoryMaxUsage = "memory.max_usage_in_bytes"
|
||||
)
|
||||
|
||||
type MemoryGroup struct {
|
||||
@@ -29,48 +33,55 @@ func (s *MemoryGroup) Name() string {
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Apply(path string, d *cgroupData) (err error) {
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
if memoryAssigned(d.config) {
|
||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// Only enable kernel memory accounting when this cgroup
|
||||
// is created by libcontainer, otherwise we might get
|
||||
// error when people use `cgroupsPath` to join an existing
|
||||
// cgroup whose kernel memory is not initialized.
|
||||
if err := EnableKernelMemoryAccounting(path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
os.RemoveAll(path)
|
||||
}
|
||||
}()
|
||||
|
||||
// We need to join memory cgroup after set memory limits, because
|
||||
// kmem.limit_in_bytes can only be set when the cgroup is empty.
|
||||
return join(path, d.pid)
|
||||
}
|
||||
|
||||
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
|
||||
func setMemory(path string, val int64) error {
|
||||
if val == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
|
||||
if !errors.Is(err, unix.EBUSY) {
|
||||
return err
|
||||
}
|
||||
|
||||
// EBUSY means the kernel can't set new limit as it's too low
|
||||
// (lower than the current usage). Return more specific error.
|
||||
usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return errors.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
|
||||
}
|
||||
|
||||
func setSwap(path string, val int64) error {
|
||||
if val == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
|
||||
}
|
||||
|
||||
func setMemoryAndSwap(path string, r *configs.Resources) error {
|
||||
// If the memory update is set to -1 and the swap is not explicitly
|
||||
// set, we should also set swap to -1, it means unlimited memory.
|
||||
if cgroup.Resources.Memory == -1 && cgroup.Resources.MemorySwap == 0 {
|
||||
if r.Memory == -1 && r.MemorySwap == 0 {
|
||||
// Only set swap if it's enabled in kernel
|
||||
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
|
||||
cgroup.Resources.MemorySwap = -1
|
||||
r.MemorySwap = -1
|
||||
}
|
||||
}
|
||||
|
||||
// When memory and swap memory are both set, we need to handle the cases
|
||||
// for updating container.
|
||||
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
|
||||
memoryUsage, err := getMemoryData(path, "")
|
||||
if r.Memory != 0 && r.MemorySwap != 0 {
|
||||
curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -78,72 +89,53 @@ func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
|
||||
// When updating the memory limit, we should adapt the write sequence
|
||||
// for memory and swap memory, so it won't fail because the new
|
||||
// value and the old value don't fit kernel's validation.
|
||||
if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) {
|
||||
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if cgroup.Resources.Memory != 0 {
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwap != 0 {
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
if err := setMemory(path, r.Memory); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if err := setMemory(path, r.Memory); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if err := setMemoryAndSwap(path, cgroup); err != nil {
|
||||
func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
|
||||
if err := setMemoryAndSwap(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if cgroup.Resources.KernelMemory != 0 {
|
||||
if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
|
||||
// ignore KernelMemory and KernelMemoryTCP
|
||||
|
||||
if r.MemoryReservation != 0 {
|
||||
if err := fscommon.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.MemoryReservation != 0 {
|
||||
if err := fscommon.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.KernelMemoryTCP != 0 {
|
||||
if err := fscommon.WriteFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.OomKillDisable {
|
||||
if r.OomKillDisable {
|
||||
if err := fscommon.WriteFile(path, "memory.oom_control", "1"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
|
||||
if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 {
|
||||
return nil
|
||||
} else if *cgroup.Resources.MemorySwappiness <= 100 {
|
||||
if err := fscommon.WriteFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
|
||||
} else if *r.MemorySwappiness <= 100 {
|
||||
if err := fscommon.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
|
||||
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *r.MemorySwappiness)
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -162,7 +154,7 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
|
||||
}
|
||||
@@ -212,8 +204,6 @@ func memoryAssigned(cgroup *configs.Cgroup) bool {
|
||||
return cgroup.Resources.Memory != 0 ||
|
||||
cgroup.Resources.MemoryReservation != 0 ||
|
||||
cgroup.Resources.MemorySwap > 0 ||
|
||||
cgroup.Resources.KernelMemory > 0 ||
|
||||
cgroup.Resources.KernelMemoryTCP > 0 ||
|
||||
cgroup.Resources.OomKillDisable ||
|
||||
(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
|
||||
}
|
||||
@@ -234,7 +224,9 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
if name != "" && os.IsNotExist(err) {
|
||||
// Ignore ENOENT as swap and kmem controllers
|
||||
// are optional in the kernel.
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
@@ -242,25 +234,16 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||
memoryData.Usage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
||||
}
|
||||
memoryData.MaxUsage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
}
|
||||
memoryData.Failcnt = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err)
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
@@ -24,7 +24,7 @@ func (s *NameGroup) Apply(path string, d *cgroupData) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
func (s *NameGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
6
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
6
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
@@ -21,9 +21,9 @@ func (s *NetClsGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.NetClsClassid != 0 {
|
||||
if err := fscommon.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
|
||||
func (s *NetClsGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.NetClsClassid != 0 {
|
||||
if err := fscommon.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
4
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
generated
vendored
4
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
generated
vendored
@@ -19,8 +19,8 @@ func (s *NetPrioGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
for _, prioMap := range cgroup.Resources.NetPrioIfpriomap {
|
||||
func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {
|
||||
for _, prioMap := range r.NetPrioIfpriomap {
|
||||
if err := fscommon.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
generated
vendored
@@ -18,7 +18,7 @@ func (s *PerfEventGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
func (s *PerfEventGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
@@ -23,13 +23,13 @@ func (s *PidsGroup) Apply(path string, d *cgroupData) error {
|
||||
return join(path, d.pid)
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.PidsLimit != 0 {
|
||||
func (s *PidsGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if cgroup.Resources.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
||||
if r.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(r.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := fscommon.WriteFile(path, "pids.max", limit); err != nil {
|
||||
|
20
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
20
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
@@ -12,15 +12,14 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isCpuSet(cgroup *configs.Cgroup) bool {
|
||||
return cgroup.Resources.CpuWeight != 0 || cgroup.Resources.CpuQuota != 0 || cgroup.Resources.CpuPeriod != 0
|
||||
func isCpuSet(r *configs.Resources) bool {
|
||||
return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0
|
||||
}
|
||||
|
||||
func setCpu(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if !isCpuSet(cgroup) {
|
||||
func setCpu(dirPath string, r *configs.Resources) error {
|
||||
if !isCpuSet(r) {
|
||||
return nil
|
||||
}
|
||||
r := cgroup.Resources
|
||||
|
||||
// NOTE: .CpuShares is not used here. Conversion is the caller's responsibility.
|
||||
if r.CpuWeight != 0 {
|
||||
@@ -57,7 +56,7 @@ func statCpu(dirPath string, stats *cgroups.Stats) error {
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -70,6 +69,15 @@ func statCpu(dirPath string, stats *cgroups.Stats) error {
|
||||
|
||||
case "system_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = v * 1000
|
||||
|
||||
case "nr_periods":
|
||||
stats.CpuStats.ThrottlingData.Periods = v
|
||||
|
||||
case "nr_throttled":
|
||||
stats.CpuStats.ThrottlingData.ThrottledPeriods = v
|
||||
|
||||
case "throttled_usec":
|
||||
stats.CpuStats.ThrottlingData.ThrottledTime = v * 1000
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
16
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
16
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
@@ -7,22 +7,22 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isCpusetSet(cgroup *configs.Cgroup) bool {
|
||||
return cgroup.Resources.CpusetCpus != "" || cgroup.Resources.CpusetMems != ""
|
||||
func isCpusetSet(r *configs.Resources) bool {
|
||||
return r.CpusetCpus != "" || r.CpusetMems != ""
|
||||
}
|
||||
|
||||
func setCpuset(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if !isCpusetSet(cgroup) {
|
||||
func setCpuset(dirPath string, r *configs.Resources) error {
|
||||
if !isCpusetSet(r) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if cgroup.Resources.CpusetCpus != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
if r.CpusetCpus != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpuset.cpus", r.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpusetMems != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
||||
if r.CpusetMems != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpuset.mems", r.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
30
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
generated
vendored
30
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
generated
vendored
@@ -10,7 +10,7 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func supportedControllers(cgroup *configs.Cgroup) (string, error) {
|
||||
func supportedControllers() (string, error) {
|
||||
return fscommon.ReadFile(UnifiedMountpoint, "/cgroup.controllers")
|
||||
}
|
||||
|
||||
@@ -18,13 +18,13 @@ func supportedControllers(cgroup *configs.Cgroup) (string, error) {
|
||||
// based on (1) controllers available and (2) resources that are being set.
|
||||
// We don't check "pseudo" controllers such as
|
||||
// "freezer" and "devices".
|
||||
func needAnyControllers(cgroup *configs.Cgroup) (bool, error) {
|
||||
if cgroup == nil {
|
||||
func needAnyControllers(r *configs.Resources) (bool, error) {
|
||||
if r == nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// list of all available controllers
|
||||
content, err := supportedControllers(cgroup)
|
||||
content, err := supportedControllers()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
@@ -39,22 +39,22 @@ func needAnyControllers(cgroup *configs.Cgroup) (bool, error) {
|
||||
return ok
|
||||
}
|
||||
|
||||
if isPidsSet(cgroup) && have("pids") {
|
||||
if isPidsSet(r) && have("pids") {
|
||||
return true, nil
|
||||
}
|
||||
if isMemorySet(cgroup) && have("memory") {
|
||||
if isMemorySet(r) && have("memory") {
|
||||
return true, nil
|
||||
}
|
||||
if isIoSet(cgroup) && have("io") {
|
||||
if isIoSet(r) && have("io") {
|
||||
return true, nil
|
||||
}
|
||||
if isCpuSet(cgroup) && have("cpu") {
|
||||
if isCpuSet(r) && have("cpu") {
|
||||
return true, nil
|
||||
}
|
||||
if isCpusetSet(cgroup) && have("cpuset") {
|
||||
if isCpusetSet(r) && have("cpuset") {
|
||||
return true, nil
|
||||
}
|
||||
if isHugeTlbSet(cgroup) && have("hugetlb") {
|
||||
if isHugeTlbSet(r) && have("hugetlb") {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
@@ -64,8 +64,8 @@ func needAnyControllers(cgroup *configs.Cgroup) (bool, error) {
|
||||
// containsDomainController returns whether the current config contains domain controller or not.
|
||||
// Refer to: http://man7.org/linux/man-pages/man7/cgroups.7.html
|
||||
// As at Linux 4.19, the following controllers are threaded: cpu, perf_event, and pids.
|
||||
func containsDomainController(cg *configs.Cgroup) bool {
|
||||
return isMemorySet(cg) || isIoSet(cg) || isCpuSet(cg) || isHugeTlbSet(cg)
|
||||
func containsDomainController(r *configs.Resources) bool {
|
||||
return isMemorySet(r) || isIoSet(r) || isCpuSet(r) || isHugeTlbSet(r)
|
||||
}
|
||||
|
||||
// CreateCgroupPath creates cgroupv2 path, enabling all the supported controllers.
|
||||
@@ -74,7 +74,7 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
|
||||
return fmt.Errorf("invalid cgroup path %s", path)
|
||||
}
|
||||
|
||||
content, err := supportedControllers(c)
|
||||
content, err := supportedControllers()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -115,7 +115,7 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
|
||||
// the controllers requested are thread-aware we can simply put the cgroup into
|
||||
// threaded mode.
|
||||
case "domain invalid":
|
||||
if containsDomainController(c) {
|
||||
if containsDomainController(c.Resources) {
|
||||
return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in an invalid state", current)
|
||||
} else {
|
||||
// Not entirely correct (in theory we'd always want to be a domain --
|
||||
@@ -129,7 +129,7 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
|
||||
case "domain threaded":
|
||||
fallthrough
|
||||
case "threaded":
|
||||
if containsDomainController(c) {
|
||||
if containsDomainController(c.Resources) {
|
||||
return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in %s mode", current, cgType)
|
||||
}
|
||||
}
|
||||
|
36
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
36
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
@@ -7,6 +7,8 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/userns"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
@@ -26,26 +28,40 @@ func isRWM(perms devices.Permissions) bool {
|
||||
return r && w && m
|
||||
}
|
||||
|
||||
// the logic is from crun
|
||||
// https://github.com/containers/crun/blob/0.10.2/src/libcrun/cgroup.c#L1644-L1652
|
||||
func canSkipEBPFError(cgroup *configs.Cgroup) bool {
|
||||
for _, dev := range cgroup.Resources.Devices {
|
||||
if dev.Allow || !isRWM(dev.Permissions) {
|
||||
// This is similar to the logic applied in crun for handling errors from bpf(2)
|
||||
// <https://github.com/containers/crun/blob/0.17/src/libcrun/cgroup.c#L2438-L2470>.
|
||||
func canSkipEBPFError(r *configs.Resources) bool {
|
||||
// If we're running in a user namespace we can ignore eBPF rules because we
|
||||
// usually cannot use bpf(2), as well as rootless containers usually don't
|
||||
// have the necessary privileges to mknod(2) device inodes or access
|
||||
// host-level instances (though ideally we would be blocking device access
|
||||
// for rootless containers anyway).
|
||||
if userns.RunningInUserNS() {
|
||||
return true
|
||||
}
|
||||
|
||||
// We cannot ignore an eBPF load error if any rule is a block rule or it
|
||||
// doesn't permit all access modes.
|
||||
//
|
||||
// NOTE: This will sometimes trigger in cases where access modes are split
|
||||
// between different rules but to handle this correctly would require
|
||||
// using ".../libcontainer/cgroup/devices".Emulator.
|
||||
for _, dev := range r.Devices {
|
||||
if !dev.Allow || !isRWM(dev.Permissions) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func setDevices(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.SkipDevices {
|
||||
func setDevices(dirPath string, r *configs.Resources) error {
|
||||
if r.SkipDevices {
|
||||
return nil
|
||||
}
|
||||
// XXX: This is currently a white-list (but all callers pass a blacklist of
|
||||
// devices). This is bad for a whole variety of reasons, but will need
|
||||
// to be fixed with co-ordinated effort with downstreams.
|
||||
devices := cgroup.Devices
|
||||
insts, license, err := devicefilter.DeviceFilter(devices)
|
||||
insts, license, err := devicefilter.DeviceFilter(r.Devices)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -66,7 +82,7 @@ func setDevices(dirPath string, cgroup *configs.Cgroup) error {
|
||||
// programs. You could temporarily insert a deny-everything program
|
||||
// but that would result in spurious failures during updates.
|
||||
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
||||
if !canSkipEBPFError(cgroup) {
|
||||
if !canSkipEBPFError(r) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
80
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
80
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
@@ -75,7 +75,7 @@ func (m *manager) Apply(pid int) error {
|
||||
// - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if m.rootless {
|
||||
if m.config.Path == "" {
|
||||
if blNeed, nErr := needAnyControllers(m.config); nErr == nil && !blNeed {
|
||||
if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed {
|
||||
return nil
|
||||
}
|
||||
return errors.Wrap(err, "rootless needs no limits + no cgrouppath when no permission is granted for cgroups")
|
||||
@@ -103,43 +103,27 @@ func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||
)
|
||||
|
||||
st := cgroups.NewStats()
|
||||
if err := m.getControllers(); err != nil {
|
||||
return st, err
|
||||
}
|
||||
|
||||
// pids (since kernel 4.5)
|
||||
if _, ok := m.controllers["pids"]; ok {
|
||||
if err := statPids(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
} else {
|
||||
if err := statPidsWithoutController(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if err := statPids(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// memory (since kernel 4.5)
|
||||
if _, ok := m.controllers["memory"]; ok {
|
||||
if err := statMemory(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if err := statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if _, ok := m.controllers["io"]; ok {
|
||||
if err := statIo(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if err := statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
if _, ok := m.controllers["cpu"]; ok {
|
||||
if err := statCpu(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// Note cpu.stat is available even if the controller is not enabled.
|
||||
if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// hugetlb (since kernel 5.6)
|
||||
if _, ok := m.controllers["hugetlb"]; ok {
|
||||
if err := statHugeTlb(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if len(errs) > 0 && !m.rootless {
|
||||
return st, errors.Errorf("error while statting cgroup v2: %+v", errs)
|
||||
@@ -163,53 +147,50 @@ func (m *manager) Path(_ string) string {
|
||||
return m.dirPath
|
||||
}
|
||||
|
||||
func (m *manager) Set(container *configs.Config) error {
|
||||
if container == nil || container.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
func (m *manager) Set(r *configs.Resources) error {
|
||||
if err := m.getControllers(); err != nil {
|
||||
return err
|
||||
}
|
||||
// pids (since kernel 4.5)
|
||||
if err := setPids(m.dirPath, container.Cgroups); err != nil {
|
||||
if err := setPids(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// memory (since kernel 4.5)
|
||||
if err := setMemory(m.dirPath, container.Cgroups); err != nil {
|
||||
if err := setMemory(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if err := setIo(m.dirPath, container.Cgroups); err != nil {
|
||||
if err := setIo(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
if err := setCpu(m.dirPath, container.Cgroups); err != nil {
|
||||
if err := setCpu(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// devices (since kernel 4.15, pseudo-controller)
|
||||
//
|
||||
// When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// When m.rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if err := setDevices(m.dirPath, container.Cgroups); err != nil && !m.rootless {
|
||||
if err := setDevices(m.dirPath, r); err != nil && !m.rootless {
|
||||
return err
|
||||
}
|
||||
// cpuset (since kernel 5.0)
|
||||
if err := setCpuset(m.dirPath, container.Cgroups); err != nil {
|
||||
if err := setCpuset(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// hugetlb (since kernel 5.6)
|
||||
if err := setHugeTlb(m.dirPath, container.Cgroups); err != nil {
|
||||
if err := setHugeTlb(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// freezer (since kernel 5.2, pseudo-controller)
|
||||
if err := setFreezer(m.dirPath, container.Cgroups.Freezer); err != nil {
|
||||
if err := setFreezer(m.dirPath, r.Freezer); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := m.setUnified(container.Cgroups.Unified); err != nil {
|
||||
if err := m.setUnified(r.Unified); err != nil {
|
||||
return err
|
||||
}
|
||||
m.config = container.Cgroups
|
||||
m.config.Resources = r
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -257,3 +238,16 @@ func (m *manager) GetFreezerState() (configs.FreezerState, error) {
|
||||
func (m *manager) Exists() bool {
|
||||
return cgroups.PathExists(m.dirPath)
|
||||
}
|
||||
|
||||
func OOMKillCount(path string) (uint64, error) {
|
||||
return fscommon.GetValueByKey(path, "memory.events", "oom_kill")
|
||||
}
|
||||
|
||||
func (m *manager) OOMKillCount() (uint64, error) {
|
||||
c, err := OOMKillCount(m.dirPath)
|
||||
if err != nil && m.rootless && os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
|
||||
return c, err
|
||||
}
|
||||
|
16
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
16
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
@@ -12,15 +12,15 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isHugeTlbSet(cgroup *configs.Cgroup) bool {
|
||||
return len(cgroup.Resources.HugetlbLimit) > 0
|
||||
func isHugeTlbSet(r *configs.Resources) bool {
|
||||
return len(r.HugetlbLimit) > 0
|
||||
}
|
||||
|
||||
func setHugeTlb(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if !isHugeTlbSet(cgroup) {
|
||||
func setHugeTlb(dirPath string, r *configs.Resources) error {
|
||||
if !isHugeTlbSet(r) {
|
||||
return nil
|
||||
}
|
||||
for _, hugetlb := range cgroup.Resources.HugetlbLimit {
|
||||
for _, hugetlb := range r.HugetlbLimit {
|
||||
if err := fscommon.WriteFile(dirPath, "hugetlb."+hugetlb.Pagesize+".max", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -44,14 +44,10 @@ func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
fileName := "hugetlb." + pagesize + ".events"
|
||||
contents, err := fscommon.ReadFile(dirPath, fileName)
|
||||
value, err = fscommon.GetValueByKey(dirPath, fileName, "max")
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to read stats")
|
||||
}
|
||||
_, value, err = fscommon.GetCgroupParamKeyValue(contents)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to parse "+fileName)
|
||||
}
|
||||
hugetlbStats.Failcnt = value
|
||||
|
||||
stats.HugetlbStats[pagesize] = hugetlbStats
|
||||
|
38
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
38
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
@@ -13,42 +13,50 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isIoSet(cgroup *configs.Cgroup) bool {
|
||||
return cgroup.Resources.BlkioWeight != 0 ||
|
||||
len(cgroup.Resources.BlkioThrottleReadBpsDevice) > 0 ||
|
||||
len(cgroup.Resources.BlkioThrottleWriteBpsDevice) > 0 ||
|
||||
len(cgroup.Resources.BlkioThrottleReadIOPSDevice) > 0 ||
|
||||
len(cgroup.Resources.BlkioThrottleWriteIOPSDevice) > 0
|
||||
func isIoSet(r *configs.Resources) bool {
|
||||
return r.BlkioWeight != 0 ||
|
||||
len(r.BlkioThrottleReadBpsDevice) > 0 ||
|
||||
len(r.BlkioThrottleWriteBpsDevice) > 0 ||
|
||||
len(r.BlkioThrottleReadIOPSDevice) > 0 ||
|
||||
len(r.BlkioThrottleWriteIOPSDevice) > 0
|
||||
}
|
||||
|
||||
func setIo(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if !isIoSet(cgroup) {
|
||||
func setIo(dirPath string, r *configs.Resources) error {
|
||||
if !isIoSet(r) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if cgroup.Resources.BlkioWeight != 0 {
|
||||
if r.BlkioWeight != 0 {
|
||||
filename := "io.bfq.weight"
|
||||
if err := fscommon.WriteFile(dirPath, filename,
|
||||
strconv.FormatUint(cgroups.ConvertBlkIOToCgroupV2Value(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
|
||||
// if io.bfq.weight does not exist, then bfq module is not loaded.
|
||||
// Fallback to use io.weight with a conversion scheme
|
||||
if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
v := cgroups.ConvertBlkIOToIOWeightValue(r.BlkioWeight)
|
||||
if err := fscommon.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
|
||||
for _, td := range r.BlkioThrottleReadBpsDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
|
||||
for _, td := range r.BlkioThrottleWriteBpsDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
|
||||
for _, td := range r.BlkioThrottleReadIOPSDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
|
||||
for _, td := range r.BlkioThrottleWriteIOPSDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
|
||||
return err
|
||||
}
|
||||
|
108
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
108
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
@@ -4,13 +4,16 @@ package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// numToStr converts an int64 value to a string for writing to a
|
||||
@@ -30,21 +33,20 @@ func numToStr(value int64) (ret string) {
|
||||
return ret
|
||||
}
|
||||
|
||||
func isMemorySet(cgroup *configs.Cgroup) bool {
|
||||
return cgroup.Resources.MemoryReservation != 0 ||
|
||||
cgroup.Resources.Memory != 0 || cgroup.Resources.MemorySwap != 0
|
||||
func isMemorySet(r *configs.Resources) bool {
|
||||
return r.MemoryReservation != 0 || r.Memory != 0 || r.MemorySwap != 0
|
||||
}
|
||||
|
||||
func setMemory(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if !isMemorySet(cgroup) {
|
||||
func setMemory(dirPath string, r *configs.Resources) error {
|
||||
if !isMemorySet(r) {
|
||||
return nil
|
||||
}
|
||||
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(cgroup.Resources.MemorySwap, cgroup.Resources.Memory)
|
||||
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
swapStr := numToStr(swap)
|
||||
if swapStr == "" && swap == 0 && cgroup.Resources.MemorySwap > 0 {
|
||||
if swapStr == "" && swap == 0 && r.MemorySwap > 0 {
|
||||
// memory and memorySwap set to the same value -- disable swap
|
||||
swapStr = "0"
|
||||
}
|
||||
@@ -55,7 +57,7 @@ func setMemory(dirPath string, cgroup *configs.Cgroup) error {
|
||||
}
|
||||
}
|
||||
|
||||
if val := numToStr(cgroup.Resources.Memory); val != "" {
|
||||
if val := numToStr(r.Memory); val != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.max", val); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -63,7 +65,7 @@ func setMemory(dirPath string, cgroup *configs.Cgroup) error {
|
||||
|
||||
// cgroup.Resources.KernelMemory is ignored
|
||||
|
||||
if val := numToStr(cgroup.Resources.MemoryReservation); val != "" {
|
||||
if val := numToStr(r.MemoryReservation); val != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.low", val); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -82,16 +84,24 @@ func statMemory(dirPath string, stats *cgroups.Stats) error {
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to parse memory.stat (%q)", sc.Text())
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["file"]
|
||||
// Unlike cgroup v1 which has memory.use_hierarchy binary knob,
|
||||
// cgroup v2 is always hierarchical.
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
|
||||
memoryUsage, err := getMemoryDataV2(dirPath, "")
|
||||
if err != nil {
|
||||
if errors.Is(err, unix.ENOENT) && dirPath == UnifiedMountpoint {
|
||||
// The root cgroup does not have memory.{current,max}
|
||||
// so emulate those using data from /proc/meminfo.
|
||||
return statsFromMeminfo(stats)
|
||||
}
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.Usage = memoryUsage
|
||||
@@ -99,9 +109,15 @@ func statMemory(dirPath string, stats *cgroups.Stats) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// As cgroup v1 reports SwapUsage values as mem+swap combined,
|
||||
// while in cgroup v2 swap values do not include memory,
|
||||
// report combined mem+swap for v1 compatibility.
|
||||
swapUsage.Usage += memoryUsage.Usage
|
||||
if swapUsage.Limit != math.MaxUint64 {
|
||||
swapUsage.Limit += memoryUsage.Limit
|
||||
}
|
||||
stats.MemoryStats.SwapUsage = swapUsage
|
||||
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -117,7 +133,10 @@ func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
if name != "" && os.IsNotExist(err) {
|
||||
// Ignore ENOENT as there's no swap accounting
|
||||
// if kernel CONFIG_MEMCG_SWAP is not set or
|
||||
// swapaccount=0 kernel boot parameter is given.
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", usage)
|
||||
@@ -126,12 +145,69 @@ func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", limit)
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
||||
|
||||
func statsFromMeminfo(stats *cgroups.Stats) error {
|
||||
f, err := os.Open("/proc/meminfo")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// Fields we are interested in.
|
||||
var (
|
||||
swap_free uint64
|
||||
swap_total uint64
|
||||
main_total uint64
|
||||
main_free uint64
|
||||
)
|
||||
mem := map[string]*uint64{
|
||||
"SwapFree": &swap_free,
|
||||
"SwapTotal": &swap_total,
|
||||
"MemTotal": &main_total,
|
||||
"MemFree": &main_free,
|
||||
}
|
||||
|
||||
found := 0
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
parts := strings.SplitN(sc.Text(), ":", 3)
|
||||
if len(parts) != 2 {
|
||||
// Should not happen.
|
||||
continue
|
||||
}
|
||||
k := parts[0]
|
||||
p, ok := mem[k]
|
||||
if !ok {
|
||||
// Unknown field -- not interested.
|
||||
continue
|
||||
}
|
||||
vStr := strings.TrimSpace(strings.TrimSuffix(parts[1], " kB"))
|
||||
*p, err = strconv.ParseUint(vStr, 10, 64)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "parsing /proc/meminfo "+k)
|
||||
}
|
||||
|
||||
found++
|
||||
if found == len(mem) {
|
||||
// Got everything we need -- skip the rest.
|
||||
break
|
||||
}
|
||||
}
|
||||
if sc.Err() != nil {
|
||||
return sc.Err()
|
||||
}
|
||||
|
||||
stats.MemoryStats.SwapUsage.Usage = (swap_total - swap_free) * 1024
|
||||
stats.MemoryStats.SwapUsage.Limit = math.MaxUint64
|
||||
|
||||
stats.MemoryStats.Usage.Usage = (main_total - main_free) * 1024
|
||||
stats.MemoryStats.Usage.Limit = math.MaxUint64
|
||||
|
||||
return nil
|
||||
}
|
||||
|
25
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
25
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
@@ -3,6 +3,7 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
@@ -13,15 +14,15 @@ import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func isPidsSet(cgroup *configs.Cgroup) bool {
|
||||
return cgroup.Resources.PidsLimit != 0
|
||||
func isPidsSet(r *configs.Resources) bool {
|
||||
return r.PidsLimit != 0
|
||||
}
|
||||
|
||||
func setPids(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if !isPidsSet(cgroup) {
|
||||
func setPids(dirPath string, r *configs.Resources) error {
|
||||
if !isPidsSet(r) {
|
||||
return nil
|
||||
}
|
||||
if val := numToStr(cgroup.Resources.PidsLimit); val != "" {
|
||||
if val := numToStr(r.PidsLimit); val != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "pids.max", val); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -30,7 +31,7 @@ func setPids(dirPath string, cgroup *configs.Cgroup) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func statPidsWithoutController(dirPath string, stats *cgroups.Stats) error {
|
||||
func statPidsFromCgroupProcs(dirPath string, stats *cgroups.Stats) error {
|
||||
// if the controller is not enabled, let's read PIDS from cgroup.procs
|
||||
// (or threads if cgroup.threads is enabled)
|
||||
contents, err := fscommon.ReadFile(dirPath, "cgroup.procs")
|
||||
@@ -40,13 +41,8 @@ func statPidsWithoutController(dirPath string, stats *cgroups.Stats) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids := make(map[string]string)
|
||||
for _, i := range strings.Split(contents, "\n") {
|
||||
if i != "" {
|
||||
pids[i] = i
|
||||
}
|
||||
}
|
||||
stats.PidsStats.Current = uint64(len(pids))
|
||||
pids := strings.Count(contents, "\n")
|
||||
stats.PidsStats.Current = uint64(pids)
|
||||
stats.PidsStats.Limit = 0
|
||||
return nil
|
||||
}
|
||||
@@ -54,6 +50,9 @@ func statPidsWithoutController(dirPath string, stats *cgroups.Stats) error {
|
||||
func statPids(dirPath string, stats *cgroups.Stats) error {
|
||||
current, err := fscommon.GetCgroupParamUint(dirPath, "pids.current")
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return statPidsFromCgroupProcs(dirPath, stats)
|
||||
}
|
||||
return errors.Wrap(err, "failed to parse pids.current")
|
||||
}
|
||||
|
||||
|
35
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/open.go
generated
vendored
35
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/open.go
generated
vendored
@@ -5,7 +5,6 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
@@ -17,7 +16,7 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
// Set to true by fs unit tests
|
||||
// TestMode is set to true by unit tests that need "fake" cgroupfs.
|
||||
TestMode bool
|
||||
|
||||
cgroupFd int = -1
|
||||
@@ -71,12 +70,12 @@ func OpenFile(dir, file string, flags int) (*os.File, error) {
|
||||
flags |= os.O_TRUNC | os.O_CREATE
|
||||
mode = 0o600
|
||||
}
|
||||
if prepareOpenat2() != nil {
|
||||
return openFallback(dir, file, flags, mode)
|
||||
}
|
||||
reldir := strings.TrimPrefix(dir, cgroupfsPrefix)
|
||||
if len(reldir) == len(dir) { // non-standard path, old system?
|
||||
return openWithSecureJoin(dir, file, flags, mode)
|
||||
}
|
||||
if prepareOpenat2() != nil {
|
||||
return openWithSecureJoin(dir, file, flags, mode)
|
||||
return openFallback(dir, file, flags, mode)
|
||||
}
|
||||
|
||||
relname := reldir + "/" + file
|
||||
@@ -93,11 +92,29 @@ func OpenFile(dir, file string, flags int) (*os.File, error) {
|
||||
return os.NewFile(uintptr(fd), cgroupfsPrefix+relname), nil
|
||||
}
|
||||
|
||||
func openWithSecureJoin(dir, file string, flags int, mode os.FileMode) (*os.File, error) {
|
||||
path, err := securejoin.SecureJoin(dir, file)
|
||||
var errNotCgroupfs = errors.New("not a cgroup file")
|
||||
|
||||
// openFallback is used when openat2(2) is not available. It checks the opened
|
||||
// file is on cgroupfs, returning an error otherwise.
|
||||
func openFallback(dir, file string, flags int, mode os.FileMode) (*os.File, error) {
|
||||
path := dir + "/" + file
|
||||
fd, err := os.OpenFile(path, flags, mode)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if TestMode {
|
||||
return fd, nil
|
||||
}
|
||||
// Check this is a cgroupfs file.
|
||||
var st unix.Statfs_t
|
||||
if err := unix.Fstatfs(int(fd.Fd()), &st); err != nil {
|
||||
_ = fd.Close()
|
||||
return nil, &os.PathError{Op: "statfs", Path: path, Err: err}
|
||||
}
|
||||
if st.Type != unix.CGROUP_SUPER_MAGIC && st.Type != unix.CGROUP2_SUPER_MAGIC {
|
||||
_ = fd.Close()
|
||||
return nil, &os.PathError{Op: "open", Path: path, Err: errNotCgroupfs}
|
||||
}
|
||||
|
||||
return os.OpenFile(path, flags, mode)
|
||||
return fd, nil
|
||||
}
|
||||
|
50
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
generated
vendored
50
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
generated
vendored
@@ -35,22 +35,42 @@ func ParseUint(s string, base, bitSize int) (uint64, error) {
|
||||
return value, nil
|
||||
}
|
||||
|
||||
// GetCgroupParamKeyValue parses a space-separated "name value" kind of cgroup
|
||||
// parameter and returns its components. For example, "io_service_bytes 1234"
|
||||
// will return as "io_service_bytes", 1234.
|
||||
func GetCgroupParamKeyValue(t string) (string, uint64, error) {
|
||||
parts := strings.Fields(t)
|
||||
switch len(parts) {
|
||||
case 2:
|
||||
value, err := ParseUint(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
return "", 0, fmt.Errorf("unable to convert to uint64: %v", err)
|
||||
}
|
||||
|
||||
return parts[0], value, nil
|
||||
default:
|
||||
return "", 0, ErrNotValidFormat
|
||||
// ParseKeyValue parses a space-separated "name value" kind of cgroup
|
||||
// parameter and returns its key as a string, and its value as uint64
|
||||
// (ParseUint is used to convert the value). For example,
|
||||
// "io_service_bytes 1234" will be returned as "io_service_bytes", 1234.
|
||||
func ParseKeyValue(t string) (string, uint64, error) {
|
||||
parts := strings.SplitN(t, " ", 3)
|
||||
if len(parts) != 2 {
|
||||
return "", 0, fmt.Errorf("line %q is not in key value format", t)
|
||||
}
|
||||
|
||||
value, err := ParseUint(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
return "", 0, fmt.Errorf("unable to convert to uint64: %v", err)
|
||||
}
|
||||
|
||||
return parts[0], value, nil
|
||||
}
|
||||
|
||||
// GetValueByKey reads key-value pairs from the specified cgroup file,
|
||||
// and returns a value of the specified key. ParseUint is used for value
|
||||
// conversion.
|
||||
func GetValueByKey(path, file, key string) (uint64, error) {
|
||||
content, err := ReadFile(path, file)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
lines := strings.Split(string(content), "\n")
|
||||
for _, line := range lines {
|
||||
arr := strings.Split(line, " ")
|
||||
if len(arr) == 2 && arr[0] == key {
|
||||
return ParseUint(arr[1], 10, 64)
|
||||
}
|
||||
}
|
||||
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// GetCgroupParamUint reads a single uint64 value from the specified cgroup file.
|
||||
|
106
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
106
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
@@ -2,6 +2,7 @@ package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
@@ -28,10 +29,6 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
connOnce sync.Once
|
||||
connDbus *systemdDbus.Conn
|
||||
connErr error
|
||||
|
||||
versionOnce sync.Once
|
||||
version int
|
||||
|
||||
@@ -291,19 +288,6 @@ func generateDeviceProperties(rules []*devices.Rule) ([]systemdDbus.Property, er
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
// getDbusConnection lazy initializes systemd dbus connection
|
||||
// and returns it
|
||||
func getDbusConnection(rootless bool) (*systemdDbus.Conn, error) {
|
||||
connOnce.Do(func() {
|
||||
if rootless {
|
||||
connDbus, connErr = NewUserSystemdDbus()
|
||||
} else {
|
||||
connDbus, connErr = systemdDbus.New()
|
||||
}
|
||||
})
|
||||
return connDbus, connErr
|
||||
}
|
||||
|
||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
@@ -319,32 +303,42 @@ func getUnitName(c *configs.Cgroup) string {
|
||||
return c.Name
|
||||
}
|
||||
|
||||
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||
func isUnitExists(err error) bool {
|
||||
// isDbusError returns true if the error is a specific dbus error.
|
||||
func isDbusError(err error, name string) bool {
|
||||
if err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
|
||||
var derr *dbus.Error
|
||||
if errors.As(err, &derr) {
|
||||
return strings.Contains(derr.Name, name)
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func startUnit(dbusConnection *systemdDbus.Conn, unitName string, properties []systemdDbus.Property) error {
|
||||
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||
func isUnitExists(err error) bool {
|
||||
return isDbusError(err, "org.freedesktop.systemd1.UnitExists")
|
||||
}
|
||||
|
||||
func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property) error {
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := dbusConnection.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
_, err := c.StartTransientUnitContext(context.TODO(), unitName, "replace", properties, statusChan)
|
||||
return err
|
||||
})
|
||||
if err == nil {
|
||||
timeout := time.NewTimer(30 * time.Second)
|
||||
defer timeout.Stop()
|
||||
|
||||
select {
|
||||
case s := <-statusChan:
|
||||
close(statusChan)
|
||||
// Please refer to https://godoc.org/github.com/coreos/go-systemd/dbus#Conn.StartUnit
|
||||
// Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
|
||||
if s != "done" {
|
||||
dbusConnection.ResetFailedUnit(unitName)
|
||||
resetFailedUnit(cm, unitName)
|
||||
return errors.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
|
||||
}
|
||||
case <-timeout.C:
|
||||
dbusConnection.ResetFailedUnit(unitName)
|
||||
resetFailedUnit(cm, unitName)
|
||||
return errors.New("Timeout waiting for systemd to create " + unitName)
|
||||
}
|
||||
} else if !isUnitExists(err) {
|
||||
@@ -354,13 +348,17 @@ func startUnit(dbusConnection *systemdDbus.Conn, unitName string, properties []s
|
||||
return nil
|
||||
}
|
||||
|
||||
func stopUnit(dbusConnection *systemdDbus.Conn, unitName string) error {
|
||||
func stopUnit(cm *dbusConnManager, unitName string) error {
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := dbusConnection.StopUnit(unitName, "replace", statusChan); err == nil {
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
_, err := c.StopUnitContext(context.TODO(), unitName, "replace", statusChan)
|
||||
return err
|
||||
})
|
||||
if err == nil {
|
||||
select {
|
||||
case s := <-statusChan:
|
||||
close(statusChan)
|
||||
// Please refer to https://godoc.org/github.com/coreos/go-systemd/dbus#Conn.StartUnit
|
||||
// Please refer to https://godoc.org/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
|
||||
if s != "done" {
|
||||
logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s)
|
||||
}
|
||||
@@ -371,10 +369,38 @@ func stopUnit(dbusConnection *systemdDbus.Conn, unitName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func systemdVersion(conn *systemdDbus.Conn) int {
|
||||
func resetFailedUnit(cm *dbusConnManager, name string) {
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
return c.ResetFailedUnitContext(context.TODO(), name)
|
||||
})
|
||||
if err != nil {
|
||||
logrus.Warnf("unable to reset failed unit: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error {
|
||||
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...)
|
||||
})
|
||||
}
|
||||
|
||||
func getManagerProperty(cm *dbusConnManager, name string) (string, error) {
|
||||
str := ""
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
var err error
|
||||
str, err = c.GetManagerProperty(name)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return strconv.Unquote(str)
|
||||
}
|
||||
|
||||
func systemdVersion(cm *dbusConnManager) int {
|
||||
versionOnce.Do(func() {
|
||||
version = -1
|
||||
verStr, err := conn.GetManagerProperty("Version")
|
||||
verStr, err := getManagerProperty(cm, "Version")
|
||||
if err == nil {
|
||||
version, err = systemdVersionAtoi(verStr)
|
||||
}
|
||||
@@ -389,11 +415,11 @@ func systemdVersion(conn *systemdDbus.Conn) int {
|
||||
|
||||
func systemdVersionAtoi(verStr string) (int, error) {
|
||||
// verStr should be of the form:
|
||||
// "v245.4-1.fc32", "245", "v245-1.fc32", "245-1.fc32"
|
||||
// all the input strings include quotes, and the output int should be 245
|
||||
// thus, we unconditionally remove the `"v`
|
||||
// and then match on the first integer we can grab
|
||||
re := regexp.MustCompile(`"?v?([0-9]+)`)
|
||||
// "v245.4-1.fc32", "245", "v245-1.fc32", "245-1.fc32" (without quotes).
|
||||
// The result for all of the above should be 245.
|
||||
// Thus, we unconditionally remove the "v" prefix
|
||||
// and then match on the first integer we can grab.
|
||||
re := regexp.MustCompile(`v?([0-9]+)`)
|
||||
matches := re.FindStringSubmatch(verStr)
|
||||
if len(matches) < 2 {
|
||||
return 0, errors.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches)
|
||||
@@ -402,10 +428,10 @@ func systemdVersionAtoi(verStr string) (int, error) {
|
||||
return ver, errors.Wrapf(err, "can't parse version %s", verStr)
|
||||
}
|
||||
|
||||
func addCpuQuota(conn *systemdDbus.Conn, properties *[]systemdDbus.Property, quota int64, period uint64) {
|
||||
func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) {
|
||||
if period != 0 {
|
||||
// systemd only supports CPUQuotaPeriodUSec since v242
|
||||
sdVer := systemdVersion(conn)
|
||||
sdVer := systemdVersion(cm)
|
||||
if sdVer >= 242 {
|
||||
*properties = append(*properties,
|
||||
newProp("CPUQuotaPeriodUSec", period))
|
||||
@@ -436,13 +462,13 @@ func addCpuQuota(conn *systemdDbus.Conn, properties *[]systemdDbus.Property, quo
|
||||
}
|
||||
}
|
||||
|
||||
func addCpuset(conn *systemdDbus.Conn, props *[]systemdDbus.Property, cpus, mems string) error {
|
||||
func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems string) error {
|
||||
if cpus == "" && mems == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// systemd only supports AllowedCPUs/AllowedMemoryNodes since v244
|
||||
sdVer := systemdVersion(conn)
|
||||
sdVer := systemdVersion(cm)
|
||||
if sdVer < 244 {
|
||||
logrus.Debugf("systemd v%d is too old to support AllowedCPUs/AllowedMemoryNodes"+
|
||||
" (settings will still be applied to cgroupfs)", sdVer)
|
||||
|
96
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
generated
vendored
Normal file
96
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
generated
vendored
Normal file
@@ -0,0 +1,96 @@
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
)
|
||||
|
||||
// Package-level dbus connection state shared by every dbusConnManager.
var (
|
||||
// dbusC is the cached systemd dbus connection; nil until getConnection
// creates it, and set back to nil by resetConnection.
dbusC *systemdDbus.Conn
|
||||
// dbusMu guards dbusC (see getConnection and resetConnection).
dbusMu sync.RWMutex
|
||||
// dbusInited is consulted by newDbusConnManager when checking for a
// root/rootless mode mismatch.
dbusInited bool
|
||||
// dbusRootless selects which kind of connection newConnection opens.
dbusRootless bool
|
||||
)
|
||||
|
||||
// dbusConnManager provides access to the systemd dbus connection. It has no
// fields of its own: its methods operate on the package-level variables
// dbusC, dbusMu and dbusRootless.
type dbusConnManager struct {
|
||||
}
|
||||
|
||||
// newDbusConnManager initializes systemd dbus connection manager.
|
||||
func newDbusConnManager(rootless bool) *dbusConnManager {
|
||||
if dbusInited && rootless != dbusRootless {
|
||||
panic("can't have both root and rootless dbus")
|
||||
}
|
||||
dbusRootless = rootless
|
||||
return &dbusConnManager{}
|
||||
}
|
||||
|
||||
// getConnection lazily initializes and returns systemd dbus connection.
|
||||
func (d *dbusConnManager) getConnection() (*systemdDbus.Conn, error) {
|
||||
// In the case where dbusC != nil
|
||||
// Use the read lock the first time to ensure
|
||||
// that Conn can be acquired at the same time.
|
||||
dbusMu.RLock()
|
||||
if conn := dbusC; conn != nil {
|
||||
dbusMu.RUnlock()
|
||||
return conn, nil
|
||||
}
|
||||
dbusMu.RUnlock()
|
||||
|
||||
// In the case where dbusC == nil
|
||||
// Use write lock to ensure that only one
|
||||
// will be created
|
||||
dbusMu.Lock()
|
||||
defer dbusMu.Unlock()
|
||||
if conn := dbusC; conn != nil {
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
conn, err := d.newConnection()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dbusC = conn
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
func (d *dbusConnManager) newConnection() (*systemdDbus.Conn, error) {
|
||||
if dbusRootless {
|
||||
return newUserSystemdDbus()
|
||||
}
|
||||
return systemdDbus.NewWithContext(context.TODO())
|
||||
}
|
||||
|
||||
// resetConnection resets the connection to its initial state
|
||||
// (so it can be reconnected if necessary).
|
||||
func (d *dbusConnManager) resetConnection(conn *systemdDbus.Conn) {
|
||||
dbusMu.Lock()
|
||||
defer dbusMu.Unlock()
|
||||
if dbusC != nil && dbusC == conn {
|
||||
dbusC.Close()
|
||||
dbusC = nil
|
||||
}
|
||||
}
|
||||
|
||||
// errDbusConnClosed is the message text of dbus.ErrClosed; retryOnDisconnect
// passes it to isDbusError to recognise a closed connection.
var errDbusConnClosed = dbus.ErrClosed.Error()
|
||||
|
||||
// retryOnDisconnect calls op, and if the error it returns is about closed dbus
|
||||
// connection, the connection is re-established and the op is retried. This helps
|
||||
// with the situation when dbus is restarted and we have a stale connection.
|
||||
func (d *dbusConnManager) retryOnDisconnect(op func(*systemdDbus.Conn) error) error {
|
||||
for {
|
||||
conn, err := d.getConnection()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = op(conn)
|
||||
if !isDbusError(err, errDbusConnClosed) {
|
||||
return err
|
||||
}
|
||||
d.resetConnection(conn)
|
||||
}
|
||||
}
|
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
@@ -13,12 +13,12 @@ import (
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/userns"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// NewUserSystemdDbus creates a connection for systemd user-instance.
|
||||
func NewUserSystemdDbus() (*systemdDbus.Conn, error) {
|
||||
// newUserSystemdDbus creates a connection for systemd user-instance.
|
||||
func newUserSystemdDbus() (*systemdDbus.Conn, error) {
|
||||
addr, err := DetectUserDbusSessionBusAddress()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -52,7 +52,7 @@ func NewUserSystemdDbus() (*systemdDbus.Conn, error) {
|
||||
//
|
||||
// Otherwise returns os.Getuid() .
|
||||
func DetectUID() (int, error) {
|
||||
if !system.RunningInUserNS() {
|
||||
if !userns.RunningInUserNS() {
|
||||
return os.Getuid(), nil
|
||||
}
|
||||
b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
|
||||
|
91
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
generated
vendored
91
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
generated
vendored
@@ -12,7 +12,6 @@ import (
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
@@ -21,12 +20,14 @@ type legacyManager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
paths map[string]string
|
||||
dbus *dbusConnManager
|
||||
}
|
||||
|
||||
func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &legacyManager{
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
dbus: newDbusConnManager(false),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,8 +36,8 @@ type subsystem interface {
|
||||
Name() string
|
||||
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Set the cgroup represented by cgroup.
|
||||
Set(path string, cgroup *configs.Cgroup) error
|
||||
// Set sets cgroup resource limits.
|
||||
Set(path string, r *configs.Resources) error
|
||||
}
|
||||
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
@@ -57,9 +58,8 @@ var legacySubsystems = []subsystem{
|
||||
&fs.NameGroup{GroupName: "name=systemd"},
|
||||
}
|
||||
|
||||
func genV1ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]systemdDbus.Property, error) {
|
||||
func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
||||
var properties []systemdDbus.Property
|
||||
r := c.Resources
|
||||
|
||||
deviceProperties, err := generateDeviceProperties(r.Devices)
|
||||
if err != nil {
|
||||
@@ -77,7 +77,7 @@ func genV1ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]syst
|
||||
newProp("CPUShares", r.CpuShares))
|
||||
}
|
||||
|
||||
addCpuQuota(conn, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
|
||||
if r.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
@@ -86,11 +86,10 @@ func genV1ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]syst
|
||||
|
||||
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||
}
|
||||
|
||||
err = addCpuset(conn, &properties, r.CpusetCpus, r.CpusetMems)
|
||||
err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -158,32 +157,17 @@ func (m *legacyManager) Apply(pid int) error {
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("BlockIOAccounting", true))
|
||||
newProp("BlockIOAccounting", true),
|
||||
newProp("TasksAccounting", true),
|
||||
)
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
dbusConnection, err := getDbusConnection(false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
resourcesProperties, err := genV1ResourcesProperties(c, dbusConnection)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
properties = append(properties, resourcesProperties...)
|
||||
properties = append(properties, c.SystemdProps...)
|
||||
|
||||
// We have to set kernel memory here, as we can't change it once
|
||||
// processes have been attached to the cgroup.
|
||||
if c.Resources.KernelMemory != 0 {
|
||||
if err := enableKmem(c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := startUnit(dbusConnection, unitName, properties); err != nil {
|
||||
if err := startUnit(m.dbus, unitName, properties); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -221,13 +205,8 @@ func (m *legacyManager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
dbusConnection, err := getDbusConnection(false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
unitName := getUnitName(m.cgroups)
|
||||
stopErr := stopUnit(m.dbus, getUnitName(m.cgroups))
|
||||
|
||||
stopErr := stopUnit(dbusConnection, unitName)
|
||||
// Both on success and on error, cleanup all the cgroups we are aware of.
|
||||
// Some of them were created directly by Apply() and are not managed by systemd.
|
||||
if err := cgroups.RemovePaths(m.paths); err != nil {
|
||||
@@ -252,7 +231,7 @@ func (m *legacyManager) joinCgroups(pid int) error {
|
||||
case "cpuset":
|
||||
if path, ok := m.paths[name]; ok {
|
||||
s := &fs.CpusetGroup{}
|
||||
if err := s.ApplyDir(path, m.cgroups, pid); err != nil {
|
||||
if err := s.ApplyDir(path, m.cgroups.Resources, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -305,7 +284,7 @@ func (m *legacyManager) Freeze(state configs.FreezerState) error {
|
||||
prevState := m.cgroups.Resources.Freezer
|
||||
m.cgroups.Resources.Freezer = state
|
||||
freezer := &fs.FreezerGroup{}
|
||||
if err := freezer.Set(path, m.cgroups); err != nil {
|
||||
if err := freezer.Set(path, m.cgroups.Resources); err != nil {
|
||||
m.cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
@@ -345,20 +324,16 @@ func (m *legacyManager) GetStats() (*cgroups.Stats, error) {
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *legacyManager) Set(container *configs.Config) error {
|
||||
func (m *legacyManager) Set(r *configs.Resources) error {
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
if container.Cgroups.Resources.Unified != nil {
|
||||
if r.Unified != nil {
|
||||
return cgroups.ErrV1NoUnified
|
||||
}
|
||||
dbusConnection, err := getDbusConnection(false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
properties, err := genV1ResourcesProperties(container.Cgroups, dbusConnection)
|
||||
properties, err := genV1ResourcesProperties(r, m.dbus)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -386,7 +361,7 @@ func (m *legacyManager) Set(container *configs.Config) error {
|
||||
}
|
||||
}
|
||||
|
||||
if err := dbusConnection.SetUnitProperties(getUnitName(container.Cgroups), true, properties...); err != nil {
|
||||
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
return err
|
||||
}
|
||||
@@ -401,7 +376,7 @@ func (m *legacyManager) Set(container *configs.Config) error {
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
if err := sys.Set(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -409,30 +384,6 @@ func (m *legacyManager) Set(container *configs.Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func enableKmem(c *configs.Cgroup) error {
|
||||
path, err := getSubsystemPath(c, "memory")
|
||||
if err != nil {
|
||||
if cgroups.IsNotFound(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// do not try to enable the kernel memory if we already have
|
||||
// tasks in the cgroup.
|
||||
content, err := fscommon.ReadFile(path, "tasks")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(content) > 0 {
|
||||
return nil
|
||||
}
|
||||
return fs.EnableKernelMemoryAccounting(path)
|
||||
}
|
||||
|
||||
func (m *legacyManager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
@@ -455,3 +406,7 @@ func (m *legacyManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
func (m *legacyManager) Exists() bool {
|
||||
return cgroups.PathExists(m.Path("devices"))
|
||||
}
|
||||
|
||||
func (m *legacyManager) OOMKillCount() (uint64, error) {
|
||||
return fs.OOMKillCount(m.Path("memory"))
|
||||
}
|
||||
|
77
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
77
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
@@ -26,6 +26,7 @@ type unifiedManager struct {
|
||||
// path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
path string
|
||||
rootless bool
|
||||
dbus *dbusConnManager
|
||||
}
|
||||
|
||||
func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgroups.Manager {
|
||||
@@ -33,6 +34,7 @@ func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgrou
|
||||
cgroups: config,
|
||||
path: path,
|
||||
rootless: rootless,
|
||||
dbus: newDbusConnManager(rootless),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,7 +47,7 @@ func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgrou
|
||||
// For the list of keys, see https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
//
|
||||
// For the list of systemd unit properties, see systemd.resource-control(5).
|
||||
func unifiedResToSystemdProps(conn *systemdDbus.Conn, res map[string]string) (props []systemdDbus.Property, _ error) {
|
||||
func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props []systemdDbus.Property, _ error) {
|
||||
var err error
|
||||
|
||||
for k, v := range res {
|
||||
@@ -83,7 +85,7 @@ func unifiedResToSystemdProps(conn *systemdDbus.Conn, res map[string]string) (pr
|
||||
return nil, fmt.Errorf("unified resource %q quota value conversion error: %w", k, err)
|
||||
}
|
||||
}
|
||||
addCpuQuota(conn, &props, quota, period)
|
||||
addCpuQuota(cm, &props, quota, period)
|
||||
|
||||
case "cpu.weight":
|
||||
num, err := strconv.ParseUint(v, 10, 64)
|
||||
@@ -103,7 +105,7 @@ func unifiedResToSystemdProps(conn *systemdDbus.Conn, res map[string]string) (pr
|
||||
"cpuset.mems": "AllowedMemoryNodes",
|
||||
}
|
||||
// systemd only supports these properties since v244
|
||||
sdVer := systemdVersion(conn)
|
||||
sdVer := systemdVersion(cm)
|
||||
if sdVer >= 244 {
|
||||
props = append(props,
|
||||
newProp(m[k], bits))
|
||||
@@ -141,7 +143,6 @@ func unifiedResToSystemdProps(conn *systemdDbus.Conn, res map[string]string) (pr
|
||||
}
|
||||
}
|
||||
props = append(props,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", num))
|
||||
|
||||
case "memory.oom.group":
|
||||
@@ -163,9 +164,8 @@ func unifiedResToSystemdProps(conn *systemdDbus.Conn, res map[string]string) (pr
|
||||
return props, nil
|
||||
}
|
||||
|
||||
func genV2ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]systemdDbus.Property, error) {
|
||||
func genV2ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
||||
var properties []systemdDbus.Property
|
||||
r := c.Resources
|
||||
|
||||
// NOTE: This is of questionable correctness because we insert our own
|
||||
// devices eBPF program later. Two programs with identical rules
|
||||
@@ -201,15 +201,14 @@ func genV2ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]syst
|
||||
newProp("CPUWeight", r.CpuWeight))
|
||||
}
|
||||
|
||||
addCpuQuota(conn, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
|
||||
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||
}
|
||||
|
||||
err = addCpuset(conn, &properties, r.CpusetCpus, r.CpusetMems)
|
||||
err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -218,7 +217,7 @@ func genV2ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]syst
|
||||
|
||||
// convert Resources.Unified map to systemd properties
|
||||
if r.Unified != nil {
|
||||
unifiedProps, err := unifiedResToSystemdProps(conn, r.Unified)
|
||||
unifiedProps, err := unifiedResToSystemdProps(cm, r.Unified)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -273,28 +272,21 @@ func (m *unifiedManager) Apply(pid int) error {
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("IOAccounting", true))
|
||||
newProp("IOAccounting", true),
|
||||
newProp("TasksAccounting", true),
|
||||
)
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
resourcesProperties, err := genV2ResourcesProperties(c, dbusConnection)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
properties = append(properties, resourcesProperties...)
|
||||
properties = append(properties, c.SystemdProps...)
|
||||
|
||||
if err := startUnit(dbusConnection, unitName, properties); err != nil {
|
||||
if err := startUnit(m.dbus, unitName, properties); err != nil {
|
||||
return errors.Wrapf(err, "error while starting unit %q with properties %+v", unitName, properties)
|
||||
}
|
||||
|
||||
if err = m.initPath(); err != nil {
|
||||
if err := m.initPath(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
|
||||
@@ -310,17 +302,13 @@ func (m *unifiedManager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
unitName := getUnitName(m.cgroups)
|
||||
if err := stopUnit(dbusConnection, unitName); err != nil {
|
||||
if err := stopUnit(m.dbus, unitName); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// XXX this is probably not needed, systemd should handle it
|
||||
err = os.Remove(m.path)
|
||||
err := os.Remove(m.path)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
@@ -329,6 +317,7 @@ func (m *unifiedManager) Destroy() error {
|
||||
}
|
||||
|
||||
// Path returns m.path after calling m.initPath. The subsystem name argument
// is ignored, and any error from initPath is discarded.
func (m *unifiedManager) Path(_ string) string {
|
||||
_ = m.initPath()
|
||||
return m.path
|
||||
}
|
||||
|
||||
@@ -349,16 +338,8 @@ func (m *unifiedManager) getSliceFull() (string, error) {
|
||||
}
|
||||
|
||||
if m.rootless {
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// managerCGQuoted is typically "/user.slice/user-${uid}.slice/user@${uid}.service" including the quote symbols
|
||||
managerCGQuoted, err := dbusConnection.GetManagerProperty("ControlGroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
managerCG, err := strconv.Unquote(managerCGQuoted)
|
||||
// managerCG is typically "/user.slice/user-${uid}.slice/user@${uid}.service".
|
||||
managerCG, err := getManagerProperty(m.dbus, "ControlGroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -431,12 +412,8 @@ func (m *unifiedManager) GetStats() (*cgroups.Stats, error) {
|
||||
return fsMgr.GetStats()
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Set(container *configs.Config) error {
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
properties, err := genV2ResourcesProperties(m.cgroups, dbusConnection)
|
||||
func (m *unifiedManager) Set(r *configs.Resources) error {
|
||||
properties, err := genV2ResourcesProperties(r, m.dbus)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -464,7 +441,7 @@ func (m *unifiedManager) Set(container *configs.Config) error {
|
||||
}
|
||||
}
|
||||
|
||||
if err := dbusConnection.SetUnitProperties(getUnitName(m.cgroups), true, properties...); err != nil {
|
||||
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
return errors.Wrap(err, "error while setting unit properties")
|
||||
}
|
||||
@@ -477,7 +454,7 @@ func (m *unifiedManager) Set(container *configs.Config) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Set(container)
|
||||
return fsMgr.Set(r)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetPaths() map[string]string {
|
||||
@@ -501,3 +478,11 @@ func (m *unifiedManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
// Exists reports whether m.path exists, as determined by cgroups.PathExists.
func (m *unifiedManager) Exists() bool {
|
||||
return cgroups.PathExists(m.path)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) OOMKillCount() (uint64, error) {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return fsMgr.OOMKillCount()
|
||||
}
|
||||
|
26
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
26
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
@@ -16,7 +16,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/userns"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
@@ -37,7 +37,7 @@ func IsCgroup2UnifiedMode() bool {
|
||||
var st unix.Statfs_t
|
||||
err := unix.Statfs(unifiedMountpoint, &st)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) && system.RunningInUserNS() {
|
||||
if os.IsNotExist(err) && userns.RunningInUserNS() {
|
||||
// ignore the "not found" error if running in userns
|
||||
logrus.WithError(err).Debugf("%s missing, assuming cgroup v1", unifiedMountpoint)
|
||||
isUnified = false
|
||||
@@ -400,17 +400,6 @@ func WriteCgroupProc(dir string, pid int) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// Since the OCI spec is designed for cgroup v1, in some cases
// there is need to convert from the cgroup v1 configuration to cgroup v2
// the formula for BlkIOWeight is y = (1 + (x - 10) * 9999 / 990)
// convert linearly from [10-1000] to [1-10000]
//
// A weight of 0 means "not set" and is returned as 0. Weights 1 through 9
// are below the input range; they are mapped to the minimum result 1 rather
// than being fed to the formula, where the unsigned subtraction would wrap
// around and produce a huge bogus value.
func ConvertBlkIOToCgroupV2Value(blkIoWeight uint16) uint64 {
	const minBlkIoWeight = 10
	switch {
	case blkIoWeight == 0:
		return 0
	case blkIoWeight < minBlkIoWeight:
		return 1
	}
	return 1 + (uint64(blkIoWeight)-minBlkIoWeight)*9999/990
}
|
||||
|
||||
// Since the OCI spec is designed for cgroup v1, in some cases
|
||||
// there is need to convert from the cgroup v1 configuration to cgroup v2
|
||||
// the formula for cpuShares is y = (1 + ((x - 2) * 9999) / 262142)
|
||||
@@ -450,3 +439,14 @@ func ConvertMemorySwapToCgroupV2Value(memorySwap, memory int64) (int64, error) {
|
||||
|
||||
return memorySwap - memory, nil
|
||||
}
|
||||
|
||||
// Since the OCI spec is designed for cgroup v1, in some cases
// there is need to convert from the cgroup v1 configuration to cgroup v2
// the formula for BlkIOWeight to IOWeight is y = (1 + (x - 10) * 9999 / 990)
// convert linearly from [10-1000] to [1-10000]
//
// A weight of 0 means "not set" and is returned as 0. Weights 1 through 9
// are below the input range; they are mapped to the minimum result 1 rather
// than being fed to the formula, where the unsigned subtraction would wrap
// around and produce a huge bogus value.
func ConvertBlkIOToIOWeightValue(blkIoWeight uint16) uint64 {
	const minBlkIoWeight = 10
	switch {
	case blkIoWeight == 0:
		return 0
	case blkIoWeight < minBlkIoWeight:
		return 1
	}
	return 1 + (uint64(blkIoWeight)-minBlkIoWeight)*9999/990
}
|
||||
|
6
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
generated
vendored
6
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
generated
vendored
@@ -54,12 +54,6 @@ type Resources struct {
|
||||
// Total memory usage (memory + swap); set `-1` to enable unlimited swap
|
||||
MemorySwap int64 `json:"memory_swap"`
|
||||
|
||||
// Kernel memory limit (in bytes)
|
||||
KernelMemory int64 `json:"kernel_memory"`
|
||||
|
||||
// Kernel memory limit for TCP use (in bytes)
|
||||
KernelMemoryTCP int64 `json:"kernel_memory_tcp"`
|
||||
|
||||
// CPU shares (relative weight vs. other containers)
|
||||
CpuShares uint64 `json:"cpu_shares"`
|
||||
|
||||
|
19
vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
19
vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
@@ -31,9 +31,10 @@ type IDMap struct {
|
||||
// for syscalls. Additional architectures can be added by specifying them in
|
||||
// Architectures.
|
||||
type Seccomp struct {
|
||||
DefaultAction Action `json:"default_action"`
|
||||
Architectures []string `json:"architectures"`
|
||||
Syscalls []*Syscall `json:"syscalls"`
|
||||
DefaultAction Action `json:"default_action"`
|
||||
Architectures []string `json:"architectures"`
|
||||
Syscalls []*Syscall `json:"syscalls"`
|
||||
DefaultErrnoRet *uint `json:"default_errno_ret"`
|
||||
}
|
||||
|
||||
// Action is taken upon rule match in Seccomp
|
||||
@@ -222,25 +223,25 @@ const (
|
||||
// the runtime environment has been created but before the pivot_root has been executed.
|
||||
// CreateRuntime is called immediately after the deprecated Prestart hook.
|
||||
// CreateRuntime commands are called in the Runtime Namespace.
|
||||
CreateRuntime = "createRuntime"
|
||||
CreateRuntime HookName = "createRuntime"
|
||||
|
||||
// CreateContainer commands MUST be called as part of the create operation after
|
||||
// the runtime environment has been created but before the pivot_root has been executed.
|
||||
// CreateContainer commands are called in the Container namespace.
|
||||
CreateContainer = "createContainer"
|
||||
CreateContainer HookName = "createContainer"
|
||||
|
||||
// StartContainer commands MUST be called as part of the start operation and before
|
||||
// the container process is started.
|
||||
// StartContainer commands are called in the Container namespace.
|
||||
StartContainer = "startContainer"
|
||||
StartContainer HookName = "startContainer"
|
||||
|
||||
// Poststart commands are executed after the container init process starts.
|
||||
// Poststart commands are called in the Runtime Namespace.
|
||||
Poststart = "poststart"
|
||||
Poststart HookName = "poststart"
|
||||
|
||||
// Poststop commands are executed after the container init process exits.
|
||||
// Poststop commands are called in the Runtime Namespace.
|
||||
Poststop = "poststop"
|
||||
Poststop HookName = "poststop"
|
||||
)
|
||||
|
||||
type Capabilities struct {
|
||||
@@ -387,7 +388,7 @@ func (c Command) Run(s *specs.State) error {
|
||||
return err
|
||||
case <-timerCh:
|
||||
cmd.Process.Kill()
|
||||
cmd.Wait()
|
||||
<-errC
|
||||
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
|
||||
}
|
||||
}
|
||||
|
9
vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go
generated
vendored
Normal file
9
vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go
generated
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
// +build gofuzz
|
||||
|
||||
package configs
|
||||
|
||||
func FuzzUnmarshalJSON(data []byte) int {
|
||||
hooks := Hooks{}
|
||||
_ = hooks.UnmarshalJSON(data)
|
||||
return 1
|
||||
}
|
3
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
generated
vendored
3
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
generated
vendored
@@ -11,6 +11,9 @@ import (
|
||||
// rootlessEUID makes sure that the config can be applied when runc
|
||||
// is being executed as a non-root user (euid != 0) in the current user namespace.
|
||||
func (v *ConfigValidator) rootlessEUID(config *configs.Config) error {
|
||||
if !config.RootlessEUID {
|
||||
return nil
|
||||
}
|
||||
if err := rootlessEUIDMappings(config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
84
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
84
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
@@ -8,6 +8,7 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
selinux "github.com/opencontainers/selinux/go-selinux"
|
||||
@@ -25,36 +26,30 @@ func New() Validator {
|
||||
type ConfigValidator struct {
|
||||
}
|
||||
|
||||
type check func(config *configs.Config) error
|
||||
|
||||
func (v *ConfigValidator) Validate(config *configs.Config) error {
|
||||
if err := v.rootfs(config); err != nil {
|
||||
return err
|
||||
checks := []check{
|
||||
v.rootfs,
|
||||
v.network,
|
||||
v.hostname,
|
||||
v.security,
|
||||
v.usernamespace,
|
||||
v.cgroupnamespace,
|
||||
v.sysctl,
|
||||
v.intelrdt,
|
||||
v.rootlessEUID,
|
||||
v.mounts,
|
||||
}
|
||||
if err := v.network(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.hostname(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.security(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.usernamespace(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.cgroupnamespace(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.sysctl(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.intelrdt(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.RootlessEUID {
|
||||
if err := v.rootlessEUID(config); err != nil {
|
||||
for _, c := range checks {
|
||||
if err := c(config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := v.cgroups(config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -223,6 +218,45 @@ func (v *ConfigValidator) intelrdt(config *configs.Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) cgroups(config *configs.Config) error {
|
||||
c := config.Cgroups
|
||||
if c == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return fmt.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
|
||||
}
|
||||
|
||||
r := c.Resources
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if !cgroups.IsCgroup2UnifiedMode() && r.Unified != nil {
|
||||
return cgroups.ErrV1NoUnified
|
||||
}
|
||||
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
_, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) mounts(config *configs.Config) error {
|
||||
for _, m := range config.Mounts {
|
||||
if !filepath.IsAbs(m.Destination) {
|
||||
return fmt.Errorf("invalid mount %+v: mount destination not absolute", m)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func isHostNetNS(path string) (bool, error) {
|
||||
const currentProcessNetns = "/proc/self/ns/net"
|
||||
|
||||
|
98
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
98
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
@@ -27,13 +27,13 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
|
||||
"github.com/checkpoint-restore/go-criu/v4"
|
||||
criurpc "github.com/checkpoint-restore/go-criu/v4/rpc"
|
||||
"github.com/golang/protobuf/proto"
|
||||
"github.com/checkpoint-restore/go-criu/v5"
|
||||
criurpc "github.com/checkpoint-restore/go-criu/v5/rpc"
|
||||
errorsf "github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink/nl"
|
||||
"golang.org/x/sys/unix"
|
||||
"google.golang.org/protobuf/proto"
|
||||
)
|
||||
|
||||
const stdioFdCount = 3
|
||||
@@ -55,6 +55,7 @@ type linuxContainer struct {
|
||||
criuVersion int
|
||||
state containerState
|
||||
created time.Time
|
||||
fifo *os.File
|
||||
}
|
||||
|
||||
// State represents a running container's state
|
||||
@@ -224,9 +225,9 @@ func (c *linuxContainer) Set(config configs.Config) error {
|
||||
if status == Stopped {
|
||||
return newGenericError(errors.New("container not running"), ContainerNotRunning)
|
||||
}
|
||||
if err := c.cgroupManager.Set(&config); err != nil {
|
||||
if err := c.cgroupManager.Set(config.Cgroups.Resources); err != nil {
|
||||
// Set configs back
|
||||
if err2 := c.cgroupManager.Set(c.config); err2 != nil {
|
||||
if err2 := c.cgroupManager.Set(c.config.Cgroups.Resources); err2 != nil {
|
||||
logrus.Warnf("Setting back cgroup configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
|
||||
}
|
||||
return err
|
||||
@@ -234,7 +235,7 @@ func (c *linuxContainer) Set(config configs.Config) error {
|
||||
if c.intelRdtManager != nil {
|
||||
if err := c.intelRdtManager.Set(&config); err != nil {
|
||||
// Set configs back
|
||||
if err2 := c.cgroupManager.Set(c.config); err2 != nil {
|
||||
if err2 := c.cgroupManager.Set(c.config.Cgroups.Resources); err2 != nil {
|
||||
logrus.Warnf("Setting back cgroup configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
|
||||
}
|
||||
if err2 := c.intelRdtManager.Set(c.config); err2 != nil {
|
||||
@@ -357,17 +358,30 @@ type openResult struct {
|
||||
err error
|
||||
}
|
||||
|
||||
func (c *linuxContainer) start(process *Process) error {
|
||||
func (c *linuxContainer) start(process *Process) (retErr error) {
|
||||
parent, err := c.newParentProcess(process)
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "creating new parent process")
|
||||
}
|
||||
parent.forwardChildLogs()
|
||||
|
||||
logsDone := parent.forwardChildLogs()
|
||||
if logsDone != nil {
|
||||
defer func() {
|
||||
// Wait for log forwarder to finish. This depends on
|
||||
// runc init closing the _LIBCONTAINER_LOGPIPE log fd.
|
||||
err := <-logsDone
|
||||
if err != nil && retErr == nil {
|
||||
retErr = newSystemErrorWithCause(err, "forwarding init logs")
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
if err := parent.start(); err != nil {
|
||||
return newSystemErrorWithCause(err, "starting container process")
|
||||
}
|
||||
|
||||
if process.Init {
|
||||
c.fifo.Close()
|
||||
if c.config.Hooks != nil {
|
||||
s, err := c.currentOCIState()
|
||||
if err != nil {
|
||||
@@ -443,12 +457,13 @@ func (c *linuxContainer) deleteExecFifo() {
|
||||
// fd, with _LIBCONTAINER_FIFOFD set to its fd number.
|
||||
func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error {
|
||||
fifoName := filepath.Join(c.root, execFifoFilename)
|
||||
fifoFd, err := unix.Open(fifoName, unix.O_PATH|unix.O_CLOEXEC, 0)
|
||||
fifo, err := os.OpenFile(fifoName, unix.O_PATH|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.fifo = fifo
|
||||
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, os.NewFile(uintptr(fifoFd), fifoName))
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, fifo)
|
||||
cmd.Env = append(cmd.Env,
|
||||
"_LIBCONTAINER_FIFOFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
|
||||
return nil
|
||||
@@ -570,6 +585,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
|
||||
intelRdtPath: state.IntelRdtPath,
|
||||
messageSockPair: messageSockPair,
|
||||
logFilePair: logFilePair,
|
||||
manager: c.cgroupManager,
|
||||
config: c.newInitConfig(p),
|
||||
process: p,
|
||||
bootstrapData: data,
|
||||
@@ -594,6 +610,9 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
||||
AppArmorProfile: c.config.AppArmorProfile,
|
||||
ProcessLabel: c.config.ProcessLabel,
|
||||
Rlimits: c.config.Rlimits,
|
||||
CreateConsole: process.ConsoleSocket != nil,
|
||||
ConsoleWidth: process.ConsoleWidth,
|
||||
ConsoleHeight: process.ConsoleHeight,
|
||||
}
|
||||
if process.NoNewPrivileges != nil {
|
||||
cfg.NoNewPrivileges = *process.NoNewPrivileges
|
||||
@@ -607,9 +626,10 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
||||
if len(process.Rlimits) > 0 {
|
||||
cfg.Rlimits = process.Rlimits
|
||||
}
|
||||
cfg.CreateConsole = process.ConsoleSocket != nil
|
||||
cfg.ConsoleWidth = process.ConsoleWidth
|
||||
cfg.ConsoleHeight = process.ConsoleHeight
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
cfg.Cgroup2Path = c.cgroupManager.Path("")
|
||||
}
|
||||
|
||||
return cfg
|
||||
}
|
||||
|
||||
@@ -701,7 +721,6 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
|
||||
return errors.New("CRIU feature check failed")
|
||||
}
|
||||
|
||||
logrus.Debugf("Feature check says: %s", criuFeatures)
|
||||
missingFeatures := false
|
||||
|
||||
// The outer if checks if the fields actually exist
|
||||
@@ -1198,7 +1217,6 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
|
||||
if err := checkProcMount(c.config.Rootfs, dest, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Destination = dest
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1235,11 +1253,46 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error
|
||||
// Now go through all mounts and create the mountpoints
|
||||
// if the mountpoints are not on a tmpfs, as CRIU will
|
||||
// restore the complete tmpfs content from its checkpoint.
|
||||
umounts := []string{}
|
||||
defer func() {
|
||||
for _, u := range umounts {
|
||||
_ = utils.WithProcfd(c.config.Rootfs, u, func(procfd string) error {
|
||||
if e := unix.Unmount(procfd, unix.MNT_DETACH); e != nil {
|
||||
if e != unix.EINVAL {
|
||||
// Ignore EINVAL as it means 'target is not a mount point.'
|
||||
// It probably has already been unmounted.
|
||||
logrus.Warnf("Error during cleanup unmounting of %s (%s): %v", procfd, u, e)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
}()
|
||||
for _, m := range mounts {
|
||||
if !isPathInPrefixList(m.Destination, tmpfs) {
|
||||
if err := c.makeCriuRestoreMountpoints(m); err != nil {
|
||||
return err
|
||||
}
|
||||
// If the mount point is a bind mount, we need to mount
|
||||
// it now so that runc can create the necessary mount
|
||||
// points for mounts in bind mounts.
|
||||
// This also happens during initial container creation.
|
||||
// Without this CRIU restore will fail
|
||||
// See: https://github.com/opencontainers/runc/issues/2748
|
||||
// It is also not necessary to order the mount points
|
||||
// because during initial container creation mounts are
|
||||
// set up in the order they are configured.
|
||||
if m.Device == "bind" {
|
||||
if err := utils.WithProcfd(c.config.Rootfs, m.Destination, func(procfd string) error {
|
||||
if err := unix.Mount(m.Source, procfd, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
|
||||
return errorsf.Wrapf(err, "unable to bind mount %q to %q (through %q)", m.Source, m.Destination, procfd)
|
||||
}
|
||||
return nil
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
umounts = append(umounts, m.Destination)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -1416,7 +1469,7 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := c.cgroupManager.Set(c.config); err != nil {
|
||||
if err := c.cgroupManager.Set(c.config.Cgroups.Resources); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
|
||||
@@ -1475,7 +1528,6 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||
// the initial CRIU run to detect the version. Skip it.
|
||||
logrus.Debugf("Using CRIU %d at: %s", c.criuVersion, c.criuPath)
|
||||
}
|
||||
logrus.Debugf("Using CRIU with following args: %s", args)
|
||||
cmd := exec.Command(c.criuPath, args...)
|
||||
if process != nil {
|
||||
cmd.Stdin = process.Stdin
|
||||
@@ -1523,19 +1575,19 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||
// should be empty. For older CRIU versions it still will be
|
||||
// available but empty. criurpc.CriuReqType_VERSION actually
|
||||
// has no req.GetOpts().
|
||||
if !(req.GetType() == criurpc.CriuReqType_FEATURE_CHECK ||
|
||||
req.GetType() == criurpc.CriuReqType_VERSION) {
|
||||
if logrus.GetLevel() >= logrus.DebugLevel &&
|
||||
!(req.GetType() == criurpc.CriuReqType_FEATURE_CHECK ||
|
||||
req.GetType() == criurpc.CriuReqType_VERSION) {
|
||||
|
||||
val := reflect.ValueOf(req.GetOpts())
|
||||
v := reflect.Indirect(val)
|
||||
for i := 0; i < v.NumField(); i++ {
|
||||
st := v.Type()
|
||||
name := st.Field(i).Name
|
||||
if strings.HasPrefix(name, "XXX_") {
|
||||
continue
|
||||
if 'A' <= name[0] && name[0] <= 'Z' {
|
||||
value := val.MethodByName("Get" + name).Call([]reflect.Value{})
|
||||
logrus.Debugf("CRIU option %s with value %v", name, value[0])
|
||||
}
|
||||
value := val.MethodByName("Get" + name).Call([]reflect.Value{})
|
||||
logrus.Debugf("CRIU option %s with value %v", name, value[0])
|
||||
}
|
||||
}
|
||||
data, err := proto.Marshal(req)
|
||||
|
2
vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
generated
vendored
@@ -1,6 +1,6 @@
|
||||
package libcontainer
|
||||
|
||||
import criu "github.com/checkpoint-restore/go-criu/v4/rpc"
|
||||
import criu "github.com/checkpoint-restore/go-criu/v5/rpc"
|
||||
|
||||
type CriuPageServerInfo struct {
|
||||
Address string // IP address of CRIU page server
|
||||
|
4
vendor/github.com/opencontainers/runc/libcontainer/devices/device.go
generated
vendored
4
vendor/github.com/opencontainers/runc/libcontainer/devices/device.go
generated
vendored
@@ -168,3 +168,7 @@ func (d *Rule) CgroupString() string {
|
||||
}
|
||||
return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions)
|
||||
}
|
||||
|
||||
// Mkdev returns the device number for this rule. It delegates entirely to
// mkDev, passing the rule itself, and returns that function's result and
// error unchanged.
func (d *Rule) Mkdev() (uint64, error) {
|
||||
return mkDev(d)
|
||||
}
|
||||
|
107
vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go
generated
vendored
107
vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go
generated
vendored
@@ -4,13 +4,118 @@ package devices
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func (d *Rule) Mkdev() (uint64, error) {
|
||||
var (
|
||||
// ErrNotADevice denotes that a file is not a valid linux device.
|
||||
ErrNotADevice = errors.New("not a device node")
|
||||
)
|
||||
|
||||
// Testing dependencies
|
||||
var (
|
||||
unixLstat = unix.Lstat
|
||||
ioutilReadDir = ioutil.ReadDir
|
||||
)
|
||||
|
||||
func mkDev(d *Rule) (uint64, error) {
|
||||
if d.Major == Wildcard || d.Minor == Wildcard {
|
||||
return 0, errors.New("cannot mkdev() device with wildcards")
|
||||
}
|
||||
return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil
|
||||
}
|
||||
|
||||
// Given the path to a device and its cgroup_permissions(which cannot be easily queried) look up the
|
||||
// information about a linux device and return that information as a Device struct.
|
||||
func DeviceFromPath(path, permissions string) (*Device, error) {
|
||||
var stat unix.Stat_t
|
||||
err := unixLstat(path, &stat)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var (
|
||||
devType Type
|
||||
mode = stat.Mode
|
||||
devNumber = uint64(stat.Rdev)
|
||||
major = unix.Major(devNumber)
|
||||
minor = unix.Minor(devNumber)
|
||||
)
|
||||
switch mode & unix.S_IFMT {
|
||||
case unix.S_IFBLK:
|
||||
devType = BlockDevice
|
||||
case unix.S_IFCHR:
|
||||
devType = CharDevice
|
||||
case unix.S_IFIFO:
|
||||
devType = FifoDevice
|
||||
default:
|
||||
return nil, ErrNotADevice
|
||||
}
|
||||
return &Device{
|
||||
Rule: Rule{
|
||||
Type: devType,
|
||||
Major: int64(major),
|
||||
Minor: int64(minor),
|
||||
Permissions: Permissions(permissions),
|
||||
},
|
||||
Path: path,
|
||||
FileMode: os.FileMode(mode &^ unix.S_IFMT),
|
||||
Uid: stat.Uid,
|
||||
Gid: stat.Gid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// HostDevices returns all devices that can be found under /dev directory.
|
||||
func HostDevices() ([]*Device, error) {
|
||||
return GetDevices("/dev")
|
||||
}
|
||||
|
||||
// GetDevices recursively traverses a directory specified by path
|
||||
// and returns all devices found there.
|
||||
func GetDevices(path string) ([]*Device, error) {
|
||||
files, err := ioutilReadDir(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var out []*Device
|
||||
for _, f := range files {
|
||||
switch {
|
||||
case f.IsDir():
|
||||
switch f.Name() {
|
||||
// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
|
||||
// ".udev" added to address https://github.com/opencontainers/runc/issues/2093
|
||||
case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts", ".udev":
|
||||
continue
|
||||
default:
|
||||
sub, err := GetDevices(filepath.Join(path, f.Name()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
out = append(out, sub...)
|
||||
continue
|
||||
}
|
||||
case f.Name() == "console":
|
||||
continue
|
||||
}
|
||||
device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm")
|
||||
if err != nil {
|
||||
if err == ErrNotADevice {
|
||||
continue
|
||||
}
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
if device.Type == FifoDevice {
|
||||
continue
|
||||
}
|
||||
out = append(out, device)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
5
vendor/github.com/opencontainers/runc/libcontainer/devices/device_windows.go
generated
vendored
5
vendor/github.com/opencontainers/runc/libcontainer/devices/device_windows.go
generated
vendored
@@ -1,5 +0,0 @@
|
||||
package devices
|
||||
|
||||
// Mkdev is a stub in this file: it ignores the rule's contents and always
// returns device number 0 with a nil error.
func (d *Rule) Mkdev() (uint64, error) {
|
||||
return 0, nil
|
||||
}
|
112
vendor/github.com/opencontainers/runc/libcontainer/devices/devices.go
generated
vendored
112
vendor/github.com/opencontainers/runc/libcontainer/devices/devices.go
generated
vendored
@@ -1,112 +0,0 @@
|
||||
package devices
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrNotADevice denotes that a file is not a valid linux device.
|
||||
ErrNotADevice = errors.New("not a device node")
|
||||
)
|
||||
|
||||
// Testing dependencies
|
||||
var (
|
||||
unixLstat = unix.Lstat
|
||||
ioutilReadDir = ioutil.ReadDir
|
||||
)
|
||||
|
||||
// Given the path to a device and its cgroup_permissions(which cannot be easily queried) look up the
|
||||
// information about a linux device and return that information as a Device struct.
|
||||
func DeviceFromPath(path, permissions string) (*Device, error) {
|
||||
var stat unix.Stat_t
|
||||
err := unixLstat(path, &stat)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var (
|
||||
devType Type
|
||||
mode = stat.Mode
|
||||
devNumber = uint64(stat.Rdev)
|
||||
major = unix.Major(devNumber)
|
||||
minor = unix.Minor(devNumber)
|
||||
)
|
||||
switch mode & unix.S_IFMT {
|
||||
case unix.S_IFBLK:
|
||||
devType = BlockDevice
|
||||
case unix.S_IFCHR:
|
||||
devType = CharDevice
|
||||
case unix.S_IFIFO:
|
||||
devType = FifoDevice
|
||||
default:
|
||||
return nil, ErrNotADevice
|
||||
}
|
||||
return &Device{
|
||||
Rule: Rule{
|
||||
Type: devType,
|
||||
Major: int64(major),
|
||||
Minor: int64(minor),
|
||||
Permissions: Permissions(permissions),
|
||||
},
|
||||
Path: path,
|
||||
FileMode: os.FileMode(mode),
|
||||
Uid: stat.Uid,
|
||||
Gid: stat.Gid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// HostDevices returns all devices that can be found under /dev directory.
|
||||
func HostDevices() ([]*Device, error) {
|
||||
return GetDevices("/dev")
|
||||
}
|
||||
|
||||
// GetDevices recursively traverses a directory specified by path
|
||||
// and returns all devices found there.
|
||||
func GetDevices(path string) ([]*Device, error) {
|
||||
files, err := ioutilReadDir(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var out []*Device
|
||||
for _, f := range files {
|
||||
switch {
|
||||
case f.IsDir():
|
||||
switch f.Name() {
|
||||
// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
|
||||
// ".udev" added to address https://github.com/opencontainers/runc/issues/2093
|
||||
case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts", ".udev":
|
||||
continue
|
||||
default:
|
||||
sub, err := GetDevices(filepath.Join(path, f.Name()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
out = append(out, sub...)
|
||||
continue
|
||||
}
|
||||
case f.Name() == "console":
|
||||
continue
|
||||
}
|
||||
device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm")
|
||||
if err != nil {
|
||||
if err == ErrNotADevice {
|
||||
continue
|
||||
}
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
if device.Type == FifoDevice {
|
||||
continue
|
||||
}
|
||||
out = append(out, device)
|
||||
}
|
||||
return out, nil
|
||||
}
|
14
vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
generated
vendored
14
vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
generated
vendored
@@ -185,7 +185,7 @@ func CriuPath(criupath string) func(*LinuxFactory) error {
|
||||
// configures the factory with the provided option funcs.
|
||||
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
|
||||
if root != "" {
|
||||
if err := os.MkdirAll(root, 0700); err != nil {
|
||||
if err := os.MkdirAll(root, 0o700); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
}
|
||||
@@ -225,7 +225,7 @@ type LinuxFactory struct {
|
||||
// containers.
|
||||
CriuPath string
|
||||
|
||||
// New{u,g}uidmapPath is the path to the binaries used for mapping with
|
||||
// New{u,g}idmapPath is the path to the binaries used for mapping with
|
||||
// rootless containers.
|
||||
NewuidmapPath string
|
||||
NewgidmapPath string
|
||||
@@ -259,7 +259,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
||||
} else if !os.IsNotExist(err) {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
if err := os.MkdirAll(containerRoot, 0711); err != nil {
|
||||
if err := os.MkdirAll(containerRoot, 0o711); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
|
||||
@@ -365,6 +365,12 @@ func (l *LinuxFactory) StartInitialization() (err error) {
|
||||
defer consoleSocket.Close()
|
||||
}
|
||||
|
||||
logPipeFdStr := os.Getenv("_LIBCONTAINER_LOGPIPE")
|
||||
logPipeFd, err := strconv.Atoi(logPipeFdStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE=%s to int: %s", logPipeFdStr, err)
|
||||
}
|
||||
|
||||
// clear the current process's environment to clean any libcontainer
|
||||
// specific env vars.
|
||||
os.Clearenv()
|
||||
@@ -387,7 +393,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
|
||||
}
|
||||
}()
|
||||
|
||||
i, err := newContainerInit(it, pipe, consoleSocket, fifofd)
|
||||
i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
35
vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
generated
vendored
35
vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
generated
vendored
@@ -35,8 +35,8 @@ const (
|
||||
)
|
||||
|
||||
type pid struct {
|
||||
Pid int `json:"pid"`
|
||||
PidFirstChild int `json:"pid_first"`
|
||||
Pid int `json:"stage2_pid"`
|
||||
PidFirstChild int `json:"stage1_pid"`
|
||||
}
|
||||
|
||||
// network is an internal struct used to setup container networks.
|
||||
@@ -70,13 +70,14 @@ type initConfig struct {
|
||||
RootlessEUID bool `json:"rootless_euid,omitempty"`
|
||||
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
|
||||
SpecState *specs.State `json:"spec_state,omitempty"`
|
||||
Cgroup2Path string `json:"cgroup2_path,omitempty"`
|
||||
}
|
||||
|
||||
// initer is the interface returned by newContainerInit. It exposes a single
// Init method that reports failure through its error return.
type initer interface {
|
||||
Init() error
|
||||
}
|
||||
|
||||
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd int) (initer, error) {
|
||||
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int) (initer, error) {
|
||||
var config *initConfig
|
||||
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
|
||||
return nil, err
|
||||
@@ -90,6 +91,7 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd
|
||||
pipe: pipe,
|
||||
consoleSocket: consoleSocket,
|
||||
config: config,
|
||||
logFd: logFd,
|
||||
}, nil
|
||||
case initStandard:
|
||||
return &linuxStandardInit{
|
||||
@@ -98,6 +100,7 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd
|
||||
parentPid: unix.Getppid(),
|
||||
config: config,
|
||||
fifoFd: fifoFd,
|
||||
logFd: logFd,
|
||||
}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unknown init type %q", t)
|
||||
@@ -129,6 +132,26 @@ func finalizeNamespace(config *initConfig) error {
|
||||
return errors.Wrap(err, "close exec fds")
|
||||
}
|
||||
|
||||
// we only do chdir if it's specified
|
||||
doChdir := config.Cwd != ""
|
||||
if doChdir {
|
||||
// First, attempt the chdir before setting up the user.
|
||||
// This could allow us to access a directory that the user running runc can access
|
||||
// but the container user cannot.
|
||||
err := unix.Chdir(config.Cwd)
|
||||
switch {
|
||||
case err == nil:
|
||||
doChdir = false
|
||||
case os.IsPermission(err):
|
||||
// If we hit an EPERM, we should attempt again after setting up user.
|
||||
// This will allow us to successfully chdir if the container user has access
|
||||
// to the directory, but the user running runc does not.
|
||||
// This is useful in cases where the cwd is also a volume that's been chowned to the container user.
|
||||
default:
|
||||
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
|
||||
}
|
||||
}
|
||||
|
||||
caps := &configs.Capabilities{}
|
||||
if config.Capabilities != nil {
|
||||
caps = config.Capabilities
|
||||
@@ -150,10 +173,8 @@ func finalizeNamespace(config *initConfig) error {
|
||||
if err := setupUser(config); err != nil {
|
||||
return errors.Wrap(err, "setup user")
|
||||
}
|
||||
// Change working directory AFTER the user has been set up.
|
||||
// Otherwise, if the cwd is also a volume that's been chowned to the container user (and not the user running runc),
|
||||
// this command will EPERM.
|
||||
if config.Cwd != "" {
|
||||
// Change working directory AFTER the user has been set up, if we haven't done it yet.
|
||||
if doChdir {
|
||||
if err := unix.Chdir(config.Cwd); err != nil {
|
||||
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
|
||||
}
|
||||
|
62
vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go
generated
vendored
62
vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go
generated
vendored
@@ -6,14 +6,14 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var (
|
||||
configureMutex = sync.Mutex{}
|
||||
configureMutex sync.Mutex
|
||||
// loggingConfigured will be set once logging has been configured via invoking `ConfigureLogging`.
|
||||
// Subsequent invocations of `ConfigureLogging` would be no-op
|
||||
loggingConfigured = false
|
||||
@@ -23,41 +23,47 @@ type Config struct {
|
||||
LogLevel logrus.Level
|
||||
LogFormat string
|
||||
LogFilePath string
|
||||
LogPipeFd string
|
||||
LogPipeFd int
|
||||
LogCaller bool
|
||||
}
|
||||
|
||||
func ForwardLogs(logPipe io.Reader) {
|
||||
lineReader := bufio.NewReader(logPipe)
|
||||
for {
|
||||
line, err := lineReader.ReadBytes('\n')
|
||||
if len(line) > 0 {
|
||||
processEntry(line)
|
||||
func ForwardLogs(logPipe io.ReadCloser) chan error {
|
||||
done := make(chan error, 1)
|
||||
s := bufio.NewScanner(logPipe)
|
||||
|
||||
go func() {
|
||||
for s.Scan() {
|
||||
processEntry(s.Bytes())
|
||||
}
|
||||
if err == io.EOF {
|
||||
logrus.Debugf("log pipe has been closed: %+v", err)
|
||||
return
|
||||
if err := logPipe.Close(); err != nil {
|
||||
logrus.Errorf("error closing log source: %v", err)
|
||||
}
|
||||
if err != nil {
|
||||
logrus.Errorf("log pipe read error: %+v", err)
|
||||
}
|
||||
}
|
||||
// The only error we want to return is when reading from
|
||||
// logPipe has failed.
|
||||
done <- s.Err()
|
||||
close(done)
|
||||
}()
|
||||
|
||||
return done
|
||||
}
|
||||
|
||||
func processEntry(text []byte) {
|
||||
type jsonLog struct {
|
||||
if len(text) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
var jl struct {
|
||||
Level string `json:"level"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
|
||||
var jl jsonLog
|
||||
if err := json.Unmarshal(text, &jl); err != nil {
|
||||
logrus.Errorf("failed to decode %q to json: %+v", text, err)
|
||||
logrus.Errorf("failed to decode %q to json: %v", text, err)
|
||||
return
|
||||
}
|
||||
|
||||
lvl, err := logrus.ParseLevel(jl.Level)
|
||||
if err != nil {
|
||||
logrus.Errorf("failed to parse log level %q: %v\n", jl.Level, err)
|
||||
logrus.Errorf("failed to parse log level %q: %v", jl.Level, err)
|
||||
return
|
||||
}
|
||||
logrus.StandardLogger().Logf(lvl, jl.Msg)
|
||||
@@ -68,18 +74,16 @@ func ConfigureLogging(config Config) error {
|
||||
defer configureMutex.Unlock()
|
||||
|
||||
if loggingConfigured {
|
||||
logrus.Debug("logging has already been configured")
|
||||
return nil
|
||||
return errors.New("logging has already been configured")
|
||||
}
|
||||
|
||||
logrus.SetLevel(config.LogLevel)
|
||||
logrus.SetReportCaller(config.LogCaller)
|
||||
|
||||
if config.LogPipeFd != "" {
|
||||
logPipeFdInt, err := strconv.Atoi(config.LogPipeFd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to convert _LIBCONTAINER_LOGPIPE environment variable value %q to int: %v", config.LogPipeFd, err)
|
||||
}
|
||||
logrus.SetOutput(os.NewFile(uintptr(logPipeFdInt), "logpipe"))
|
||||
// XXX: while 0 is a valid fd (usually stdin), here we assume
|
||||
// that we never deliberately set LogPipeFd to 0.
|
||||
if config.LogPipeFd > 0 {
|
||||
logrus.SetOutput(os.NewFile(uintptr(config.LogPipeFd), "logpipe"))
|
||||
} else if config.LogFilePath != "" {
|
||||
f, err := os.OpenFile(config.LogFilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0644)
|
||||
if err != nil {
|
||||
|
30
vendor/github.com/opencontainers/runc/libcontainer/notify_linux_v2.go
generated
vendored
30
vendor/github.com/opencontainers/runc/libcontainer/notify_linux_v2.go
generated
vendored
@@ -3,48 +3,28 @@
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// getValueFromCgroup reads the file at path and looks for the first line of
// the exact form "<key> <value>" (a single space, nothing else on the
// line). It returns the value parsed as an int.
//
// If the file cannot be read or the value is not a valid integer, the
// corresponding error is returned. If no line matches key, it returns
// 0 with a nil error.
func getValueFromCgroup(path, key string) (int, error) {
	raw, err := ioutil.ReadFile(path)
	if err != nil {
		return 0, err
	}

	for _, entry := range strings.Split(string(raw), "\n") {
		sep := strings.IndexByte(entry, ' ')
		if sep < 0 {
			// No separator: not a "key value" line.
			continue
		}
		name, val := entry[:sep], entry[sep+1:]
		// Lines with more than two space-separated fields are ignored.
		if name != key || strings.Contains(val, " ") {
			continue
		}
		return strconv.Atoi(val)
	}

	return 0, nil
}
|
||||
|
||||
func registerMemoryEventV2(cgDir, evName, cgEvName string) (<-chan struct{}, error) {
|
||||
eventControlPath := filepath.Join(cgDir, evName)
|
||||
cgEvPath := filepath.Join(cgDir, cgEvName)
|
||||
fd, err := unix.InotifyInit()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "unable to init inotify")
|
||||
}
|
||||
// watching oom kill
|
||||
evFd, err := unix.InotifyAddWatch(fd, eventControlPath, unix.IN_MODIFY)
|
||||
evFd, err := unix.InotifyAddWatch(fd, filepath.Join(cgDir, evName), unix.IN_MODIFY)
|
||||
if err != nil {
|
||||
unix.Close(fd)
|
||||
return nil, errors.Wrap(err, "unable to add inotify watch")
|
||||
}
|
||||
// The cgroup file system emits no `unix.IN_DELETE|unix.IN_DELETE_SELF` events, so instead watch for all processes having exited.
|
||||
cgFd, err := unix.InotifyAddWatch(fd, cgEvPath, unix.IN_MODIFY)
|
||||
cgFd, err := unix.InotifyAddWatch(fd, filepath.Join(cgDir, cgEvName), unix.IN_MODIFY)
|
||||
if err != nil {
|
||||
unix.Close(fd)
|
||||
return nil, errors.Wrap(err, "unable to add inotify watch")
|
||||
@@ -79,12 +59,12 @@ func registerMemoryEventV2(cgDir, evName, cgEvName string) (<-chan struct{}, err
|
||||
}
|
||||
switch int(rawEvent.Wd) {
|
||||
case evFd:
|
||||
oom, err := getValueFromCgroup(eventControlPath, "oom_kill")
|
||||
oom, err := fscommon.GetValueByKey(cgDir, evName, "oom_kill")
|
||||
if err != nil || oom > 0 {
|
||||
ch <- struct{}{}
|
||||
}
|
||||
case cgFd:
|
||||
pids, err := getValueFromCgroup(cgEvPath, "populated")
|
||||
pids, err := fscommon.GetValueByKey(cgDir, cgEvName, "populated")
|
||||
if err != nil || pids == 0 {
|
||||
return
|
||||
}
|
||||
|
91
vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
generated
vendored
91
vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
generated
vendored
@@ -51,7 +51,7 @@ type parentProcess interface {
|
||||
|
||||
setExternalDescriptors(fds []string)
|
||||
|
||||
forwardChildLogs()
|
||||
forwardChildLogs() chan error
|
||||
}
|
||||
|
||||
type filePair struct {
|
||||
@@ -65,6 +65,7 @@ type setnsProcess struct {
|
||||
logFilePair filePair
|
||||
cgroupPaths map[string]string
|
||||
rootlessCgroups bool
|
||||
manager cgroups.Manager
|
||||
intelRdtPath string
|
||||
config *initConfig
|
||||
fds []string
|
||||
@@ -88,6 +89,8 @@ func (p *setnsProcess) signal(sig os.Signal) error {
|
||||
|
||||
func (p *setnsProcess) start() (retErr error) {
|
||||
defer p.messageSockPair.parent.Close()
|
||||
// get the "before" value of oom kill count
|
||||
oom, _ := p.manager.OOMKillCount()
|
||||
err := p.cmd.Start()
|
||||
// close the write-side of the pipes (controlled by child)
|
||||
p.messageSockPair.child.Close()
|
||||
@@ -95,19 +98,34 @@ func (p *setnsProcess) start() (retErr error) {
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "starting setns process")
|
||||
}
|
||||
|
||||
waitInit := initWaiter(p.messageSockPair.parent)
|
||||
defer func() {
|
||||
if retErr != nil {
|
||||
if newOom, err := p.manager.OOMKillCount(); err == nil && newOom != oom {
|
||||
// Someone in this cgroup was killed, this _might_ be us.
|
||||
retErr = newSystemErrorWithCause(retErr, "possibly OOM-killed")
|
||||
}
|
||||
werr := <-waitInit
|
||||
if werr != nil {
|
||||
logrus.WithError(werr).Warn()
|
||||
}
|
||||
err := ignoreTerminateErrors(p.terminate())
|
||||
if err != nil {
|
||||
logrus.WithError(err).Warn("unable to terminate setnsProcess")
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if p.bootstrapData != nil {
|
||||
if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
|
||||
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||
}
|
||||
}
|
||||
err = <-waitInit
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := p.execSetns(); err != nil {
|
||||
return newSystemErrorWithCause(err, "executing setns process")
|
||||
}
|
||||
@@ -244,8 +262,8 @@ func (p *setnsProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
|
||||
func (p *setnsProcess) forwardChildLogs() {
|
||||
go logs.ForwardLogs(p.logFilePair.parent)
|
||||
func (p *setnsProcess) forwardChildLogs() chan error {
|
||||
return logs.ForwardLogs(p.logFilePair.parent)
|
||||
}
|
||||
|
||||
type initProcess struct {
|
||||
@@ -319,9 +337,36 @@ func (p *initProcess) start() (retErr error) {
|
||||
p.process.ops = nil
|
||||
return newSystemErrorWithCause(err, "starting init process command")
|
||||
}
|
||||
|
||||
waitInit := initWaiter(p.messageSockPair.parent)
|
||||
defer func() {
|
||||
if retErr != nil {
|
||||
// terminate the process to ensure we can remove cgroups
|
||||
// Find out if init is killed by the kernel's OOM killer.
|
||||
// Get the count before killing init as otherwise cgroup
|
||||
// might be removed by systemd.
|
||||
oom, err := p.manager.OOMKillCount()
|
||||
if err != nil {
|
||||
logrus.WithError(err).Warn("unable to get oom kill count")
|
||||
} else if oom > 0 {
|
||||
// Does not matter what the particular error was,
|
||||
// its cause is most probably OOM, so report that.
|
||||
const oomError = "container init was OOM-killed (memory limit too low?)"
|
||||
|
||||
if logrus.GetLevel() >= logrus.DebugLevel {
|
||||
// Only show the original error if debug is set,
|
||||
// as it is not generally very useful.
|
||||
retErr = newSystemErrorWithCause(retErr, oomError)
|
||||
} else {
|
||||
retErr = newSystemError(errors.New(oomError))
|
||||
}
|
||||
}
|
||||
|
||||
werr := <-waitInit
|
||||
if werr != nil {
|
||||
logrus.WithError(werr).Warn()
|
||||
}
|
||||
|
||||
// Terminate the process to ensure we can remove cgroups.
|
||||
if err := ignoreTerminateErrors(p.terminate()); err != nil {
|
||||
logrus.WithError(err).Warn("unable to terminate initProcess")
|
||||
}
|
||||
@@ -347,6 +392,11 @@ func (p *initProcess) start() (retErr error) {
|
||||
if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
|
||||
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||
}
|
||||
err = <-waitInit
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
childPid, err := p.getChildPid()
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "getting the final child's pid from pipe")
|
||||
@@ -398,7 +448,7 @@ func (p *initProcess) start() (retErr error) {
|
||||
// call prestart and CreateRuntime hooks
|
||||
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||
// Setup cgroup before the hook, so that the prestart and CreateRuntime hook could apply cgroup permissions.
|
||||
if err := p.manager.Set(p.config.Config); err != nil {
|
||||
if err := p.manager.Set(p.config.Config.Cgroups.Resources); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
|
||||
}
|
||||
if p.intelRdtManager != nil {
|
||||
@@ -454,7 +504,7 @@ func (p *initProcess) start() (retErr error) {
|
||||
sentRun = true
|
||||
case procHooks:
|
||||
// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
|
||||
if err := p.manager.Set(p.config.Config); err != nil {
|
||||
if err := p.manager.Set(p.config.Config.Cgroups.Resources); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting cgroup config for procHooks process")
|
||||
}
|
||||
if p.intelRdtManager != nil {
|
||||
@@ -580,8 +630,8 @@ func (p *initProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
|
||||
func (p *initProcess) forwardChildLogs() {
|
||||
go logs.ForwardLogs(p.logFilePair.parent)
|
||||
func (p *initProcess) forwardChildLogs() chan error {
|
||||
return logs.ForwardLogs(p.logFilePair.parent)
|
||||
}
|
||||
|
||||
func getPipeFds(pid int) ([]string, error) {
|
||||
@@ -649,3 +699,28 @@ func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
// initWaiter returns a channel to wait on for making sure
|
||||
// runc init has finished the initial setup.
|
||||
func initWaiter(r io.Reader) chan error {
|
||||
ch := make(chan error, 1)
|
||||
go func() {
|
||||
defer close(ch)
|
||||
|
||||
inited := make([]byte, 1)
|
||||
n, err := r.Read(inited)
|
||||
if err == nil {
|
||||
if n < 1 {
|
||||
err = errors.New("short read")
|
||||
} else if inited[0] != 0 {
|
||||
err = fmt.Errorf("unexpected %d != 0", inited[0])
|
||||
} else {
|
||||
ch <- nil
|
||||
return
|
||||
}
|
||||
}
|
||||
ch <- newSystemErrorWithCause(err, "waiting for init preliminary setup")
|
||||
}()
|
||||
|
||||
return ch
|
||||
}
|
||||
|
6
vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
generated
vendored
6
vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
generated
vendored
@@ -77,7 +77,8 @@ func (p *restoredProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
|
||||
func (p *restoredProcess) forwardChildLogs() {
|
||||
func (p *restoredProcess) forwardChildLogs() chan error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// nonChildProcess represents a process where the calling process is not
|
||||
@@ -125,5 +126,6 @@ func (p *nonChildProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) forwardChildLogs() {
|
||||
func (p *nonChildProcess) forwardChildLogs() chan error {
|
||||
return nil
|
||||
}
|
||||
|
312
vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
generated
vendored
312
vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
generated
vendored
@@ -17,18 +17,28 @@ import (
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"github.com/mrunalp/fileutils"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/userns"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
||||
|
||||
type mountConfig struct {
|
||||
root string
|
||||
label string
|
||||
cgroup2Path string
|
||||
rootlessCgroups bool
|
||||
cgroupns bool
|
||||
}
|
||||
|
||||
// needsSetupDev returns true if /dev needs to be set up.
|
||||
func needsSetupDev(config *configs.Config) bool {
|
||||
for _, m := range config.Mounts {
|
||||
@@ -48,7 +58,13 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||||
return newSystemErrorWithCause(err, "preparing rootfs")
|
||||
}
|
||||
|
||||
hasCgroupns := config.Namespaces.Contains(configs.NEWCGROUP)
|
||||
mountConfig := &mountConfig{
|
||||
root: config.Rootfs,
|
||||
label: config.MountLabel,
|
||||
cgroup2Path: iConfig.Cgroup2Path,
|
||||
rootlessCgroups: iConfig.RootlessCgroups,
|
||||
cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
|
||||
}
|
||||
setupDev := needsSetupDev(config)
|
||||
for _, m := range config.Mounts {
|
||||
for _, precmd := range m.PremountCmds {
|
||||
@@ -56,7 +72,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||||
return newSystemErrorWithCause(err, "running premount command")
|
||||
}
|
||||
}
|
||||
if err := mountToRootfs(m, config.Rootfs, config.MountLabel, hasCgroupns); err != nil {
|
||||
if err := mountToRootfs(m, mountConfig); err != nil {
|
||||
return newSystemErrorWithCausef(err, "mounting %q to rootfs at %q", m.Source, m.Destination)
|
||||
}
|
||||
|
||||
@@ -213,8 +229,6 @@ func prepareBindMount(m *configs.Mount, rootfs string) error {
|
||||
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
|
||||
return err
|
||||
}
|
||||
// update the mount with the correct dest after symlinks are resolved.
|
||||
m.Destination = dest
|
||||
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -222,7 +236,7 @@ func prepareBindMount(m *configs.Mount, rootfs string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
|
||||
func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
|
||||
binds, err := getCgroupMounts(m)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -242,31 +256,34 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||
Data: "mode=755",
|
||||
PropagationFlags: m.PropagationFlags,
|
||||
}
|
||||
if err := mountToRootfs(tmpfs, rootfs, mountLabel, enableCgroupns); err != nil {
|
||||
if err := mountToRootfs(tmpfs, c); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, b := range binds {
|
||||
if enableCgroupns {
|
||||
subsystemPath := filepath.Join(rootfs, b.Destination)
|
||||
if c.cgroupns {
|
||||
subsystemPath := filepath.Join(c.root, b.Destination)
|
||||
if err := os.MkdirAll(subsystemPath, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
flags := defaultMountFlags
|
||||
if m.Flags&unix.MS_RDONLY != 0 {
|
||||
flags = flags | unix.MS_RDONLY
|
||||
}
|
||||
cgroupmount := &configs.Mount{
|
||||
Source: "cgroup",
|
||||
Device: "cgroup", // this is actually fstype
|
||||
Destination: subsystemPath,
|
||||
Flags: flags,
|
||||
Data: filepath.Base(subsystemPath),
|
||||
}
|
||||
if err := mountNewCgroup(cgroupmount); err != nil {
|
||||
if err := utils.WithProcfd(c.root, b.Destination, func(procfd string) error {
|
||||
flags := defaultMountFlags
|
||||
if m.Flags&unix.MS_RDONLY != 0 {
|
||||
flags = flags | unix.MS_RDONLY
|
||||
}
|
||||
var (
|
||||
source = "cgroup"
|
||||
data = filepath.Base(subsystemPath)
|
||||
)
|
||||
if data == "systemd" {
|
||||
data = cgroups.CgroupNamePrefix + data
|
||||
source = "systemd"
|
||||
}
|
||||
return unix.Mount(source, procfd, "cgroup", uintptr(flags), data)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := mountToRootfs(b, rootfs, mountLabel, enableCgroupns); err != nil {
|
||||
if err := mountToRootfs(b, c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -276,7 +293,7 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||
// symlink(2) is very dumb, it will just shove the path into
|
||||
// the link and doesn't do any checks or relative path
|
||||
// conversion. Also, don't error out if the cgroup already exists.
|
||||
if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) {
|
||||
if err := os.Symlink(mc, filepath.Join(c.root, m.Destination, ss)); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -284,30 +301,87 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||
return nil
|
||||
}
|
||||
|
||||
func mountCgroupV2(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
|
||||
cgroupPath, err := securejoin.SecureJoin(rootfs, m.Destination)
|
||||
func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
|
||||
dest, err := securejoin.SecureJoin(c.root, m.Destination)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(cgroupPath, 0755); err != nil {
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := unix.Mount(m.Source, cgroupPath, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
||||
// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
||||
if err == unix.EPERM || err == unix.EBUSY {
|
||||
return unix.Mount("/sys/fs/cgroup", cgroupPath, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
||||
return utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
|
||||
if err := unix.Mount(m.Source, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
||||
// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
||||
if err == unix.EPERM || err == unix.EBUSY {
|
||||
src := fs2.UnifiedMountpoint
|
||||
if c.cgroupns && c.cgroup2Path != "" {
|
||||
// Emulate cgroupns by bind-mounting
|
||||
// the container cgroup path rather than
|
||||
// the whole /sys/fs/cgroup.
|
||||
src = c.cgroup2Path
|
||||
}
|
||||
err = unix.Mount(src, procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
||||
if err == unix.ENOENT && c.rootlessCgroups {
|
||||
err = nil
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
)
|
||||
if !strings.HasPrefix(dest, rootfs) {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
|
||||
// Set up a scratch dir for the tmpfs on the host.
|
||||
tmpdir, err := prepareTmp("/tmp")
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
|
||||
}
|
||||
defer cleanupTmp(tmpdir)
|
||||
tmpDir, err := ioutil.TempDir(tmpdir, "runctmpdir")
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
// Configure the *host* tmpdir as if it's the container mount. We change
|
||||
// m.Destination since we are going to mount *on the host*.
|
||||
oldDest := m.Destination
|
||||
m.Destination = tmpDir
|
||||
err = mountPropagate(m, "/", mountLabel)
|
||||
m.Destination = oldDest
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
if err := unix.Unmount(tmpDir, unix.MNT_DETACH); err != nil {
|
||||
logrus.Warnf("tmpcopyup: failed to unmount tmpdir on error: %v", err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return utils.WithProcfd(rootfs, m.Destination, func(procfd string) (Err error) {
|
||||
// Copy the container data to the host tmpdir. We append "/" to force
|
||||
// CopyDirectory to resolve the symlink rather than trying to copy the
|
||||
// symlink itself.
|
||||
if err := fileutils.CopyDirectory(procfd+"/", tmpDir); err != nil {
|
||||
return fmt.Errorf("tmpcopyup: failed to copy %s to %s (%s): %w", m.Destination, procfd, tmpDir, err)
|
||||
}
|
||||
// Now move the mount into the container.
|
||||
if err := unix.Mount(tmpDir, procfd, "", unix.MS_MOVE, ""); err != nil {
|
||||
return fmt.Errorf("tmpcopyup: failed to move mount %s to %s (%s): %w", tmpDir, procfd, m.Destination, err)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func mountToRootfs(m *configs.Mount, c *mountConfig) error {
|
||||
rootfs := c.root
|
||||
mountLabel := c.label
|
||||
dest, err := securejoin.SecureJoin(rootfs, m.Destination)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
switch m.Device {
|
||||
@@ -338,53 +412,21 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||
}
|
||||
return label.SetFileLabel(dest, mountLabel)
|
||||
case "tmpfs":
|
||||
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
||||
tmpDir := ""
|
||||
// dest might be an absolute symlink, so it needs
|
||||
// to be resolved under rootfs.
|
||||
dest, err := securejoin.SecureJoin(rootfs, m.Destination)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Destination = dest
|
||||
stat, err := os.Stat(dest)
|
||||
if err != nil {
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if copyUp {
|
||||
tmpdir, err := prepareTmp("/tmp")
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
|
||||
}
|
||||
defer cleanupTmp(tmpdir)
|
||||
tmpDir, err = ioutil.TempDir(tmpdir, "runctmpdir")
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
m.Destination = tmpDir
|
||||
|
||||
if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP {
|
||||
err = doTmpfsCopyUp(m, rootfs, mountLabel)
|
||||
} else {
|
||||
err = mountPropagate(m, rootfs, mountLabel)
|
||||
}
|
||||
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if copyUp {
|
||||
if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
|
||||
errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
|
||||
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
||||
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
||||
}
|
||||
return errMsg
|
||||
}
|
||||
if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil {
|
||||
errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
|
||||
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
||||
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
||||
}
|
||||
return errMsg
|
||||
}
|
||||
}
|
||||
if stat != nil {
|
||||
if err = os.Chmod(dest, stat.Mode()); err != nil {
|
||||
return err
|
||||
@@ -424,23 +466,13 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||
}
|
||||
case "cgroup":
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return mountCgroupV2(m, rootfs, mountLabel, enableCgroupns)
|
||||
return mountCgroupV2(m, c)
|
||||
}
|
||||
return mountCgroupV1(m, rootfs, mountLabel, enableCgroupns)
|
||||
return mountCgroupV1(m, c)
|
||||
default:
|
||||
// ensure that the destination of the mount is resolved of symlinks at mount time because
|
||||
// any previous mounts can invalidate the next mount's destination.
|
||||
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
|
||||
// evil stuff to try to escape the container's rootfs.
|
||||
var err error
|
||||
if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
|
||||
return err
|
||||
}
|
||||
// update the mount with the correct dest after symlinks are resolved.
|
||||
m.Destination = dest
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -603,7 +635,7 @@ func reOpenDevNull() error {
|
||||
|
||||
// Create the device nodes in the container.
|
||||
func createDevices(config *configs.Config) error {
|
||||
useBindMount := system.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
|
||||
useBindMount := userns.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
|
||||
oldMask := unix.Umask(0000)
|
||||
for _, node := range config.Devices {
|
||||
|
||||
@@ -623,7 +655,7 @@ func createDevices(config *configs.Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func bindMountDeviceNode(dest string, node *devices.Device) error {
|
||||
func bindMountDeviceNode(rootfs, dest string, node *devices.Device) error {
|
||||
f, err := os.Create(dest)
|
||||
if err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
@@ -631,7 +663,9 @@ func bindMountDeviceNode(dest string, node *devices.Device) error {
|
||||
if f != nil {
|
||||
f.Close()
|
||||
}
|
||||
return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
|
||||
return utils.WithProcfd(rootfs, dest, func(procfd string) error {
|
||||
return unix.Mount(node.Path, procfd, "bind", unix.MS_BIND, "")
|
||||
})
|
||||
}
|
||||
|
||||
// Creates the device node in the rootfs of the container.
|
||||
@@ -640,18 +674,21 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
|
||||
// The node only exists for cgroup reasons, ignore it here.
|
||||
return nil
|
||||
}
|
||||
dest := filepath.Join(rootfs, node.Path)
|
||||
dest, err := securejoin.SecureJoin(rootfs, node.Path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
if bind {
|
||||
return bindMountDeviceNode(dest, node)
|
||||
return bindMountDeviceNode(rootfs, dest, node)
|
||||
}
|
||||
if err := mknodDevice(dest, node); err != nil {
|
||||
if os.IsExist(err) {
|
||||
return nil
|
||||
} else if os.IsPermission(err) {
|
||||
return bindMountDeviceNode(dest, node)
|
||||
return bindMountDeviceNode(rootfs, dest, node)
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -931,9 +968,20 @@ func readonlyPath(path string) error {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
return &os.PathError{Op: "bind-mount", Path: path, Err: err}
|
||||
}
|
||||
return unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "")
|
||||
|
||||
var s unix.Statfs_t
|
||||
if err := unix.Statfs(path, &s); err != nil {
|
||||
return &os.PathError{Op: "statfs", Path: path, Err: err}
|
||||
}
|
||||
flags := uintptr(s.Flags) & (unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC)
|
||||
|
||||
if err := unix.Mount(path, path, "", flags|unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY, ""); err != nil {
|
||||
return &os.PathError{Op: "bind-mount-ro", Path: path, Err: err}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// remountReadonly will remount an existing mount point and ensure that it is read-only.
|
||||
@@ -987,61 +1035,47 @@ func writeSystemProperty(key, value string) error {
|
||||
}
|
||||
|
||||
func remount(m *configs.Mount, rootfs string) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
)
|
||||
if !strings.HasPrefix(dest, rootfs) {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
}
|
||||
return unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
|
||||
return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||
return unix.Mount(m.Source, procfd, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
|
||||
})
|
||||
}
|
||||
|
||||
// Do the mount operation followed by additional mounts required to take care
|
||||
// of propagation flags.
|
||||
// of propagation flags. This will always be scoped inside the container rootfs.
|
||||
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
data = label.FormatMountLabel(m.Data, mountLabel)
|
||||
flags = m.Flags
|
||||
)
|
||||
if libcontainerUtils.CleanPath(dest) == "/dev" {
|
||||
// Delay mounting the filesystem read-only if we need to do further
|
||||
// operations on it. We need to set up files in "/dev" and tmpfs mounts may
|
||||
// need to be chmod-ed after mounting. The mount will be remounted ro later
|
||||
// in finalizeRootfs() if necessary.
|
||||
if libcontainerUtils.CleanPath(m.Destination) == "/dev" || m.Device == "tmpfs" {
|
||||
flags &= ^unix.MS_RDONLY
|
||||
}
|
||||
|
||||
// Mount it rw to allow chmod operation. A remount will be performed
|
||||
// later to make it ro if set.
|
||||
if m.Device == "tmpfs" {
|
||||
flags &= ^unix.MS_RDONLY
|
||||
// Because the destination is inside a container path which might be
|
||||
// mutating underneath us, we verify that we are actually going to mount
|
||||
// inside the container with WithProcfd() -- mounting through a procfd
|
||||
// mounts on the target.
|
||||
if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||
return unix.Mount(m.Source, procfd, m.Device, uintptr(flags), data)
|
||||
}); err != nil {
|
||||
return fmt.Errorf("mount through procfd: %w", err)
|
||||
}
|
||||
|
||||
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
||||
if !(copyUp || strings.HasPrefix(dest, rootfs)) {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
}
|
||||
|
||||
if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, pflag := range m.PropagationFlags {
|
||||
if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil {
|
||||
return err
|
||||
// We have to apply mount propagation flags in a separate WithProcfd() call
|
||||
// because the previous call invalidates the passed procfd -- the mount
|
||||
// target needs to be re-opened.
|
||||
if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||
for _, pflag := range m.PropagationFlags {
|
||||
if err := unix.Mount("", procfd, "", uintptr(pflag), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func mountNewCgroup(m *configs.Mount) error {
|
||||
var (
|
||||
data = m.Data
|
||||
source = m.Source
|
||||
)
|
||||
if data == "systemd" {
|
||||
data = cgroups.CgroupNamePrefix + data
|
||||
source = "systemd"
|
||||
}
|
||||
if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), data); err != nil {
|
||||
return err
|
||||
return nil
|
||||
}); err != nil {
|
||||
return fmt.Errorf("change mount propagation through procfd: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
25
vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go
generated
vendored
25
vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go
generated
vendored
@@ -3,6 +3,7 @@
|
||||
package patchbpf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"os"
|
||||
@@ -114,14 +115,26 @@ func disassembleFilter(filter *libseccomp.ScmpFilter) ([]bpf.Instruction, error)
|
||||
defer wtr.Close()
|
||||
defer rdr.Close()
|
||||
|
||||
readerBuffer := new(bytes.Buffer)
|
||||
errChan := make(chan error, 1)
|
||||
go func() {
|
||||
_, err := io.Copy(readerBuffer, rdr)
|
||||
errChan <- err
|
||||
close(errChan)
|
||||
}()
|
||||
|
||||
if err := filter.ExportBPF(wtr); err != nil {
|
||||
return nil, errors.Wrap(err, "exporting BPF")
|
||||
}
|
||||
// Close so that the reader actually gets EOF.
|
||||
_ = wtr.Close()
|
||||
|
||||
if copyErr := <-errChan; copyErr != nil {
|
||||
return nil, errors.Wrap(copyErr, "reading from ExportBPF pipe")
|
||||
}
|
||||
|
||||
// Parse the instructions.
|
||||
rawProgram, err := parseProgram(rdr)
|
||||
rawProgram, err := parseProgram(readerBuffer)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "parsing generated BPF filter")
|
||||
}
|
||||
@@ -510,6 +523,11 @@ func assemble(program []bpf.Instruction) ([]unix.SockFilter, error) {
|
||||
}
|
||||
|
||||
func generatePatch(config *configs.Seccomp) ([]bpf.Instruction, error) {
|
||||
// Patch the generated cBPF only when there is not a defaultErrnoRet set
|
||||
// and it is different from ENOSYS
|
||||
if config.DefaultErrnoRet != nil && *config.DefaultErrnoRet == uint(retErrnoEnosys) {
|
||||
return nil, nil
|
||||
}
|
||||
// We only add the stub if the default action is not permissive.
|
||||
if isAllowAction(config.DefaultAction) {
|
||||
logrus.Debugf("seccomp: skipping -ENOSYS stub filter generation")
|
||||
@@ -584,9 +602,12 @@ func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (err error) {
|
||||
unix.SECCOMP_MODE_FILTER,
|
||||
uintptr(unsafe.Pointer(&fprog)), 0, 0)
|
||||
} else {
|
||||
_, _, err = unix.RawSyscall(unix.SYS_SECCOMP,
|
||||
_, _, errno := unix.RawSyscall(unix.SYS_SECCOMP,
|
||||
uintptr(C.C_SET_MODE_FILTER),
|
||||
uintptr(flags), uintptr(unsafe.Pointer(&fprog)))
|
||||
if errno != 0 {
|
||||
err = errno
|
||||
}
|
||||
}
|
||||
runtime.KeepAlive(filter)
|
||||
runtime.KeepAlive(fprog)
|
||||
|
50
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
generated
vendored
50
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
generated
vendored
@@ -3,11 +3,8 @@
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp/patchbpf"
|
||||
@@ -39,7 +36,7 @@ func InitSeccomp(config *configs.Seccomp) error {
|
||||
return errors.New("cannot initialize Seccomp - nil config passed")
|
||||
}
|
||||
|
||||
defaultAction, err := getAction(config.DefaultAction, nil)
|
||||
defaultAction, err := getAction(config.DefaultAction, config.DefaultErrnoRet)
|
||||
if err != nil {
|
||||
return errors.New("error initializing seccomp - invalid default action")
|
||||
}
|
||||
@@ -80,24 +77,6 @@ func InitSeccomp(config *configs.Seccomp) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsEnabled returns if the kernel has been configured to support seccomp.
|
||||
func IsEnabled() bool {
|
||||
// Try to read from /proc/self/status for kernels > 3.8
|
||||
s, err := parseStatusFile("/proc/self/status")
|
||||
if err != nil {
|
||||
// Check if Seccomp is supported, via CONFIG_SECCOMP.
|
||||
if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL {
|
||||
// Make sure the kernel has CONFIG_SECCOMP_FILTER.
|
||||
if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
_, ok := s["Seccomp"]
|
||||
return ok
|
||||
}
|
||||
|
||||
// Convert Libcontainer Action to Libseccomp ScmpAction
|
||||
func getAction(act configs.Action, errnoRet *uint) (libseccomp.ScmpAction, error) {
|
||||
switch act {
|
||||
@@ -237,33 +216,6 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseStatusFile(path string) (map[string]string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
status := make(map[string]string)
|
||||
|
||||
for s.Scan() {
|
||||
text := s.Text()
|
||||
parts := strings.Split(text, ":")
|
||||
|
||||
if len(parts) <= 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
status[parts[0]] = parts[1]
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return status, nil
|
||||
}
|
||||
|
||||
// Version returns major, minor, and micro.
|
||||
func Version() (uint, uint, uint) {
|
||||
return libseccomp.GetLibraryVersion()
|
||||
|
5
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go
generated
vendored
5
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go
generated
vendored
@@ -18,11 +18,6 @@ func InitSeccomp(config *configs.Seccomp) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsEnabled returns false, because it is not supported.
|
||||
func IsEnabled() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// Version returns major, minor, and micro.
|
||||
func Version() (uint, uint, uint) {
|
||||
return 0, 0, 0
|
||||
|
8
vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
generated
vendored
8
vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
generated
vendored
@@ -12,6 +12,7 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/selinux/go-selinux"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
@@ -21,6 +22,7 @@ type linuxSetnsInit struct {
|
||||
pipe *os.File
|
||||
consoleSocket *os.File
|
||||
config *initConfig
|
||||
logFd int
|
||||
}
|
||||
|
||||
func (l *linuxSetnsInit) getSessionRingName() string {
|
||||
@@ -86,5 +88,11 @@ func (l *linuxSetnsInit) Init() error {
|
||||
return newSystemErrorWithCause(err, "init seccomp")
|
||||
}
|
||||
}
|
||||
logrus.Debugf("setns_init: about to exec")
|
||||
// Close the log pipe fd so the parent's ForwardLogs can exit.
|
||||
if err := unix.Close(l.logFd); err != nil {
|
||||
return newSystemErrorWithCause(err, "closing log pipe fd")
|
||||
}
|
||||
|
||||
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
||||
}
|
||||
|
9
vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
generated
vendored
9
vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
generated
vendored
@@ -16,6 +16,7 @@ import (
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/opencontainers/selinux/go-selinux"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
@@ -24,6 +25,7 @@ type linuxStandardInit struct {
|
||||
consoleSocket *os.File
|
||||
parentPid int
|
||||
fifoFd int
|
||||
logFd int
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
@@ -180,7 +182,14 @@ func (l *linuxStandardInit) Init() error {
|
||||
return err
|
||||
}
|
||||
// Close the pipe to signal that we have completed our init.
|
||||
logrus.Debugf("init: closing the pipe to signal completion")
|
||||
l.pipe.Close()
|
||||
|
||||
// Close the log pipe fd so the parent's ForwardLogs can exit.
|
||||
if err := unix.Close(l.logFd); err != nil {
|
||||
return newSystemErrorWithCause(err, "closing log pipe fd")
|
||||
}
|
||||
|
||||
// Wait for the FIFO to be opened on the other side before exec-ing the
|
||||
// user process. We open it through /proc/self/fd/$fd, because the fd that
|
||||
// was given to us was an O_PATH fd to the fifo itself. Linux allows us to
|
||||
|
49
vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
generated
vendored
49
vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
generated
vendored
@@ -3,12 +3,9 @@
|
||||
package system
|
||||
|
||||
import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
@@ -87,52 +84,6 @@ func Setctty() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// inUserNS holds the result computed by RunningInUserNS; nsOnce ensures that
// the computation runs only once.
var (
|
||||
inUserNS bool
|
||||
nsOnce sync.Once
|
||||
)
|
||||
|
||||
// RunningInUserNS detects whether we are currently running in a user namespace.
// The detection runs once, guarded by nsOnce; every call returns the cached
// inUserNS value. If the current process's UID map cannot be read, inUserNS is
// never assigned and false is returned.
|
||||
// Originally copied from github.com/lxc/lxd/shared/util.go
|
||||
func RunningInUserNS() bool {
|
||||
nsOnce.Do(func() {
|
||||
uidmap, err := user.CurrentProcessUIDMap()
|
||||
if err != nil {
|
||||
// This kernel-provided file only exists if user namespaces are supported
|
||||
return
|
||||
}
|
||||
inUserNS = UIDMapInUserNS(uidmap)
|
||||
})
|
||||
return inUserNS
|
||||
}
|
||||
|
||||
// UIDMapInUserNS reports whether uidmap describes a user namespace other than
// the initial one. It returns false only when the map has exactly one entry
// that maps ID 0 to parent ID 0 with a count of 4294967295; any other map,
// including an empty one, yields true.
func UIDMapInUserNS(uidmap []user.IDMap) bool {
|
||||
/*
|
||||
* We assume we are in the initial user namespace if we have a full
|
||||
* range - 4294967295 uids starting at uid 0.
|
||||
*/
|
||||
if len(uidmap) == 1 && uidmap[0].ID == 0 && uidmap[0].ParentID == 0 && uidmap[0].Count == 4294967295 {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// GetParentNSeuid returns the euid within the parent user namespace
|
||||
func GetParentNSeuid() int64 {
|
||||
euid := int64(os.Geteuid())
|
||||
uidmap, err := user.CurrentProcessUIDMap()
|
||||
if err != nil {
|
||||
// This kernel-provided file only exists if user namespaces are supported
|
||||
return euid
|
||||
}
|
||||
for _, um := range uidmap {
|
||||
if um.ID <= euid && euid <= um.ID+um.Count-1 {
|
||||
return um.ParentID + euid - um.ID
|
||||
}
|
||||
}
|
||||
return euid
|
||||
}
|
||||
|
||||
// SetSubreaper sets the value i as the subreaper setting for the calling process
|
||||
func SetSubreaper(i int) error {
|
||||
return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
|
||||
|
27
vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go
generated
vendored
27
vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go
generated
vendored
@@ -1,27 +0,0 @@
|
||||
// +build !linux
|
||||
|
||||
package system
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
)
|
||||
|
||||
// RunningInUserNS is a stub for non-Linux systems.
|
||||
// It performs no detection and always returns false.
|
||||
func RunningInUserNS() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// UIDMapInUserNS is a stub for non-Linux systems.
|
||||
// It ignores uidmap and always returns false.
|
||||
func UIDMapInUserNS(uidmap []user.IDMap) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// GetParentNSeuid returns the euid within the parent user namespace.
|
||||
// On non-Linux systems no translation is done: it always returns the value
// of os.Geteuid, as an int.
|
||||
func GetParentNSeuid() int {
|
||||
return os.Geteuid()
|
||||
}
|
5
vendor/github.com/opencontainers/runc/libcontainer/system/userns_deprecated.go
generated
vendored
Normal file
5
vendor/github.com/opencontainers/runc/libcontainer/system/userns_deprecated.go
generated
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
package system
|
||||
|
||||
import "github.com/opencontainers/runc/libcontainer/userns"
|
||||
|
||||
// RunningInUserNS is kept in this package as an alias of
// userns.RunningInUserNS.
//
// Deprecated: use RunningInUserNS from
// github.com/opencontainers/runc/libcontainer/userns instead.
var RunningInUserNS = userns.RunningInUserNS
|
2
vendor/github.com/opencontainers/runc/libcontainer/user/MAINTAINERS
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/user/MAINTAINERS
generated
vendored
@@ -1,2 +0,0 @@
|
||||
Tianon Gravi <admwiggin@gmail.com> (@tianon)
|
||||
Aleksa Sarai <cyphar@cyphar.com> (@cyphar)
|
41
vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go
generated
vendored
41
vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go
generated
vendored
@@ -1,41 +0,0 @@
|
||||
package user
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrUnsupported: the current operating system does not provide the
// required data for user lookups.
|
||||
ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data")
|
||||
// ErrNoPasswdEntries and ErrNoGroupEntries: no matching entries were
// found in the passwd file or the group file, respectively.
|
||||
ErrNoPasswdEntries = errors.New("no matching entries in passwd file")
|
||||
ErrNoGroupEntries = errors.New("no matching entries in group file")
|
||||
)
|
||||
|
||||
// LookupUser looks up a user by their username in /etc/passwd. If the user
|
||||
// cannot be found (or there is no /etc/passwd file on the filesystem), then
|
||||
// LookupUser returns an error.
// The work is delegated to the unexported lookupUser.
|
||||
func LookupUser(username string) (User, error) {
|
||||
return lookupUser(username)
|
||||
}
|
||||
|
||||
// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot
|
||||
// be found (or there is no /etc/passwd file on the filesystem), then LookupUid
|
||||
// returns an error.
// The work is delegated to the unexported lookupUid.
|
||||
func LookupUid(uid int) (User, error) {
|
||||
return lookupUid(uid)
|
||||
}
|
||||
|
||||
// LookupGroup looks up a group by its name in /etc/group. If the group cannot
|
||||
// be found (or there is no /etc/group file on the filesystem), then LookupGroup
|
||||
// returns an error.
// The work is delegated to the unexported lookupGroup.
|
||||
func LookupGroup(groupname string) (Group, error) {
|
||||
return lookupGroup(groupname)
|
||||
}
|
||||
|
||||
// LookupGid looks up a group by its group id in /etc/group. If the group cannot
|
||||
// be found (or there is no /etc/group file on the filesystem), then LookupGid
|
||||
// returns an error.
// The work is delegated to the unexported lookupGid.
|
||||
func LookupGid(gid int) (Group, error) {
|
||||
return lookupGid(gid)
|
||||
}
|
20
vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go
generated
vendored
20
vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go
generated
vendored
@@ -16,13 +16,19 @@ const (
|
||||
unixGroupPath = "/etc/group"
|
||||
)
|
||||
|
||||
func lookupUser(username string) (User, error) {
|
||||
// LookupUser looks up a user by their username in /etc/passwd. If the user
|
||||
// cannot be found (or there is no /etc/passwd file on the filesystem), then
|
||||
// LookupUser returns an error.
// Entries are selected by an exact comparison of their Name field with
// username, via lookupUserFunc.
|
||||
func LookupUser(username string) (User, error) {
|
||||
return lookupUserFunc(func(u User) bool {
|
||||
return u.Name == username
|
||||
})
|
||||
}
|
||||
|
||||
func lookupUid(uid int) (User, error) {
|
||||
// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot
|
||||
// be found (or there is no /etc/passwd file on the filesystem), then LookupUid
|
||||
// returns an error.
|
||||
func LookupUid(uid int) (User, error) {
|
||||
return lookupUserFunc(func(u User) bool {
|
||||
return u.Uid == uid
|
||||
})
|
||||
@@ -51,13 +57,19 @@ func lookupUserFunc(filter func(u User) bool) (User, error) {
|
||||
return users[0], nil
|
||||
}
|
||||
|
||||
func lookupGroup(groupname string) (Group, error) {
|
||||
// LookupGroup looks up a group by its name in /etc/group. If the group cannot
|
||||
// be found (or there is no /etc/group file on the filesystem), then LookupGroup
|
||||
// returns an error.
// Entries are selected by an exact comparison of their Name field with
// groupname, via lookupGroupFunc.
|
||||
func LookupGroup(groupname string) (Group, error) {
|
||||
return lookupGroupFunc(func(g Group) bool {
|
||||
return g.Name == groupname
|
||||
})
|
||||
}
|
||||
|
||||
func lookupGid(gid int) (Group, error) {
|
||||
// LookupGid looks up a group by its group id in /etc/group. If the group cannot
|
||||
// be found (or there is no /etc/group file on the filesystem), then LookupGid
|
||||
// returns an error.
|
||||
func LookupGid(gid int) (Group, error) {
|
||||
return lookupGroupFunc(func(g Group) bool {
|
||||
return g.Gid == gid
|
||||
})
|
||||
|
40
vendor/github.com/opencontainers/runc/libcontainer/user/lookup_windows.go
generated
vendored
40
vendor/github.com/opencontainers/runc/libcontainer/user/lookup_windows.go
generated
vendored
@@ -1,40 +0,0 @@
|
||||
// +build windows
|
||||
|
||||
package user
|
||||
|
||||
import (
|
||||
"os/user"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// lookupUser resolves username with os/user.Lookup and converts the result
// with userFromOS. If the lookup fails, a zero User and the lookup error are
// returned.
func lookupUser(username string) (User, error) {
|
||||
u, err := user.Lookup(username)
|
||||
if err != nil {
|
||||
return User{}, err
|
||||
}
|
||||
return userFromOS(u)
|
||||
}
|
||||
|
||||
// lookupUid resolves the decimal string form of uid with os/user.LookupId and
// converts the result with userFromOS. If the lookup fails, a zero User and
// the lookup error are returned.
func lookupUid(uid int) (User, error) {
|
||||
u, err := user.LookupId(strconv.Itoa(uid))
|
||||
if err != nil {
|
||||
return User{}, err
|
||||
}
|
||||
return userFromOS(u)
|
||||
}
|
||||
|
||||
// lookupGroup resolves groupname with os/user.LookupGroup and converts the
// result with groupFromOS. If the lookup fails, a zero Group and the lookup
// error are returned.
func lookupGroup(groupname string) (Group, error) {
|
||||
g, err := user.LookupGroup(groupname)
|
||||
if err != nil {
|
||||
return Group{}, err
|
||||
}
|
||||
return groupFromOS(g)
|
||||
}
|
||||
|
||||
// lookupGid resolves the decimal string form of gid with
// os/user.LookupGroupId and converts the result with groupFromOS. If the
// lookup fails, a zero Group and the lookup error are returned.
func lookupGid(gid int) (Group, error) {
|
||||
g, err := user.LookupGroupId(strconv.Itoa(gid))
|
||||
if err != nil {
|
||||
return Group{}, err
|
||||
}
|
||||
return groupFromOS(g)
|
||||
}
|
48
vendor/github.com/opencontainers/runc/libcontainer/user/user.go
generated
vendored
48
vendor/github.com/opencontainers/runc/libcontainer/user/user.go
generated
vendored
@@ -2,10 +2,10 @@ package user
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/user"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
@@ -16,6 +16,13 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrUnsupported: the current operating system does not provide the
// required data for user lookups.
|
||||
ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data")
|
||||
|
||||
// ErrNoPasswdEntries and ErrNoGroupEntries: no matching entries were
// found in the passwd file or the group file, respectively.
|
||||
ErrNoPasswdEntries = errors.New("no matching entries in passwd file")
|
||||
ErrNoGroupEntries = errors.New("no matching entries in group file")
|
||||
|
||||
// ErrRange states that uids and gids must lie within minId-maxId.
ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minId, maxId)
|
||||
)
|
||||
|
||||
@@ -29,28 +36,6 @@ type User struct {
|
||||
Shell string
|
||||
}
|
||||
|
||||
// userFromOS converts an os/user.(*User) to local User
|
||||
//
|
||||
// (This does not include Pass, Shell or Gecos)
|
||||
func userFromOS(u *user.User) (User, error) {
|
||||
newUser := User{
|
||||
Name: u.Username,
|
||||
Home: u.HomeDir,
|
||||
}
|
||||
id, err := strconv.Atoi(u.Uid)
|
||||
if err != nil {
|
||||
return newUser, err
|
||||
}
|
||||
newUser.Uid = id
|
||||
|
||||
id, err = strconv.Atoi(u.Gid)
|
||||
if err != nil {
|
||||
return newUser, err
|
||||
}
|
||||
newUser.Gid = id
|
||||
return newUser, nil
|
||||
}
|
||||
|
||||
type Group struct {
|
||||
Name string
|
||||
Pass string
|
||||
@@ -58,23 +43,6 @@ type Group struct {
|
||||
List []string
|
||||
}
|
||||
|
||||
// groupFromOS converts an os/user.(*Group) to local Group
|
||||
//
|
||||
// (This does not include Pass or List)
|
||||
func groupFromOS(g *user.Group) (Group, error) {
|
||||
newGroup := Group{
|
||||
Name: g.Name,
|
||||
}
|
||||
|
||||
id, err := strconv.Atoi(g.Gid)
|
||||
if err != nil {
|
||||
return newGroup, err
|
||||
}
|
||||
newGroup.Gid = id
|
||||
|
||||
return newGroup, nil
|
||||
}
|
||||
|
||||
// SubID represents an entry in /etc/sub{u,g}id
|
||||
type SubID struct {
|
||||
Name string
|
||||
|
42
vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go
generated
vendored
Normal file
42
vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go
generated
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
// +build gofuzz
|
||||
|
||||
package user
|
||||
|
||||
import (
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// IsDivisbleBy reports whether n is an exact multiple of divisibleby.
//
// A zero divisor never divides n: the function returns false in that case
// instead of panicking with an integer divide-by-zero.
func IsDivisbleBy(n int, divisibleby int) bool {
	if divisibleby == 0 {
		return false
	}
	return n%divisibleby == 0
}
|
||||
|
||||
func FuzzUser(data []byte) int {
|
||||
if len(data) == 0 {
|
||||
return -1
|
||||
}
|
||||
if !IsDivisbleBy(len(data), 5) {
|
||||
return -1
|
||||
}
|
||||
|
||||
var divided [][]byte
|
||||
|
||||
chunkSize := len(data) / 5
|
||||
|
||||
for i := 0; i < len(data); i += chunkSize {
|
||||
end := i + chunkSize
|
||||
|
||||
divided = append(divided, data[i:end])
|
||||
}
|
||||
|
||||
_, _ = ParsePasswdFilter(strings.NewReader(string(divided[0])), nil)
|
||||
|
||||
var passwd, group io.Reader
|
||||
|
||||
group = strings.NewReader(string(divided[1]))
|
||||
_, _ = GetAdditionalGroups([]string{string(divided[2])}, group)
|
||||
|
||||
passwd = strings.NewReader(string(divided[3]))
|
||||
_, _ = GetExecUser(string(divided[4]), nil, passwd, group)
|
||||
return 1
|
||||
}
|
5
vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go
generated
vendored
Normal file
5
vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go
generated
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
package userns
|
||||
|
||||
// RunningInUserNS detects whether we are currently running in a user namespace.
// It is a package-level variable bound to the unexported runningInUserNS.
|
||||
// Originally copied from github.com/lxc/lxd/shared/util.go
|
||||
var RunningInUserNS = runningInUserNS
|
15
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go
generated
vendored
Normal file
15
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go
generated
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
// +build gofuzz
|
||||
|
||||
package userns
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
)
|
||||
|
||||
// FuzzUIDMap is a fuzzing entry point: it parses data as an ID map with
// user.ParseIDMap and passes the result to uidMapInUserNS. It always returns 1.
func FuzzUIDMap(data []byte) int {
|
||||
// The parse error is discarded; whatever ParseIDMap returned is used.
uidmap, _ := user.ParseIDMap(strings.NewReader(string(data)))
|
||||
_ = uidMapInUserNS(uidmap)
|
||||
return 1
|
||||
}
|
37
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go
generated
vendored
Normal file
37
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go
generated
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
package userns
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
)
|
||||
|
||||
// inUserNS holds the result computed by runningInUserNS; nsOnce ensures that
// the computation runs only once.
var (
|
||||
inUserNS bool
|
||||
nsOnce sync.Once
|
||||
)
|
||||
|
||||
// runningInUserNS detects whether we are currently running in a user namespace.
|
||||
// Originally copied from github.com/lxc/lxd/shared/util.go
|
||||
func runningInUserNS() bool {
|
||||
nsOnce.Do(func() {
|
||||
uidmap, err := user.CurrentProcessUIDMap()
|
||||
if err != nil {
|
||||
// This kernel-provided file only exists if user namespaces are supported
|
||||
return
|
||||
}
|
||||
inUserNS = uidMapInUserNS(uidmap)
|
||||
})
|
||||
return inUserNS
|
||||
}
|
||||
|
||||
func uidMapInUserNS(uidmap []user.IDMap) bool {
|
||||
/*
|
||||
* We assume we are in the initial user namespace if we have a full
|
||||
* range - 4294967295 uids starting at uid 0.
|
||||
*/
|
||||
if len(uidmap) == 1 && uidmap[0].ID == 0 && uidmap[0].ParentID == 0 && uidmap[0].Count == 4294967295 {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
17
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go
generated
vendored
Normal file
17
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go
generated
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
// +build !linux
|
||||
|
||||
package userns
|
||||
|
||||
import "github.com/opencontainers/runc/libcontainer/user"
|
||||
|
||||
// runningInUserNS is a stub for non-Linux systems.
|
||||
// It performs no detection and always returns false.
|
||||
func runningInUserNS() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// uidMapInUserNS is a stub for non-Linux systems.
|
||||
// It ignores uidmap and always returns false.
|
||||
func uidMapInUserNS(uidmap []user.IDMap) bool {
|
||||
return false
|
||||
}
|
54
vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
generated
vendored
54
vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
generated
vendored
@@ -3,12 +3,15 @@ package utils
|
||||
import (
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cyphar/filepath-securejoin"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
@@ -88,6 +91,57 @@ func CleanPath(path string) string {
|
||||
return filepath.Clean(path)
|
||||
}
|
||||
|
||||
// stripRoot returns the passed path, stripping the root path if it was
|
||||
// (lexicially) inside it. Note that both passed paths will always be treated
|
||||
// as absolute, and the returned path will also always be absolute. In
|
||||
// addition, the paths are cleaned before stripping the root.
|
||||
func stripRoot(root, path string) string {
|
||||
// Make the paths clean and absolute.
|
||||
root, path = CleanPath("/"+root), CleanPath("/"+path)
|
||||
switch {
|
||||
case path == root:
|
||||
path = "/"
|
||||
case root == "/":
|
||||
// do nothing
|
||||
case strings.HasPrefix(path, root+"/"):
|
||||
path = strings.TrimPrefix(path, root+"/")
|
||||
}
|
||||
return CleanPath("/" + path)
|
||||
}
|
||||
|
||||
// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
|
||||
// corresponding to the unsafePath resolved within the root. Before passing the
|
||||
// fd, this path is verified to have been inside the root -- so operating on it
|
||||
// through the passed fdpath should be safe. Do not access this path through
|
||||
// the original path strings, and do not attempt to use the pathname outside of
|
||||
// the passed closure (the file handle will be freed once the closure returns).
|
||||
func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
||||
// Remove the root then forcefully resolve inside the root.
|
||||
unsafePath = stripRoot(root, unsafePath)
|
||||
path, err := securejoin.SecureJoin(root, unsafePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolving path inside rootfs failed: %v", err)
|
||||
}
|
||||
|
||||
// Open the target path.
|
||||
fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open o_path procfd: %w", err)
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
// Double-check the path is the one we expected.
|
||||
procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
|
||||
if realpath, err := os.Readlink(procfd); err != nil {
|
||||
return fmt.Errorf("procfd verification failed: %w", err)
|
||||
} else if realpath != path {
|
||||
return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
||||
}
|
||||
|
||||
// Run the closure.
|
||||
return fn(procfd)
|
||||
}
|
||||
|
||||
// SearchLabels searches a list of key-value pairs for the provided key and
|
||||
// returns the corresponding value. The pairs must be separated with '='.
|
||||
func SearchLabels(labels []string, query string) string {
|
||||
|
Reference in New Issue
Block a user